tcp_conn_lookup
/**
 * Connection lookup hook of the TCP protocol.
 *
 * @param proto     protocol descriptor; only checked by the assert here
 * @param iph       parsed IP header info; iph->len is used as the offset of
 *                  the TCP header inside mbuf
 * @param mbuf      packet being processed
 * @param direct    [out] handed to dp_vs_conn_get() and read back here when a
 *                  connection is found and the ACK flag is set, so it must
 *                  not be NULL
 * @param reverse   forwarded unchanged to dp_vs_conn_get()
 * @param drop      [out] set to true when the blacklist lookup matches;
 *                  otherwise left untouched
 * @param peer_cid  [out] set to the redirect entry's cid when no connection
 *                  is found but a redirect entry exists; otherwise untouched
 *
 * @return the connection returned by dp_vs_conn_get(), or NULL when the TCP
 *         header cannot be read, the packet is blacklisted, or no connection
 *         matches.
 */
static struct dp_vs_conn *
tcp_conn_lookup(struct dp_vs_proto *proto, const struct dp_vs_iphdr *iph,
                struct rte_mbuf *mbuf, int *direct, bool reverse, bool *drop,
                lcoreid_t *peer_cid)
{
    struct tcphdr hdr_copy;     /* scratch buffer for mbuf_header_pointer() */
    struct tcphdr *tcph;
    struct dp_vs_conn *found;

    assert(proto && iph && mbuf);

    /* The TCP header starts iph->len bytes into the packet. */
    tcph = mbuf_header_pointer(mbuf, iph->len, sizeof(hdr_copy), &hdr_copy);
    if (unlikely(tcph == NULL))
        return NULL;

    /*
     * Blacklist check keyed on <af, proto, daddr, dest port, saddr>.
     * A match reports "no connection" and raises *drop.
     */
    if (dp_vs_blklst_lookup(iph->af, iph->proto, &iph->daddr,
                            tcph->dest, &iph->saddr)) {
        *drop = true;
        return NULL;
    }

    found = dp_vs_conn_get(iph->af, iph->proto,
                           &iph->saddr, &iph->daddr,
                           tcph->source, tcph->dest, direct, reverse);

    if (found == NULL) {
        /*
         * No connection found: check whether a redirect entry exists for
         * this tuple and, if so, report its cid through *peer_cid.
         * The connection redirect tuple is only for the reverse tuple.
         */
        struct dp_vs_redirect *redir;

        redir = dp_vs_redirect_get(iph->af, iph->proto,
                                   &iph->saddr, &iph->daddr,
                                   tcph->source, tcph->dest);
        if (redir != NULL)
            *peer_cid = redir->cid;

        return NULL;
    }

    /* Neighbour confirmation is only done for segments carrying ACK. */
    if (!tcph->ack)
        return found;

    /*
     * L2 confirm neighbour
     *   pkt in from client confirm neighbour to client
     *   pkt out from rs confirm neighbour to rs
     * Each branch is skipped when the device is unset or the next hop is
     * the "any" address.
     */
    if ((*direct == DPVS_CONN_DIR_INBOUND) && found->out_dev
            && (!inet_is_addr_any(tuplehash_in(found).af,
                                  &found->out_nexthop))) {
        neigh_confirm(tuplehash_in(found).af, &found->out_nexthop,
                      found->out_dev);
    } else if ((*direct == DPVS_CONN_DIR_OUTBOUND) && found->in_dev
            && (!inet_is_addr_any(tuplehash_out(found).af,
                                  &found->in_nexthop))) {
        neigh_confirm(tuplehash_out(found).af, &found->in_nexthop,
                      found->in_dev);
    }

    return found;
}
dp_vs_conn_get
/**
 * Try to look up and hold a dp_vs_conn{} by packet tuple
 *
 *   <af, proto, saddr, sport, daddr, dport>.
 *
 * The bucket is selected in this_conn_tbl by hashing
 * <af, saddr, sport, daddr, dport>; the entries chained in that bucket are
 * then compared field by field.
 *
 * @param af       address family of saddr/daddr
 * @param proto    L4 protocol number
 * @param saddr    packet source address
 * @param daddr    packet destination address
 * @param sport    packet source port (passed to ntohs() when logged)
 * @param dport    packet destination port (passed to ntohs() when logged)
 * @param dir      [out, optional] receives the direction stored in the
 *                 matching tuple hash entry; may be NULL
 * @param reverse  when true, source and destination are swapped both for
 *                 hashing and for comparison
 *
 * @return the matching connection with its refcnt incremented, or NULL if
 *         no entry matches.
 */
struct dp_vs_conn *dp_vs_conn_get(int af, uint16_t proto,
        const union inet_addr *saddr, const union inet_addr *daddr,
        uint16_t sport, uint16_t dport, int *dir, bool reverse)
{
    /* Lookup key: the packet tuple, or its mirror image when reverse is set. */
    const union inet_addr *key_saddr = saddr;
    const union inet_addr *key_daddr = daddr;
    uint16_t key_sport = sport;
    uint16_t key_dport = dport;
    uint32_t hash;
    struct conn_tuple_hash *tuphash;
    struct dp_vs_conn *conn = NULL;
#ifdef CONFIG_DPVS_IPVS_DEBUG
    char sbuf[64], dbuf[64];
#endif

    if (unlikely(reverse)) { /* swap source/dest for lookup */
        key_saddr = daddr;
        key_daddr = saddr;
        key_sport = dport;
        key_dport = sport;
    }

    hash = dp_vs_conn_hashkey(af, key_saddr, key_sport, key_daddr, key_dport,
                              DPVS_CONN_TBL_MASK);

#ifdef CONFIG_DPVS_IPVS_CONN_LOCK
    /*
     * NOTE(review): the original note states that with CONN_LOCK the table
     * is shared by all lcores and otherwise it is per-lcore -- confirm
     * against the definition of this_conn_tbl.
     */
    rte_spinlock_lock(&this_conn_lock);
#endif

    list_for_each_entry(tuphash, &this_conn_tbl[hash], list) {
        if (tuphash->sport == key_sport
                && tuphash->dport == key_dport
                && inet_addr_equal(af, &tuphash->saddr, key_saddr)
                && inet_addr_equal(af, &tuphash->daddr, key_daddr)
                && tuphash->proto == proto
                && tuphash->af == af) {
            /* hit: the tuple hash entry is embedded in the dp_vs_conn */
            conn = tuplehash_to_conn(tuphash);
            /* hold a reference for the caller */
            rte_atomic32_inc(&conn->refcnt);
            if (dir)
                *dir = tuphash->direct;
            break;
        }
    }

#ifdef CONFIG_DPVS_IPVS_CONN_LOCK
    rte_spinlock_unlock(&this_conn_lock);
#endif

#ifdef CONFIG_DPVS_IPVS_DEBUG
    /* Always log the tuple as passed in, not the (possibly swapped) key. */
    RTE_LOG(DEBUG, IPVS, "conn lookup: [%d] %s %s/%d -> %s/%d %s %s\n",
            rte_lcore_id(), inet_proto_name(proto),
            inet_ntop(af, saddr, sbuf, sizeof(sbuf)) ? sbuf : "::", ntohs(sport),
            inet_ntop(af, daddr, dbuf, sizeof(dbuf)) ? dbuf : "::", ntohs(dport),
            conn ? "hit" : "miss", reverse ? "reverse" : "");
#endif

    return conn;
}
dp_vs_conn_init
/**
 * Initialize the connection module.
 *
 * Steps, in order:
 *   1. allocate and initialize the connection template table (dp_vs_ct_tbl)
 *      and its spinlock;
 *   2. run conn_init_lcore() on the other lcores and wait for each of them;
 *   3. call conn_ctrl_init();
 *   4. create one dp_vs_conn mempool per NUMA node;
 *   5. seed dp_vs_conn_rnd from random().
 *
 * @return EDPVS_OK on success; EDPVS_NOMEM if the template table or a
 *         mempool cannot be allocated. On mempool failure dp_vs_conn_term()
 *         is called before returning.
 */
int dp_vs_conn_init(void)
{
    int i, err;
    lcoreid_t lcore;
    char poolname[32];

    /* init connection template table */
    dp_vs_ct_tbl = rte_malloc_socket(NULL,
            sizeof(struct list_head) * DPVS_CONN_TBL_SIZE,
            RTE_CACHE_LINE_SIZE, rte_socket_id());
    /* Nothing else has been set up yet, so fail without any cleanup. */
    if (!dp_vs_ct_tbl)
        return EDPVS_NOMEM;

    for (i = 0; i < DPVS_CONN_TBL_SIZE; i++)
        INIT_LIST_HEAD(&dp_vs_ct_tbl[i]);
    rte_spinlock_init(&dp_vs_ct_lock);

    /*
     * unlike linux per_cpu() which can assign CPU number,
     * RTE_PER_LCORE() can only access own instances.
     * it make codes looks strange.
     *
     * Hence the per-lcore this_conn_tbl is initialized by launching
     * conn_init_lcore() on the lcores themselves.
     */
    rte_eal_mp_remote_launch(conn_init_lcore, NULL, SKIP_MASTER);
    RTE_LCORE_FOREACH_SLAVE(lcore) {
        /* A negative result is only logged; initialization carries on. */
        err = rte_eal_wait_lcore(lcore);
        if (err < 0) {
            RTE_LOG(WARNING, IPVS, "%s: lcore %d: %s.\n",
                    __func__, lcore, dpvs_strerror(err));
        }
    }

    conn_ctrl_init();

    /* connection cache on each NUMA socket */
    for (i = 0; i < get_numa_nodes(); i++) {
        snprintf(poolname, sizeof(poolname), "dp_vs_conn_%d", i);
        dp_vs_conn_cache[i] = rte_mempool_create(poolname,
                                                 conn_pool_size,
                                                 sizeof(struct dp_vs_conn),
                                                 conn_pool_cache,
                                                 0, NULL, NULL, NULL, NULL,
                                                 i, 0);
        if (!dp_vs_conn_cache[i]) {
            err = EDPVS_NOMEM;
            goto cleanup;
        }
    }

    dp_vs_conn_rnd = (uint32_t)random();

    return EDPVS_OK;

cleanup:
    dp_vs_conn_term();
    return err;
}
conn_init_lcore: conn查找表初始化 (per-lcore initialization of the conn lookup table)
/**
 * Set up the connection lookup table of the calling lcore.
 *
 * Allocates DPVS_CONN_TBL_SIZE list heads into this_conn_tbl, initializes
 * every bucket as an empty list, initializes this_conn_lock when
 * CONFIG_DPVS_IPVS_CONN_LOCK is defined, and resets this_conn_count to 0.
 *
 * @param arg  unused
 *
 * @return EDPVS_OK on success,
 *         EDPVS_DISABLED if the calling lcore is not enabled,
 *         EDPVS_IDLE if netif reports the calling lcore as idle,
 *         EDPVS_NOMEM if the table cannot be allocated.
 */
static int conn_init_lcore(void *arg)
{
    int bucket;

    /* Lcores that are disabled or idle get no table. */
    if (!rte_lcore_is_enabled(rte_lcore_id()))
        return EDPVS_DISABLED;

    if (netif_lcore_is_idle(rte_lcore_id()))
        return EDPVS_IDLE;

    this_conn_tbl = rte_malloc_socket(NULL,
                                      sizeof(struct list_head) * DPVS_CONN_TBL_SIZE,
                                      RTE_CACHE_LINE_SIZE, rte_socket_id());
    if (this_conn_tbl == NULL)
        return EDPVS_NOMEM;

    /* Every bucket starts out as an empty list. */
    bucket = 0;
    while (bucket < DPVS_CONN_TBL_SIZE) {
        INIT_LIST_HEAD(&this_conn_tbl[bucket]);
        bucket++;
    }

#ifdef CONFIG_DPVS_IPVS_CONN_LOCK
    rte_spinlock_init(&this_conn_lock);
#endif

    this_conn_count = 0;

    return EDPVS_OK;
}