当前位置: 首页 > 工具软件 > napi > 使用案例 >

napi机制

邵旺
2023-12-01

软中断

在我们的系统中,有以下几种软中断(软中断属于中断处理的下半部,即 bottom half,在硬件中断处理之后执行):

/*
 * Softirq vector numbers. Numbering starts at 0 (HI_SOFTIRQ) and each
 * following entry is one higher than the previous one.
 */
enum
{
	HI_SOFTIRQ=0,
	TIMER_SOFTIRQ,
	NET_TX_SOFTIRQ,	/* network transmit softirq */
	NET_RX_SOFTIRQ,	/* network receive softirq */
	BLOCK_SOFTIRQ,
	IRQ_POLL_SOFTIRQ,
	TASKLET_SOFTIRQ,
	SCHED_SOFTIRQ,
	HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
			    numbering. Sigh! */
	RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */

	NR_SOFTIRQS	/* not a softirq: equals the number of entries above */
};

其中 NET_TX_SOFTIRQ 和 NET_RX_SOFTIRQ 就是我们网络收发包的软中断。

系统会为每一个cpu创建一个内核任务,见kernel/softirq.c

/*
 * Descriptor for the softirq kernel threads; it is registered with
 * smpboot_register_percpu_thread() in spawn_ksoftirqd().
 * The thread body is run_ksoftirqd() and the thread name pattern is
 * "ksoftirqd/%u" (presumably %u is the CPU number - confirm).
 */
static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

/*
 * Early-boot initialisation of the softirq threads.
 *
 * Installs takeover_tasklets as the callback for the CPUHP_SOFTIRQ_DEAD
 * hotplug state, then registers softirq_threads as a per-CPU thread.
 * A registration failure is fatal (BUG_ON).
 *
 * Always returns 0.
 */
static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
/* Run spawn_ksoftirqd() as an early initcall. */
early_initcall(spawn_ksoftirqd);

static void run_ksoftirqd(unsigned int cpu)
{
    local_irq_disable();
    if (local_softirq_pending()) {
        __do_softirq();
        local_irq_enable();
        ....
        return;
    }
    local_irq_enable();//没有待处理的软中断时,同样需要重新打开本地中断
}

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
    __u32 pending;
    ....
    pending = local_softirq_pending();//获取当前cpu软中断状态(哪些软中断需要处理)
    ....
   	while ((softirq_bit = ffs(pending))) { //一个一个处理
	unsigned int vec_nr;
	int prev_count;

	h += softirq_bit - 1;

	vec_nr = h - softirq_vec;
	prev_count = preempt_count();

	kstat_incr_softirqs_this_cpu(vec_nr);

	trace_softirq_entry(vec_nr);
	h->action(h);  //处理注册的软中断回调(open_softirq)
	trace_softirq_exit(vec_nr);
	if (unlikely(prev_count != preempt_count())) {
		pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
		       vec_nr, softirq_to_name[vec_nr], h->action,
		       prev_count, preempt_count());
		preempt_count_set(prev_count);
	}
	h++;
	pending >>= softirq_bit;
}
}

当调用__napi_schedule的时候,会触发一个 NET_RX_SOFTIRQ 类型的软件中断,触发内核线程执行。

注意:在调用__napi_schedule之前,需要先调用 napi_schedule_prep(),确认 napi 没有处于 NAPI_STATE_DISABLE 状态,并为其设置 NAPI_STATE_SCHED 状态。驱动的 poll 函数在一次轮询中处理的包数小于 budget(即已经没有更多的包需要处理)时,需要调用 napi_complete(napi),来清除 NAPI_STATE_SCHED 状态。

static inline bool napi_schedule_prep(struct napi_struct *n)
{
    return !napi_disable_pending(n) &&
        !test_and_set_bit(NAPI_STATE_SCHED, &n->state);
}

/*
 * Queue a NAPI instance for polling and raise the receive softirq.
 *
 * @sd:   softnet_data whose poll_list receives the NAPI instance
 * @napi: NAPI instance to queue
 *
 * Uses __raise_softirq_irqoff(); NOTE(review): the "_irqoff" suffix
 * suggests the caller must have local interrupts disabled - confirm.
 */
static inline void ____napi_schedule(struct softnet_data *sd,
				     struct napi_struct *napi)
{
	list_add_tail(&napi->poll_list, &sd->poll_list);/* append to the tail of the poll list */
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);/* signal the NET_RX_SOFTIRQ softirq */
}



/*
 * Mark softirq number @nr as pending.
 *
 * Emits the softirq_raise trace event, then ORs bit @nr into the softirq
 * pending mask. Nothing is executed here; the pending mask is read later
 * by __do_softirq().
 */
void __raise_softirq_irqoff(unsigned int nr)
{
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr); /* record that softirq nr (e.g. NET_RX_SOFTIRQ) was raised by setting its pending bit */
}

void __napi_complete(struct napi_struct *n)
{
	....
	clear_bit(NAPI_STATE_SCHED, &n->state);
}


然后内核线程调用 run_ksoftirqd -> __do_softirq

/*
 * Run the handlers of all softirqs that are currently pending.
 *
 * A snapshot of the pending mask is taken, the mask is cleared, and the
 * handler of every set bit is called with interrupts enabled. If new
 * softirqs became pending in the meantime, the pass is repeated as long as
 * the MAX_SOFTIRQ_TIME deadline has not passed, no reschedule is needed and
 * fewer than MAX_SOFTIRQ_RESTART passes have been made; otherwise the
 * remaining work is handed over via wakeup_softirqd().
 */
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	/* Deadline after which no further restart pass is started. */
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	/* Saved so that PF_MEMALLOC can be restored on exit. */
	unsigned long old_flags = current->flags;
	/* Remaining number of passes allowed through the restart label. */
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler such as network RX might set PF_MEMALLOC
	 * again if the socket is related to swap
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();/* snapshot of the softirq pending mask */
	account_irq_enter_time(current);

	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	in_hardirq = lockdep_softirq_start();

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	/*
	 * ffs() yields the 1-based position of the lowest set bit (0 when no
	 * bit is set), so each iteration handles the lowest pending softirq.
	 */
	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		/* Advance h to the vector entry of this pending bit. */
		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);    /* the handler that was registered with open_softirq() */
		trace_softirq_exit(vec_nr);
		/* A handler must leave preempt_count unchanged; repair it if not. */
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		/* Step past the handled entry and drop its bit from pending. */
		h++;
		pending >>= softirq_bit;
	}

	rcu_bh_qs();
	local_irq_disable();

	/* Check for softirqs raised while the handlers were running. */
	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		/* Limits reached: leave the remaining work to wakeup_softirqd(). */
		wakeup_softirqd();
	}

	lockdep_softirq_end(in_hardirq);
	account_irq_exit_time(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}

网络收发包软中断的初始化

在 net/core/dev.c 文件中,有一个函数 net_dev_init(),里面为每一个CPU初始化了skb的队列,还有我们的软中断: NET_TX_SOFTIRQ和NET_RX_SOFTIRQ,对应的处理函数为net_tx_action和net_rx_action.

   static int __init net_dev_init(void)
   {
       ....
       for_each_possible_cpu(i) {
       struct softnet_data *sd = &per_cpu(softnet_data, i);
         skb_queue_head_init(&sd->input_pkt_queue);
         skb_queue_head_init(&sd->process_queue);
         INIT_LIST_HEAD(&sd->poll_list);
         
     }
    open_softirq(NET_TX_SOFTIRQ, net_tx_action);//注册中断回调
    open_softirq(NET_RX_SOFTIRQ, net_rx_action);//注册中断回调
   }

netif_napi_add

函数原型:
    void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight);
    
功能介绍:
  1、初始化napi结构成员, 并将该napi挂到所属 net_device 的 napi_list 链表里面
       napi->poll, napi->weight, napi->state, napi->timer, INIT_LIST_HEAD(&napi->poll_list)
       
2、高版本的kernel还创建了一个内核任务,调用napi_kthread_create函数创建了一个napi任务,任务函数为:napi_threaded_poll
    if (dev->threaded && napi_kthread_create(napi))
        dev->threaded = 0;
static  int napi_kthread_create(struct napi_struct *n)
{
	n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d", n->dev->name,n->napi_id);//线程函数为 napi_threaded_poll
}

该函数一般由具体的网卡驱动调用,并将poll函数传入。

NIC硬件中断

当网卡产生中断以后,在中断函数里面处理的事情很少,包含:关闭中断和__napi_schedule.
__napi_schedule里面有两种处理方式:
1、如果创建了 napi任务,则唤醒任务,wake_up_process(thread),最后调用任务处理函数napi_threaded_poll进行包处理

static int napi_threaded_poll(void *data)
{
    struct napi_struct *napi = data;
    for(;;) {
         __napi_poll(napi, &repoll);
    }
}

static int __napi_poll(struct napi_struct *n, bool *repoll)
{
    if (test_bit(NAPI_STATE_SCHED, &n->state)) {
        work = n->poll(n, weight);//这就是网卡驱动注册的poll函数
	}
}

2、否则,触发一个NET_RX_SOFTIRQ软中断,__raise_softirq_irqoff(NET_RX_SOFTIRQ), 然后调用软中断处理函数 net_rx_action.

static void net_rx_action(struct softirq_action *h)
{
    LIST_HEAD(list);
    for(;;) {
        if (list_empty(&list)) {
            break;
        }
        n = list_first_entry(&list, struct napi_struct, poll_list);
        budget -= napi_poll(n, &repoll);
    }
}
napi_poll-> __napi_poll//与上面一致。

napi_threaded_poll任务函数

static int napi_kthread_create(struct napi_struct *n)
{
    int err = 0;
    n->thread = kthread_run(napi_thread_poll, n, "napi/%s-%d", n->dev->name, n->napi_id);
    return err;
}

static int napi_threaded_poll(void *data)
{
    struct napi_struct *napi = data;
    void *have;
    while (!napi_thread_wait(napi)) {
        for(;;) {
           .....
           __napi_poll(napi, &repoll);
           .... 
        }
    }
}

/*
 * Simplified __napi_poll(): call the driver's poll hook for a scheduled
 * NAPI instance.
 *
 * @n:      NAPI instance to poll
 * @repoll: not used by this simplified excerpt
 *
 * Returns the value returned by n->poll(), or 0 when NAPI_STATE_SCHED is
 * not set and the hook is therefore not called.
 */
static int __napi_poll(struct napi_struct *n, bool *repoll)
{
    int weight = n->weight;
    int work = 0;   /* stays 0 when the poll hook is skipped */

    /* Only a NAPI instance that is still marked as scheduled is polled. */
    if (test_bit(NAPI_STATE_SCHED, &n->state))
        work = n->poll(n, weight);

    return work;
}

napi_threaded_poll->__napi_poll->poll钩子函数

软中断处理函数

软中断处理函数net_rx_action中,轮询 poll_list 链表,然后调用 napi_poll函数,
在napi_poll函数里面,首先从链表中删掉本napi节点,然后调用__napi_poll函数,
在__napi_poll函数里面调用我们设备注册的poll钩子。net_rx_action->napi_poll->__napi_poll->poll钩子

代码参考

 类似资料: