现在的网卡为了提升性能,大部分已经使用NAPI的方式接收数据帧。Linux内核使用struct napi_struct来管理NAPI设备的新特性和操作。系统收到数据包后,支持NAPI模式的网络设备会将自己的struct napi_struct数据结构实例放到CPU的struct softnet_data数据结构的poll_list链表中。当软中断NET_RX_SOFTIRQ被触发、网络子系统开始处理时,会遍历该CPU的poll_list链表,依次执行各NAPI设备的poll方法,poll方法一次会从硬件缓冲区读取多个数据帧。所以struct napi_struct数据结构的设计应该考虑以下因素:
/*
 * Per-device NAPI context. An instance is queued on a CPU's
 * softnet_data poll_list and its poll method is invoked while the
 * NET_RX_SOFTIRQ softirq is being processed.
 */
struct napi_struct {
/* Link used to queue this instance on the per-CPU softnet_data poll_list. */
struct list_head poll_list;
/* Bit flags; NAPI_STATE_SCHED marks the instance as already scheduled. */
unsigned long state;
/* NOTE(review): presumably the per-poll work budget for this device - confirm. */
int weight;
/* NOTE(review): presumably the number of skbs currently held on gro_list - confirm. */
unsigned int gro_count;
/*
 * Driver-supplied poll method run from the softirq for this instance.
 * NOTE(review): the int argument is presumably the budget - confirm.
 */
int (*poll)(struct napi_struct *, int);
/* The next two fields exist only when CONFIG_NETPOLL is enabled. */
#ifdef CONFIG_NETPOLL
/* NOTE(review): presumably serializes poll against netpoll - confirm. */
spinlock_t poll_lock;
/* NOTE(review): presumably the CPU currently running poll - confirm. */
int poll_owner;
#endif
/* NOTE(review): presumably the network device this instance belongs to - confirm. */
struct net_device *dev;
/* NOTE(review): presumably the list of packets pending GRO merging - confirm. */
struct sk_buff *gro_list;
/* NOTE(review): purpose not visible in this excerpt - confirm against users. */
struct sk_buff *skb;
/* NOTE(review): presumably links this instance into its device's NAPI list - confirm. */
struct list_head dev_list;
/* NOTE(review): presumably the hash-table linkage keyed by napi_id - confirm. */
struct hlist_node napi_hash_node;
/* NOTE(review): presumably a unique identifier for this instance - confirm. */
unsigned int napi_id;
};
struct napi_struct数据结构中各数据域的含义如下:
NAPI的硬件中断主要工作就是将网络设备的napi实例添加到CPU的poll_list链表中等待调度,所以它的处理是要分成两部分的。我们以e1000网卡为例,e1000的硬件中断注册由e1000_request_irq完成,其中的硬件中断处理函数就是e1000_intr。
static int e1000_request_irq(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
irq_handler_t handler = e1000_intr; //硬件中断段处理函数
int irq_flags = IRQF_SHARED;
int err;
err = request_irq(adapter->pdev->irq, handler, irq_flags, netdev->name,
netdev);
if (err) {
e_err(probe, "Unable to allocate interrupt Error: %d\n", err);
}
return err;
}
硬件中断处理函数e1000_intr首先调用napi_schedule_prep判断数据域state的NAPI_STATE_SCHED位是否已设置:如果已经设置,说明设备已经在poll_list链表中;如果没有设置,就设置state的NAPI_STATE_SCHED位,并调用__napi_schedule继续处理。
/*
 * e1000_intr - hardware interrupt handler for the e1000 adapter.
 * @irq:  interrupt number (unused in this body)
 * @data: the net_device that was registered as the handler cookie
 *
 * Reads the interrupt cause register, handles link-status events by
 * scheduling the watchdog work, masks further device interrupts and
 * hands receive processing over to NAPI via __napi_schedule().
 *
 * Returns IRQ_NONE when the cause register reads zero, IRQ_HANDLED
 * otherwise.
 */
static irqreturn_t e1000_intr(int irq, void *data)
{
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
/* NOTE(review): er32/ew32/E1000_WRITE_FLUSH presumably rely on this local being named hw - confirm. */
struct e1000_hw *hw = &adapter->hw;
u32 icr = er32(ICR);
if (unlikely((!icr)))
return IRQ_NONE; /* Not our interrupt */
/* we might have caused the interrupt, but the above
 * read cleared it, and just in case the driver is
 * down there is nothing to do so return handled
 */
if (unlikely(test_bit(__E1000_DOWN, &adapter->flags))) //bail out if the adapter is marked down
return IRQ_HANDLED;
/* Receive-sequence error or link-status change: request a link re-check. */
if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) {
hw->get_link_status = 1;
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->flags))
schedule_delayed_work(&adapter->watchdog_task, 1);
}
/* disable interrupts, without the synchronize_irq bit */
ew32(IMC, ~0);
E1000_WRITE_FLUSH();
if (likely(napi_schedule_prep(&adapter->napi))) { //taken when NAPI_STATE_SCHED was not yet set, i.e. the device is not already on poll_list
adapter->total_tx_bytes = 0; //zero the tx/rx byte and packet totals before scheduling the poll
adapter->total_tx_packets = 0;
adapter->total_rx_bytes = 0;
adapter->total_rx_packets = 0;
__napi_schedule(&adapter->napi);//add the device's napi_struct to the CPU's poll_list and raise NET_RX_SOFTIRQ
} else {
/* this really should not happen! if it does it is basically a
 * bug, but not a hard error, so enable ints and continue
 */
if (!test_bit(__E1000_DOWN, &adapter->flags))
e1000_irq_enable(adapter);
}
return IRQ_HANDLED;
}
__napi_schedule首先关闭本地中断并保存中断状态,然后调用____napi_schedule继续处理,处理完后恢复中断状态。
/*
 * __napi_schedule - queue a NAPI instance on the current CPU.
 * @n: NAPI instance to queue
 *
 * Disables local interrupts (saving the previous state), passes the
 * current CPU's softnet_data and @n to ____napi_schedule(), then
 * restores the saved interrupt state.
 */
void __napi_schedule(struct napi_struct *n)
{
	struct softnet_data *cpu_sd;
	unsigned long irq_state;

	local_irq_save(irq_state);
	/* Resolve the per-CPU pointer only once interrupts are off. */
	cpu_sd = this_cpu_ptr(&softnet_data);
	____napi_schedule(cpu_sd, n);
	local_irq_restore(irq_state);
}
____napi_schedule主要做两件事:一是将设备的napi实例添加到CPU的poll_list链表尾部,二是标记软中断NET_RX_SOFTIRQ:
/*
 * ____napi_schedule - put a NAPI instance on a softnet_data poll list.
 * @sd:   softnet_data whose poll_list receives the instance
 * @napi: NAPI instance to enqueue
 *
 * Appends @napi (not a softirq handler) to the tail of @sd's poll_list
 * and then marks NET_RX_SOFTIRQ as pending.
 */
static inline void ____napi_schedule(struct softnet_data *sd,
				     struct napi_struct *napi)
{
	struct list_head *cpu_queue = &sd->poll_list;
	struct list_head *entry = &napi->poll_list;

	list_add_tail(entry, cpu_queue);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
至此网络设备的硬件中断已经处理完成,接下来由软中断调用napi的poll函数,从设备缓冲区拷贝数据帧到内核空间。