LINUX网卡驱动分析――Intel(R) PRO/100 Network Driver

微生善
2023-12-01
 
LINUX网卡驱动分析――Intel(R) PRO/100 Network Driver
最近学习LINUX驱动开发,看的是《LINUX DEVICE DRIVER》这本书,差不多能看懂,不过说实在的,都是些理论上的东西,没有什么实践,感觉提升比较慢,所以想拿LINUX自带的E100网卡驱动来分析和学习一下,看看人家大师们怎么写驱动的。然后如果有时间再写一个关于我的开发板的S3C2410上的网卡(CS8900A)驱动。
注:以下分析的是基于2.6.14上带的e100.c驱动源代码。
网卡是一个网络设备,同时也是一个PCI设备。E100网卡驱动就是按照PCI规范来编写的,同时又涉及到驱动程序的内存映射和DMA操作,所以是比较综合的一个驱动程序。
一、模块的初始化。
module_init(e100_init_module); // Register e100_init_module as the module's initialization entry point (2.6 kernel)
module_exit(e100_cleanup_module); // Register e100_cleanup_module as the module's cleanup routine
接着step into → e100_init_module,
/*
 * Module entry point: optionally prints the driver banner, then registers
 * e100_driver with the PCI core.
 *
 * Returns the result of pci_module_init().
 */
static int __init e100_init_module(void)
{
    /* Print the banner only when the NETIF_MSG_DRV bit is set in the
     * message mask derived from the "debug" level. */
    if(((1 << debug) - 1) & NETIF_MSG_DRV) {
        printk(KERN_INFO PFX "%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
        printk(KERN_INFO PFX "%s\n", DRV_COPYRIGHT);
    }
    /* The NIC is a PCI device, so registration goes through the PCI core. */
    return pci_module_init(&e100_driver);
}
 
/* Module exit point: undoes the registration done in e100_init_module(). */
static void __exit e100_cleanup_module(void)
{ // Remove the PCI driver registration
    pci_unregister_driver(&e100_driver);
}
接下来看一下,pci_module_init(&e100_driver);
e100_driver是一个struct pci_driver类型。在代码中,做如下初始化:
/* PCI driver descriptor handed to the PCI core at module init/exit. */
static struct pci_driver e100_driver = {
    .name =         DRV_NAME,// driver name
    .id_table =     e100_id_table,// table of PCI devices this driver supports
    .probe =        e100_probe,// PCI probe callback
    .remove =       __devexit_p(e100_remove),// device removal callback
#ifdef CONFIG_PM
    .suspend =      e100_suspend,// suspend callback (only built with CONFIG_PM)
    .resume =       e100_resume,// resume callback (only built with CONFIG_PM)
#endif
    .shutdown =    e100_shutdown,// shutdown callback; note: the book "Linux Device Drivers" does not describe this field
};
pci_module_init其实是pci_register_driver的宏定义,实际执行pci模块注册过程。PCI注册过程除了初始化pci_driver 内部struct device_driver结构以外,还执行一些与linux设备模型相关的操作,可以参考drivers/pci.c中的初始化代码;下面我们还是将主要精力放在分析网卡驱动代码上。
接下来看一下探测函数:e100_probe;为了方便还是将代码贴一下:
/*
 * PCI probe callback: creates and registers the network device for one
 * e100 adapter.
 *
 * pdev: the PCI device being probed.
 * ent:  the matching entry of the driver's ID table; a non-zero
 *       driver_data sets the "ich" flag on the nic.
 *
 * Returns 0 on success or a negative errno. On failure everything acquired
 * so far is released in reverse order through the err_out_* labels.
 */
static int __devinit e100_probe(struct pci_dev *pdev,
    const struct pci_device_id *ent)
{
    struct net_device *netdev; /* network device being created */
    struct nic *nic;           /* driver-private state kept inside netdev */
    int err;

    /* Allocate the net_device with room for struct nic as private data;
     * the failure message is gated on the probe message level. */
    if(!(netdev = alloc_etherdev(sizeof(struct nic)))) {
        if(((1 << debug) - 1) & NETIF_MSG_PROBE)
            printk(KERN_ERR PFX "Etherdev alloc failed, abort.\n");
        return -ENOMEM;
    }

    /* Install the net_device callbacks implemented by this driver. */
    netdev->open = e100_open;                   /* bring interface up */
    netdev->stop = e100_close;                  /* bring interface down */
    netdev->hard_start_xmit = e100_xmit_frame;  /* transmit one frame */
    netdev->get_stats = e100_get_stats;         /* return statistics */
    netdev->set_multicast_list = e100_set_multicast_list;
    netdev->set_mac_address = e100_set_mac_address;
    netdev->change_mtu = e100_change_mtu;
    netdev->do_ioctl = e100_do_ioctl;
    SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
    netdev->tx_timeout = e100_tx_timeout;
    netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
    netdev->poll = e100_poll;
    netdev->weight = E100_NAPI_WEIGHT;
#ifdef CONFIG_NET_POLL_CONTROLLER
    netdev->poll_controller = e100_netpoll;
#endif
    /* Temporary name; replaced by "eth%d" just before registration.
     * NOTE(review): presumably so early log messages identify the device. */
    strcpy(netdev->name, pci_name(pdev));

    /* Bind the private nic structure to the net_device and PCI device.
     * netdev_priv() returns the pointer to the private data area. */
    nic = netdev_priv(netdev);
    nic->netdev = netdev;
    nic->pdev = pdev;
    nic->msg_enable = (1 << debug) - 1;
    pci_set_drvdata(pdev, netdev);

    /* Enable the PCI device. */
    if((err = pci_enable_device(pdev))) {
        DPRINTK(PROBE, ERR, "Cannot enable PCI device, aborting.\n");
        goto err_out_free_dev;
    }

    /* Resource 0 must be a memory resource. */
    if(!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
        DPRINTK(PROBE, ERR, "Cannot find proper PCI device "
            "base address, aborting.\n");
        err = -ENODEV;
        goto err_out_disable_pdev;
    }

    /* Claim the device's PCI regions under the driver's name. */
    if((err = pci_request_regions(pdev, DRV_NAME))) {
        DPRINTK(PROBE, ERR, "Cannot obtain PCI resources, aborting.\n");
        goto err_out_disable_pdev;
    }

    /* Request a 32-bit DMA mask; failure means no usable DMA setup. */
    if((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) {
        DPRINTK(PROBE, ERR, "No usable DMA configuration, aborting.\n");
        goto err_out_free_res;
    }

    /* NOTE(review): the article describes SET_MODULE_OWNER as a no-op
     * on this kernel version - confirm against the kernel headers. */
    SET_MODULE_OWNER(netdev);
    SET_NETDEV_DEV(netdev, &pdev->dev);

    /* Map the control/status registers (resource 0) into kernel
     * virtual address space. */
    nic->csr = ioremap(pci_resource_start(pdev, 0), sizeof(struct csr));
    if(!nic->csr) {
        DPRINTK(PROBE, ERR, "Cannot map device registers, aborting.\n");
        err = -ENOMEM;
        goto err_out_free_res;
    }

    if(ent->driver_data)
        nic->flags |= ich;
    else
        nic->flags &= ~ich;

    e100_get_defaults(nic);

    /* locks must be initialized before calling hw_reset */
    spin_lock_init(&nic->cb_lock);
    spin_lock_init(&nic->cmd_lock);

    /* Reset the device before pci_set_master() in case device is in some
     * funky state and has an interrupt pending - hint: we don't have the
     * interrupt handler registered yet. */
    e100_hw_reset(nic);

    pci_set_master(pdev);

    /* Prepare (but do not start) the watchdog and LED blink timers. */
    init_timer(&nic->watchdog);
    nic->watchdog.function = e100_watchdog;
    nic->watchdog.data = (unsigned long)nic;
    init_timer(&nic->blink_timer);
    nic->blink_timer.function = e100_blink_led;
    nic->blink_timer.data = (unsigned long)nic;

    /* Deferred work scheduled by e100_tx_timeout(). */
    INIT_WORK(&nic->tx_timeout_task,
        (void (*)(void *))e100_tx_timeout_task, netdev);

    if((err = e100_alloc(nic))) {
        DPRINTK(PROBE, ERR, "Cannot alloc driver memory, aborting.\n");
        goto err_out_iounmap;
    }

    if((err = e100_eeprom_load(nic)))
        goto err_out_free;

    e100_phy_init(nic);

    /* The MAC address is taken from the start of the EEPROM image. */
    memcpy(netdev->dev_addr, nic->eeprom, ETH_ALEN);
    if(!is_valid_ether_addr(netdev->dev_addr)) {
        DPRINTK(PROBE, ERR, "Invalid MAC address from "
            "EEPROM, aborting.\n");
        err = -EAGAIN;
        goto err_out_free;
    }

    /* Wol magic packet can be enabled from eeprom */
    if((nic->mac >= mac_82558_D101_A4) &&
       (nic->eeprom[eeprom_id] & eeprom_id_wol))
        nic->flags |= wol_magic;

    /* ack any pending wake events, disable PME */
    pci_enable_wake(pdev, 0, 0);

    strcpy(netdev->name, "eth%d");
    if((err = register_netdev(netdev))) {
        DPRINTK(PROBE, ERR, "Cannot register net device, aborting.\n");
        goto err_out_free;
    }

    DPRINTK(PROBE, INFO, "addr 0x%lx, irq %d, "
        "MAC addr %02X:%02X:%02X:%02X:%02X:%02X\n",
        pci_resource_start(pdev, 0), pdev->irq,
        netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
        netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);

    return 0;

err_out_free:
    e100_free(nic);
err_out_iounmap:
    iounmap(nic->csr);
err_out_free_res:
    pci_release_regions(pdev);
err_out_disable_pdev:
    pci_disable_device(pdev);
err_out_free_dev:
    pci_set_drvdata(pdev, NULL);
    free_netdev(netdev);
    return err;
}
以上就是网卡探测的过程。
// Read back from the device over PCI so that previously issued writes are forced to complete.
static inline void e100_write_flush(struct nic *nic)
{
    /* Flush previous PCI writes through intermediate bridges
     * by doing a benign read */
    (void)readb(&nic->csr->scb.status);
}
 
/* Writes irq_mask_none to the SCB cmd_hi register under cmd_lock, then
 * flushes the write to the device. */
static inline void e100_enable_irq(struct nic *nic)
{
    unsigned long flags;
   // spin_lock_irqsave takes cmd_lock and disables local interrupts, saving the previous state in flags
    spin_lock_irqsave(&nic->cmd_lock, flags);
    writeb(irq_mask_none, &nic->csr->scb.cmd_hi);
    spin_unlock_irqrestore(&nic->cmd_lock, flags);
   // Flush the posted write
    e100_write_flush(nic);
}
 
/* Writes irq_mask_all to the SCB cmd_hi register under cmd_lock, then
 * flushes the write to the device. Counterpart of e100_enable_irq(). */
static inline void e100_disable_irq(struct nic *nic)
{
    unsigned long flags;
 
    /* cmd_lock is held with local interrupts disabled around the register write */
    spin_lock_irqsave(&nic->cmd_lock, flags);
    writeb(irq_mask_all, &nic->csr->scb.cmd_hi);
    spin_unlock_irqrestore(&nic->cmd_lock, flags);
    e100_write_flush(nic);
}
 
/* Resets the adapter: a selective reset followed by a full software reset
 * (each flushed and followed by a 20us delay), then masks the device's
 * interrupts. */
static void e100_hw_reset(struct nic *nic)
{
    /* Put CU and RU into idle with a selective reset to get
     * device off of PCI bus */
    writel(selective_reset, &nic->csr->port);
    e100_write_flush(nic); udelay(20);
 
    /* Now fully reset device */
    writel(software_reset, &nic->csr->port);
    e100_write_flush(nic); udelay(20);
 
    /* Mask off our interrupt line - it's unmasked after reset */
  // Disable (mask) the device interrupts
    e100_disable_irq(nic);
}
 
//硬件初始化
static int e100_hw_init(struct nic *nic)
{
    int err;
 
    e100_hw_reset(nic);
 
    DPRINTK(HW, ERR, "e100_hw_init/n");
    if(!in_interrupt() && (err = e100_self_test(nic)))
        return err;
 
    if((err = e100_phy_init(nic)))
        return err;
    if((err = e100_exec_cmd(nic, cuc_load_base, 0)))
        return err;
    if((err = e100_exec_cmd(nic, ruc_load_base, 0)))
        return err;
    if((err = e100_exec_cb(nic, NULL, e100_load_ucode)))
        return err;
    if((err = e100_exec_cb(nic, NULL, e100_configure)))
        return err;
    if((err = e100_exec_cb(nic, NULL, e100_setup_iaaddr)))
        return err;
    if((err = e100_exec_cmd(nic, cuc_dump_addr,
        nic->dma_addr + offsetof(struct mem, stats))))
        return err;
    if((err = e100_exec_cmd(nic, cuc_dump_reset, 0)))
        return err;
 
    e100_disable_irq(nic);
 
    return 0;
}
/*
 * Fills a command block with the multicast address list of the device.
 *
 * At most E100_MAX_MULTICAST_ADDRS addresses are copied from
 * netdev->mc_list. The skb argument is not used here; the function is
 * passed to e100_exec_cb() as a callback.
 */
static void e100_multi(struct nic *nic, struct cb *cb, struct sk_buff *skb)
{
    struct net_device *netdev = nic->netdev;
    struct dev_mc_list *entry;
    u16 idx, count = min(netdev->mc_count, E100_MAX_MULTICAST_ADDRS);

    cb->command = cpu_to_le16(cb_multi);
    /* The count field holds the total number of address bytes. */
    cb->u.multi.count = cpu_to_le16(count * ETH_ALEN);

    idx = 0;
    entry = netdev->mc_list;
    while(entry && idx < count) {
        memcpy(&cb->u.multi.addr[idx*ETH_ALEN], &entry->dmi_addr,
            ETH_ALEN);
        idx++;
        entry = entry->next;
    }
}
 
static void e100_set_multicast_list(struct net_device *netdev)
{
    struct nic *nic = netdev_priv(netdev);
 
    DPRINTK(HW, DEBUG, "mc_count=%d, flags=0x%04X/n",
        netdev->mc_count, netdev->flags);
 
    if(netdev->flags & IFF_PROMISC)
        nic->flags |= promiscuous;
    else
        nic->flags &= ~promiscuous;
 
    if(netdev->flags & IFF_ALLMULTI ||
        netdev->mc_count > E100_MAX_MULTICAST_ADDRS)
        nic->flags |= multicast_all;
    else
        nic->flags &= ~multicast_all;
 
    e100_exec_cb(nic, NULL, e100_configure);
    e100_exec_cb(nic, NULL, e100_multi);
}
 
static void e100_update_stats(struct nic *nic)
{
    struct net_device_stats *ns = &nic->net_stats;
    struct stats *s = &nic->mem->stats;
    u32 *complete = (nic->mac < mac_82558_D101_A4) ? &s->fc_xmt_pause :
        (nic->mac < mac_82559_D101M) ? (u32 *)&s->xmt_tco_frames :
        &s->complete;
 
    /* Device's stats reporting may take several microseconds to
     * complete, so where always waiting for results of the
     * previous command. */
 
    if(*complete == le32_to_cpu(cuc_dump_reset_complete)) {
        *complete = 0;
        nic->tx_frames = le32_to_cpu(s->tx_good_frames);
        nic->tx_collisions = le32_to_cpu(s->tx_total_collisions);
        ns->tx_aborted_errors += le32_to_cpu(s->tx_max_collisions);
        ns->tx_window_errors += le32_to_cpu(s->tx_late_collisions);
        ns->tx_carrier_errors += le32_to_cpu(s->tx_lost_crs);
        ns->tx_fifo_errors += le32_to_cpu(s->tx_underruns);
        ns->collisions += nic->tx_collisions;
        ns->tx_errors += le32_to_cpu(s->tx_max_collisions) +
           le32_to_cpu(s->tx_lost_crs);
        ns->rx_length_errors += le32_to_cpu(s->rx_short_frame_errors) +
           nic->rx_over_length_errors;
        ns->rx_crc_errors += le32_to_cpu(s->rx_crc_errors);
        ns->rx_frame_errors += le32_to_cpu(s->rx_alignment_errors);
        ns->rx_over_errors += le32_to_cpu(s->rx_overrun_errors);
        ns->rx_fifo_errors += le32_to_cpu(s->rx_overrun_errors);
        ns->rx_missed_errors += le32_to_cpu(s->rx_resource_errors);
        ns->rx_errors += le32_to_cpu(s->rx_crc_errors) +
           le32_to_cpu(s->rx_alignment_errors) +
           le32_to_cpu(s->rx_short_frame_errors) +
           le32_to_cpu(s->rx_cdt_errors);
        nic->tx_deferred += le32_to_cpu(s->tx_deferred);
        nic->tx_single_collisions +=
           le32_to_cpu(s->tx_single_collisions);
        nic->tx_multiple_collisions +=
           le32_to_cpu(s->tx_multiple_collisions);
        if(nic->mac >= mac_82558_D101_A4) {
           nic->tx_fc_pause += le32_to_cpu(s->fc_xmt_pause);
           nic->rx_fc_pause += le32_to_cpu(s->fc_rcv_pause);
           nic->rx_fc_unsupported +=
               le32_to_cpu(s->fc_rcv_unsupported);
           if(nic->mac >= mac_82559_D101M) {
               nic->tx_tco_frames +=
                   le16_to_cpu(s->xmt_tco_frames);
               nic->rx_tco_frames +=
                   le16_to_cpu(s->rcv_tco_frames);
           }
       }
    }
 
   
    if(e100_exec_cmd(nic, cuc_dump_reset, 0))
        DPRINTK(TX_ERR, DEBUG, "exec cuc_dump_reset failed/n");
}
 
// 网卡信息监测
static void e100_watchdog(unsigned long data)
{
    struct nic *nic = (struct nic *)data;
    struct ethtool_cmd cmd;
 
    DPRINTK(TIMER, DEBUG, "right now = %ld/n", jiffies);
 
    /* mii library handles link maintenance tasks */
 
    mii_ethtool_gset(&nic->mii, &cmd);
 
    if(mii_link_ok(&nic->mii) && !netif_carrier_ok(nic->netdev)) {
        DPRINTK(LINK, INFO, "link up, %sMbps, %s-duplex/n",
           cmd.speed == SPEED_100 ? "100" : "10",
           cmd.duplex == DUPLEX_FULL ? "full" : "half");
    } else if(!mii_link_ok(&nic->mii) && netif_carrier_ok(nic->netdev)) {
        DPRINTK(LINK, INFO, "link down/n");
    }
 
    mii_check_link(&nic->mii);
 
    /* Software generated interrupt to recover from (rare) Rx
    * allocation failure.
    * Unfortunately have to use a spinlock to not re-enable interrupts
    * accidentally, due to hardware that shares a register between the
    * interrupt mask bit and the SW Interrupt generation bit */
    spin_lock_irq(&nic->cmd_lock);
    writeb(readb(&nic->csr->scb.cmd_hi) | irq_sw_gen,&nic->csr->scb.cmd_hi);
    spin_unlock_irq(&nic->cmd_lock);
    e100_write_flush(nic);
 
    e100_update_stats(nic);
    e100_adjust_adaptive_ifs(nic, cmd.speed, cmd.duplex);
 
    if(nic->mac <= mac_82557_D100_C)
       /* Issue a multicast command to workaround a 557 lock up */
        e100_set_multicast_list(nic->netdev);
 
    if(nic->flags & ich && cmd.speed==SPEED_10 && cmd.duplex==DUPLEX_HALF)
       /* Need SW workaround for ICH[x] 10Mbps/half duplex Tx hang. */
        nic->flags |= ich_10h_workaround;
    else
        nic->flags &= ~ich_10h_workaround;
 
    mod_timer(&nic->watchdog, jiffies + E100_WATCHDOG_PERIOD);
}
 
/* Fills a command block for transmitting one skb: sets the Tx command,
 * points the descriptor array at the single buffer descriptor embedded in
 * the CB, and maps skb->data for DMA to the device. Used as the callback
 * passed to e100_exec_cb() by e100_xmit_frame(). */
static inline void e100_xmit_prepare(struct nic *nic, struct cb *cb,
    struct sk_buff *skb)
{
    cb->command = nic->tx_command;
    /* interrupt every 16 packets regardless of delay */
    /* (the test is true when the low four bits of cbs_avail are zero) */
    if((nic->cbs_avail & ~15) == nic->cbs_avail)
        cb->command |= cpu_to_le16(cb_i);
    /* bus address of the buffer descriptor inside this CB */
    cb->u.tcb.tbd_array = cb->dma_addr + offsetof(struct cb, u.tcb.tbd);
    cb->u.tcb.tcb_byte_count = 0;
    cb->u.tcb.threshold = nic->tx_threshold;
    cb->u.tcb.tbd_count = 1;
    cb->u.tcb.tbd.buf_addr = cpu_to_le32(pci_map_single(nic->pdev,
        skb->data, skb->len, PCI_DMA_TODEVICE));
    /* check for mapping failure? */
    /* NOTE(review): the mapping result is not validated here, and the
     * void return type gives no way to report a failure to the caller. */
    cb->u.tcb.tbd.size = cpu_to_le16(skb->len);
}
 
static int e100_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
    struct nic *nic = netdev_priv(netdev);
    int err;
 
    if(nic->flags & ich_10h_workaround) {
       /* SW workaround for ICH[x] 10Mbps/half duplex Tx hang.
          Issue a NOP command followed by a 1us delay before
          issuing the Tx command. */
        if(e100_exec_cmd(nic, cuc_nop, 0))
           DPRINTK(TX_ERR, DEBUG, "exec cuc_nop failed/n");
        udelay(1);
    }
 
    err = e100_exec_cb(nic, skb, e100_xmit_prepare);
 
    switch(err) {
    case -ENOSPC:
       /* We queued the skb, but now we're out of space. */
        DPRINTK(TX_ERR, DEBUG, "No space for CB/n");
        netif_stop_queue(netdev);
        break;
    case -ENOMEM:
       /* This is a hard error - log it. */
        DPRINTK(TX_ERR, DEBUG, "Out of Tx resources, returning skb/n");
        netif_stop_queue(netdev);
        return 1;
    }
 
    netdev->trans_start = jiffies;
    return 0;
}
// 发包过程。
// 对发包对列进行清理
static inline int e100_tx_clean(struct nic *nic)
{
    struct cb *cb;
    int tx_cleaned = 0;
 
    spin_lock(&nic->cb_lock);
 
    DPRINTK(TX_DONE, DEBUG, "cb->status = 0x%04X/n",
        nic->cb_to_clean->status);
 
    /* Clean CBs marked complete */
    for(cb = nic->cb_to_clean;
        cb->status & cpu_to_le16(cb_complete);// 转成无符号32位小头数值
        cb = nic->cb_to_clean = cb->next) {
        if(likely(cb->skb != NULL)) {
           nic->net_stats.tx_packets++;
           nic->net_stats.tx_bytes += cb->skb->len;
 
           pci_unmap_single(nic->pdev,
               le32_to_cpu(cb->u.tcb.tbd.buf_addr),
               le16_to_cpu(cb->u.tcb.tbd.size),
               PCI_DMA_TODEVICE);// 解除 DMA 映射
           dev_kfree_skb_any(cb->skb);// 释放 skb
           cb->skb = NULL;
           tx_cleaned = 1;
       }
        cb->status = 0;
        nic->cbs_avail++;
    }
 
    spin_unlock(&nic->cb_lock);
 
    /* Recover from running out of Tx resources in xmit_frame */
    if(unlikely(tx_cleaned && netif_queue_stopped(nic->netdev)))
        netif_wake_queue(nic->netdev);
 
    return tx_cleaned;
}
// 控制队列操作
static void e100_clean_cbs(struct nic *nic)
{
    if(nic->cbs) {
        while(nic->cbs_avail != nic->params.cbs.count) {
           struct cb *cb = nic->cb_to_clean;
           if(cb->skb) {
               pci_unmap_single(nic->pdev,
                   le32_to_cpu(cb->u.tcb.tbd.buf_addr),
                   le16_to_cpu(cb->u.tcb.tbd.size),
                   PCI_DMA_TODEVICE);
               dev_kfree_skb(cb->skb);
           }
           nic->cb_to_clean = nic->cb_to_clean->next;
           nic->cbs_avail++;
       }
        pci_free_consistent(nic->pdev,
           sizeof(struct cb) * nic->params.cbs.count,
          nic->cbs, nic->cbs_dma_addr);
        nic->cbs = NULL;
        nic->cbs_avail = 0;
    }
    nic->cuc_cmd = cuc_start;
    nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean =
        nic->cbs;
}
 
static int e100_alloc_cbs(struct nic *nic)
{
    struct cb *cb;
    unsigned int i, count = nic->params.cbs.count;
 
    nic->cuc_cmd = cuc_start;
    nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = NULL;
    nic->cbs_avail = 0;
 
    nic->cbs = pci_alloc_consistent(nic->pdev,
        sizeof(struct cb) * count, &nic->cbs_dma_addr);
    if(!nic->cbs)
        return -ENOMEM;
 
    for(cb = nic->cbs, i = 0; i < count; cb++, i++) {
        cb->next = (i + 1 < count) ? cb + 1 : nic->cbs;
        cb->prev = (i == 0) ? nic->cbs + count - 1 : cb - 1;
 
        cb->dma_addr = nic->cbs_dma_addr + i * sizeof(struct cb);
        cb->link = cpu_to_le32(nic->cbs_dma_addr +
           ((i+1) % count) * sizeof(struct cb));
        cb->skb = NULL;
    }
 
    nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = nic->cbs;
    nic->cbs_avail = count;
 
    return 0;
}
/*
 * Starts the receive unit (RU).
 *
 * Does nothing unless the Rx list exists and the RU is in the
 * RU_SUSPENDED state. A NULL rx means "start from the head of the list".
 * The start command is issued only if the chosen entry has an skb.
 */
static inline void e100_start_receiver(struct nic *nic, struct rx *rx)
{
    if(!nic->rxs || nic->ru_running != RU_SUSPENDED)
        return;

    /* handle init time starts */
    if(!rx)
        rx = nic->rxs;

    /* (Re)start RU if suspended or idle and RFA is non-NULL */
    if(!rx->skb)
        return;

    e100_exec_cmd(nic, ruc_start, rx->dma_addr);
    nic->ru_running = RU_RUNNING;
}
/*
给收包过程分配skb,这个是非常重要的过程,主要完成skb 的分配工作,如果rx 队列没有
skb,则new 一个,否则把状态同步一下,然后直接使用旧的skb,用于提高效率。分配好的
skb要作pci_map动作,就是把内存挂在网卡的DMA通道,等有中断发生,内存就是网络数据
包了,效验的动作在后面会作*/
 
#define RFD_BUF_LEN (sizeof(struct rfd) + VLAN_ETH_FRAME_LEN)
static inline int e100_rx_alloc_skb(struct nic *nic, struct rx *rx)
{
    if(!(rx->skb = dev_alloc_skb(RFD_BUF_LEN + NET_IP_ALIGN)))
        return -ENOMEM;
 
    /* Align, init, and map the RFD. */
    rx->skb->dev = nic->netdev;
    skb_reserve(rx->skb, NET_IP_ALIGN);//IP对齐
    memcpy(rx->skb->data, &nic->blank_rfd, sizeof(struct rfd));
// 映射到DMA通道
    rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data,
        RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL);
 
    if(pci_dma_mapping_error(rx->dma_addr)) {
        dev_kfree_skb_any(rx->skb);
        rx->skb = 0;
        rx->dma_addr = 0;
        return -ENOMEM;
    }
 
    /* Link the RFD to end of RFA by linking previous RFD to
     * this one, and clearing EL bit of previous. */
    if(rx->prev->skb) {
        struct rfd *prev_rfd = (struct rfd *)rx->prev->skb->data;
        put_unaligned(cpu_to_le32(rx->dma_addr),
           (u32 *)&prev_rfd->link);
        wmb();
        prev_rfd->command &= ~cpu_to_le16(cb_el);
        pci_dma_sync_single_for_device(nic->pdev, rx->prev->dma_addr,
           sizeof(struct rfd), PCI_DMA_TODEVICE);
    }
 
    return 0;
}
以下为主要的收包过程。
static inline int e100_rx_indicate(struct nic *nic, struct rx *rx,
    unsigned int *work_done, unsigned int work_to_do)
{
    struct sk_buff *skb = rx->skb;
    struct rfd *rfd = (struct rfd *)skb->data;
    u16 rfd_status, actual_size;
 
    if(unlikely(work_done && *work_done >= work_to_do))
        return -EAGAIN;
 
    /* Need to sync before taking a peek at cb_complete bit */
    pci_dma_sync_single_for_cpu(nic->pdev, rx->dma_addr,
        sizeof(struct rfd), PCI_DMA_FROMDEVICE);
    rfd_status = le16_to_cpu(rfd->status);
 
    DPRINTK(RX_STATUS, DEBUG, "status=0x%04X/n", rfd_status);
 
    /* If data isn't ready, nothing to indicate */
    if(unlikely(!(rfd_status & cb_complete)))
        return -ENODATA;
 
    /* Get actual data size */
    actual_size = le16_to_cpu(rfd->actual_size) & 0x3FFF;
    if(unlikely(actual_size > RFD_BUF_LEN - sizeof(struct rfd)))
        actual_size = RFD_BUF_LEN - sizeof(struct rfd);
 
    /* Get data */
    pci_unmap_single(nic->pdev, rx->dma_addr,
        RFD_BUF_LEN, PCI_DMA_FROMDEVICE);
 
    /* this allows for a fast restart without re-enabling interrupts */
    if(le16_to_cpu(rfd->command) & cb_el)
        nic->ru_running = RU_SUSPENDED;
 
    /* Pull off the RFD and put the actual data (minus eth hdr) */
    skb_reserve(skb, sizeof(struct rfd));
    skb_put(skb, actual_size);
    skb->protocol = eth_type_trans(skb, nic->netdev);
 
    if(unlikely(!(rfd_status & cb_ok))) {
       /* Don't indicate if hardware indicates errors */
        dev_kfree_skb_any(skb);
    } else if(actual_size > ETH_DATA_LEN + VLAN_ETH_HLEN) {
       /* Don't indicate oversized frames */
        nic->rx_over_length_errors++;
        dev_kfree_skb_any(skb);
    } else {
        nic->net_stats.rx_packets++;
        nic->net_stats.rx_bytes += actual_size;
        nic->netdev->last_rx = jiffies;
        netif_receive_skb(skb);
        if(work_done)
           (*work_done)++;
    }
 
    rx->skb = NULL;
 
    return 0;
}
收包清除
/* Receive cleanup: indicates completed frames starting at rx_to_clean,
 * refills empty slots starting at rx_to_use, and restarts the receive
 * unit when it was found suspended on entry and the quota was not hit.
 * work_done may be NULL; when non-NULL it is also incremented once for a
 * restart. */
static inline void e100_rx_clean(struct nic *nic, unsigned int *work_done,
    unsigned int work_to_do)
{
    struct rx *rx;
    int restart_required = 0;
    struct rx *rx_to_start = NULL;
 
    /* are we already rnr? then pay attention!!! this ensures that
     * the state machine progression never allows a start with a
     * partially cleaned list, avoiding a race between hardware
     * and rx_to_clean when in NAPI mode */
    if(RU_SUSPENDED == nic->ru_running)
        restart_required = 1;
 
    /* Indicate newly arrived packets */
    for(rx = nic->rx_to_clean; rx->skb; rx = nic->rx_to_clean = rx->next) {
       int err = e100_rx_indicate(nic, rx, work_done, work_to_do);
        if(-EAGAIN == err) {
           /* hit quota so have more work to do, restart once
            * cleanup is complete */
           restart_required = 0;
           break;
       } else if(-ENODATA == err)
           break; /* No more to clean */
    }
 
    /* save our starting point as the place we'll restart the receiver */
    if(restart_required)
        rx_to_start = nic->rx_to_clean;
 
    /* Alloc new skbs to refill list */
    for(rx = nic->rx_to_use; !rx->skb; rx = nic->rx_to_use = rx->next) {
        if(unlikely(e100_rx_alloc_skb(nic, rx)))
           break; /* Better luck next time (see watchdog) */
    }
 
    if(restart_required) {
       // ack the rnr?
        writeb(stat_ack_rnr, &nic->csr->scb.stat_ack);
        e100_start_receiver(nic, rx_to_start);
        if(work_done)
           (*work_done)++;
    }
}
 
static void e100_rx_clean_list(struct nic *nic)
{
    struct rx *rx;
    unsigned int i, count = nic->params.rfds.count;
 
    nic->ru_running = RU_UNINITIALIZED;
 
    if(nic->rxs) {
        for(rx = nic->rxs, i = 0; i < count; rx++, i++) {
           if(rx->skb) {
               pci_unmap_single(nic->pdev, rx->dma_addr,
                   RFD_BUF_LEN, PCI_DMA_FROMDEVICE);
               dev_kfree_skb(rx->skb);
           }
       }
        kfree(nic->rxs);
        nic->rxs = NULL;
    }
 
    nic->rx_to_use = nic->rx_to_clean = NULL;
}
 
static int e100_rx_alloc_list(struct nic *nic)
{
    struct rx *rx;
    unsigned int i, count = nic->params.rfds.count;
 
    nic->rx_to_use = nic->rx_to_clean = NULL;
    nic->ru_running = RU_UNINITIALIZED;
 
    if(!(nic->rxs = kmalloc(sizeof(struct rx) * count, GFP_ATOMIC)))
        return -ENOMEM;
    memset(nic->rxs, 0, sizeof(struct rx) * count);
 
    for(rx = nic->rxs, i = 0; i < count; rx++, i++) {
        rx->next = (i + 1 < count) ? rx + 1 : nic->rxs;
        rx->prev = (i == 0) ? nic->rxs + count - 1 : rx - 1;
        if(e100_rx_alloc_skb(nic, rx)) {
           e100_rx_clean_list(nic);
           return -ENOMEM;
       }
    }
 
    nic->rx_to_use = nic->rx_to_clean = nic->rxs;
    nic->ru_running = RU_SUSPENDED;
 
    return 0;
}
 
/*
 * Poll callback installed as netdev->poll.
 *
 * Cleans the Rx ring (bounded by the smaller of netdev->quota and *budget)
 * and then the Tx ring. Returns 1 while work was done and the device is
 * running, after charging the Rx work to *budget and netdev->quota;
 * otherwise leaves polling mode, re-enables interrupts and returns 0.
 */
static int e100_poll(struct net_device *netdev, int *budget)
{
    struct nic *nic = netdev_priv(netdev);
    unsigned int rx_limit = min(netdev->quota, *budget);
    unsigned int rx_done = 0;
    int tx_cleaned;

    e100_rx_clean(nic, &rx_done, rx_limit);
    tx_cleaned = e100_tx_clean(nic);

    /* Stay in polling mode only if some Rx or Tx cleanup work was done
     * and the interface is still running. */
    if(netif_running(netdev) && (tx_cleaned || rx_done != 0)) {
        *budget -= rx_done;
        netdev->quota -= rx_done;
        return 1;
    }

    netif_rx_complete(netdev);
    e100_enable_irq(nic);
    return 0;
}
 
#ifdef CONFIG_NET_POLL_CONTROLLER
/* poll_controller callback (only built with CONFIG_NET_POLL_CONTROLLER):
 * with the device interrupts masked, calls the interrupt handler directly
 * and cleans the Tx ring, then unmasks the interrupts again. */
static void e100_netpoll(struct net_device *netdev)
{
    struct nic *nic = netdev_priv(netdev);
 
    e100_disable_irq(nic);
    e100_intr(nic->pdev->irq, netdev, NULL);
    e100_tx_clean(nic);
    e100_enable_irq(nic);
}
#endif
 
/* get_stats callback: returns a pointer to the statistics structure kept
 * in the driver's private data (nic->net_stats). */
static struct net_device_stats *e100_get_stats(struct net_device *netdev)
{
    struct nic *nic = netdev_priv(netdev);
    return &nic->net_stats;
}
 
/* set_mac_address callback. p points to a struct sockaddr whose sa_data
 * holds the new address. Returns -EADDRNOTAVAIL if the address is not a
 * valid Ethernet address, otherwise stores it in netdev->dev_addr, queues
 * an address-setup command for the adapter and returns 0. */
static int e100_set_mac_address(struct net_device *netdev, void *p)
{
    struct nic *nic = netdev_priv(netdev);
    struct sockaddr *addr = p;
 
    if (!is_valid_ether_addr(addr->sa_data))
        return -EADDRNOTAVAIL;
 
    memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
    /* NOTE(review): the result of e100_exec_cb() is ignored here, so a
     * failure to program the adapter is not reported to the caller. */
    e100_exec_cb(nic, NULL, e100_setup_iaaddr);
 
    return 0;
}
 
/*
 * change_mtu callback: accepts an MTU in the inclusive range
 * [ETH_ZLEN, ETH_DATA_LEN] and stores it in netdev->mtu.
 *
 * Returns 0 on success, -EINVAL if new_mtu is outside that range.
 */
static int e100_change_mtu(struct net_device *netdev, int new_mtu)
{
    if(new_mtu >= ETH_ZLEN && new_mtu <= ETH_DATA_LEN) {
        netdev->mtu = new_mtu;
        return 0;
    }

    return -EINVAL;
}
 
#ifdef CONFIG_PM
/* Forward declaration; only needed when power management is configured.
 * The definition is not part of this excerpt. */
static int e100_asf(struct nic *nic);
#endif
 
/* Brings the interface up: allocates the Rx list and the CB ring,
 * initializes the hardware, programs the multicast filter, starts the
 * receiver, arms the watchdog timer, requests the (shared) interrupt,
 * wakes the Tx queue and enables polling and device interrupts.
 * Returns 0 on success or the error of the failing step; the err_* labels
 * undo the earlier steps in reverse order. */
static int e100_up(struct nic *nic)
{
    int err;
 
    if((err = e100_rx_alloc_list(nic)))
        return err;
    if((err = e100_alloc_cbs(nic)))
       goto err_rx_clean_list;
    if((err = e100_hw_init(nic)))
       goto err_clean_cbs;
    e100_set_multicast_list(nic->netdev);
    /* a 0 (NULL) rx makes the receiver start from the head of the list */
    e100_start_receiver(nic, 0);
    /* expire immediately so the watchdog runs as soon as possible */
    mod_timer(&nic->watchdog, jiffies);
    if((err = request_irq(nic->pdev->irq, e100_intr, SA_SHIRQ,
        nic->netdev->name, nic->netdev)))
       goto err_no_irq;
    netif_wake_queue(nic->netdev);
    netif_poll_enable(nic->netdev);
    /* enable ints _after_ enabling poll, preventing a race between
     * disable ints+schedule */
    e100_enable_irq(nic);
    return 0;
 
err_no_irq:
    del_timer_sync(&nic->watchdog);
err_clean_cbs:
    e100_clean_cbs(nic);
err_rx_clean_list:
    e100_rx_clean_list(nic);
    return err;
}
 
/* Takes the interface down: disables polling, stops the Tx queue, resets
 * the hardware, releases the interrupt, stops the watchdog timer, marks
 * the carrier off and frees the CB ring and the Rx list. */
static void e100_down(struct nic *nic)
{
    /* wait here for poll to complete */
    netif_poll_disable(nic->netdev);
    netif_stop_queue(nic->netdev);
    e100_hw_reset(nic);
    free_irq(nic->pdev->irq, nic->netdev);
    del_timer_sync(&nic->watchdog);
    netif_carrier_off(nic->netdev);
    e100_clean_cbs(nic);
    e100_rx_clean_list(nic);
}
 
/* tx_timeout callback: does no recovery itself, it only schedules
 * nic->tx_timeout_task (set up in e100_probe) to run later. */
static void e100_tx_timeout(struct net_device *netdev)
{
    struct nic *nic = netdev_priv(netdev);
 
    /* Reset outside of interrupt context, to avoid request_irq
     * in interrupt context */
    schedule_work(&nic->tx_timeout_task);
}
 
/* PHY register written to control the LED. */
#define MII_LED_CONTROL 0x1B
/*
 * Blink timer callback: toggles the LED state kept in nic->leds, writes it
 * to the PHY and re-arms the timer for HZ / 4 jiffies later.
 *
 * data: the struct nic pointer stored in the timer (cast to unsigned long).
 */
static void e100_blink_led(unsigned long data)
{
    struct nic *nic = (struct nic *)data;
    enum led_state {
        led_on     = 0x01,
        led_off    = 0x04,
        led_on_559 = 0x05,
        led_on_557 = 0x07,
    };

    if(nic->leds & led_on) {
        /* currently on: switch off */
        nic->leds = led_off;
    } else if(nic->mac < mac_82559_D101M) {
        /* MACs older than the 82559 use a different "on" value */
        nic->leds = led_on_557;
    } else {
        nic->leds = led_on_559;
    }

    mdio_write(nic->netdev, nic->mii.phy_id, MII_LED_CONTROL, nic->leds);
    mod_timer(&nic->blink_timer, jiffies + HZ / 4);
}
 
/* Fills cmd with the current link settings by delegating to the MII
 * library; returns the result of mii_ethtool_gset(). */
static int e100_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
{
    struct nic *nic = netdev_priv(netdev);
    return mii_ethtool_gset(&nic->mii, cmd);
}
 
/* Applies the link settings in cmd: writes BMCR_RESET to the PHY's BMCR
 * register, applies the settings through the MII library, then queues a
 * configure command for the adapter. Returns the result of
 * mii_ethtool_sset(); the result of e100_exec_cb() is not checked. */
static int e100_set_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
{
    struct nic *nic = netdev_priv(netdev);
    int err;
 
    mdio_write(netdev, nic->mii.phy_id, MII_BMCR, BMCR_RESET);
    err = mii_ethtool_sset(&nic->mii, cmd);
    e100_exec_cb(nic, NULL, e100_configure);
 
    return err;
}
 
。。。。。
// Thin wrappers implementing the standard network-driver entry points follow.
/* do_ioctl callback: forwards the request to the generic MII ioctl
 * handler and returns its result. */
static int e100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
    struct nic *nic = netdev_priv(netdev);
 
    return generic_mii_ioctl(&nic->mii, if_mii(ifr), cmd, NULL);
}
 
/*
 * Allocates the DMA-consistent struct mem area shared with the device and
 * records its bus address in nic->dma_addr.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails.
 */
static int e100_alloc(struct nic *nic)
{
    nic->mem = pci_alloc_consistent(nic->pdev, sizeof(struct mem),
        &nic->dma_addr);
    if(!nic->mem)
        return -ENOMEM;

    return 0;
}
 
/*
 * Releases the area obtained by e100_alloc(). Safe to call when nothing
 * is allocated; nic->mem is cleared so a second call does nothing.
 */
static void e100_free(struct nic *nic)
{
    if(!nic->mem)
        return;

    pci_free_consistent(nic->pdev, sizeof(struct mem),
        nic->mem, nic->dma_addr);
    nic->mem = NULL;
}
 
/*
 * open callback: marks the carrier off and brings the interface up.
 *
 * Returns 0 on success or the error returned by e100_up(), which is also
 * logged.
 */
static int e100_open(struct net_device *netdev)
{
    struct nic *nic = netdev_priv(netdev);
    int err = 0;

    netif_carrier_off(netdev);
    if((err = e100_up(nic)))
        DPRINTK(IFUP, ERR, "Cannot open interface, aborting.\n");
    return err;
}
 
/* stop callback: takes the interface down via e100_down(); always
 * returns 0. */
static int e100_close(struct net_device *netdev)
{
    e100_down(netdev_priv(netdev));
    return 0;
}
 
 类似资料: