• linux网卡驱动注册与接收数据处理


    Linux网络接收

    本文基于2.6.15版本的内核,简单学习一下Linux内核处理网络数据的流程。

    网卡驱动注册

    以ixgb驱动为例,在网卡注册的时候会进入如下流程。

    /* PCI driver glue: the PCI core matches devices against ixgb_pci_tbl
     * and invokes .probe / .remove on hotplug add / removal. */
    static struct pci_driver ixgb_driver = {
    	.name     = ixgb_driver_name,
    	.id_table = ixgb_pci_tbl,          /* vendor/device IDs this driver claims */
    	.probe    = ixgb_probe,            /* called for each matching adapter */
    	.remove   = __devexit_p(ixgb_remove),
    };
    
    ...
    /**
     * ixgb_init_module - Driver Registration Routine
     *
     * ixgb_init_module is the first routine called when the driver is
     * loaded. All it does is register with the PCI subsystem.
     **/
    
    static int __init
    ixgb_init_module(void)
    {
    	int ret;

    	/* Announce driver version and copyright, then hand all real
    	 * registration work to the PCI subsystem. */
    	printk(KERN_INFO "%s - version %s\n",
    	       ixgb_driver_string, ixgb_driver_version);
    	printk(KERN_INFO "%s\n", ixgb_copyright);

    	ret = pci_module_init(&ixgb_driver);
    	return ret;
    }

    module_init(ixgb_init_module);
    

    PCI注册的流程会调用probe方法,即ixgb_probe。

    /**
     * ixgb_probe - Device Initialization Routine
     * @pdev: PCI device information struct
     * @ent: entry in ixgb_pci_tbl
     *
     * Returns 0 on success, negative on failure
     *
     * ixgb_probe initializes an adapter identified by a pci_dev structure.
     * The OS initialization, configuring of the adapter private structure,
     * and a hardware reset occur.
     **/
    
    static int __devinit
    ixgb_probe(struct pci_dev *pdev,
    		const struct pci_device_id *ent)
    {
    	struct net_device *netdev = NULL;
    	struct ixgb_adapter *adapter;
    	static int cards_found = 0;    /* global card index across all probes */
    	unsigned long mmio_start;
    	int mmio_len;
    	int pci_using_dac;
    	int i;
    	int err;
    
    	if((err = pci_enable_device(pdev)))  // wake the device up before touching it
    		return err;
    
    	if(!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK))) { // prefer 64-bit DMA
    		pci_using_dac = 1;
    	} else {
    		if((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) { // fall back to 32-bit
    			IXGB_ERR("No usable DMA configuration, aborting\n");
    			return err;
    		}
    		pci_using_dac = 0;
    	}
    
    	if((err = pci_request_regions(pdev, ixgb_driver_name)))
    		return err;
    
    	pci_set_master(pdev);  /* enable bus mastering so the NIC can DMA */
    
    	netdev = alloc_etherdev(sizeof(struct ixgb_adapter)); // net_device + private adapter area
    	if(!netdev) {
    		err = -ENOMEM;
    		goto err_alloc_etherdev;
    	}
    
    	SET_MODULE_OWNER(netdev);
    	SET_NETDEV_DEV(netdev, &pdev->dev);
    
    	pci_set_drvdata(pdev, netdev);    // stash netdev in the pci_dev's driver-data slot
    	adapter = netdev_priv(netdev);
    	adapter->netdev = netdev;
    	adapter->pdev = pdev;
    	adapter->hw.back = adapter;
    
    	mmio_start = pci_resource_start(pdev, BAR_0);
    	mmio_len = pci_resource_len(pdev, BAR_0);
    
    	adapter->hw.hw_addr = ioremap(mmio_start, mmio_len); /* map BAR0 registers */
    	if(!adapter->hw.hw_addr) {
    		err = -EIO;
    		goto err_ioremap;
    	}
    
    	/* find the first I/O-port BAR, if any */
    	for(i = BAR_1; i <= BAR_5; i++) {
    		if(pci_resource_len(pdev, i) == 0)
    			continue;
    		if(pci_resource_flags(pdev, i) & IORESOURCE_IO) {
    			adapter->hw.io_base = pci_resource_start(pdev, i);
    			break;
    		}
    	}
    
    	netdev->open = &ixgb_open;  // net_device operations: device open
    	netdev->stop = &ixgb_close;   // device close
    	netdev->hard_start_xmit = &ixgb_xmit_frame;
    	netdev->get_stats = &ixgb_get_stats;
    	netdev->set_multicast_list = &ixgb_set_multi;
    	netdev->set_mac_address = &ixgb_set_mac;
    	netdev->change_mtu = &ixgb_change_mtu;  // MTU change handler
    	ixgb_set_ethtool_ops(netdev);           /* the actual ethtool ops are set here */
    	netdev->tx_timeout = &ixgb_tx_timeout;  // transmit watchdog callback
    	netdev->watchdog_timeo = HZ;
    #ifdef CONFIG_IXGB_NAPI
    	netdev->poll = &ixgb_clean;    // poll callback used in NAPI mode
    	netdev->weight = 64;
    #endif
    	netdev->vlan_rx_register = ixgb_vlan_rx_register;  // VLAN acceleration hooks
    	netdev->vlan_rx_add_vid = ixgb_vlan_rx_add_vid;
    	netdev->vlan_rx_kill_vid = ixgb_vlan_rx_kill_vid;
    #ifdef CONFIG_NET_POLL_CONTROLLER
    	netdev->poll_controller = ixgb_netpoll;  // netpoll (e.g. netconsole) support
    #endif
    
    	netdev->mem_start = mmio_start;
    	netdev->mem_end = mmio_start + mmio_len;
    	netdev->base_addr = adapter->hw.io_base;
    
    	adapter->bd_number = cards_found;
    	adapter->link_speed = 0;
    	adapter->link_duplex = 0;
    
    	/* setup the private structure */
    
    	if((err = ixgb_sw_init(adapter)))
    		goto err_sw_init;
    
    	netdev->features = NETIF_F_SG |
    			   NETIF_F_HW_CSUM |
    			   NETIF_F_HW_VLAN_TX |
    			   NETIF_F_HW_VLAN_RX |
    			   NETIF_F_HW_VLAN_FILTER;
    #ifdef NETIF_F_TSO
    	netdev->features |= NETIF_F_TSO;
    #endif
    
    	if(pci_using_dac)
    		netdev->features |= NETIF_F_HIGHDMA;
    
    	/* make sure the EEPROM is good */
    
    	if(!ixgb_validate_eeprom_checksum(&adapter->hw)) {
    		printk(KERN_ERR "The EEPROM Checksum Is Not Valid\n");
    		err = -EIO;
    		goto err_eeprom;
    	}
    
    	ixgb_get_ee_mac_addr(&adapter->hw, netdev->dev_addr); /* MAC from EEPROM */
    	memcpy(netdev->perm_addr, netdev->dev_addr, netdev->addr_len);
    
    	if(!is_valid_ether_addr(netdev->perm_addr)) {
    		err = -EIO;
    		goto err_eeprom;
    	}
    
    	adapter->part_num = ixgb_get_ee_pba_number(&adapter->hw);
    
    	init_timer(&adapter->watchdog_timer);   // periodic link watchdog timer
    	adapter->watchdog_timer.function = &ixgb_watchdog;
    	adapter->watchdog_timer.data = (unsigned long)adapter;
    
    	INIT_WORK(&adapter->tx_timeout_task,
    		  (void (*)(void *))ixgb_tx_timeout_task, netdev);
    
    	if((err = register_netdev(netdev)))  // register with the network stack
    		goto err_register;
    
    	/* we're going to reset, so assume we have no link for now */
    
    	netif_carrier_off(netdev);     // mark link down and stop the tx queue
    	netif_stop_queue(netdev);
    
    	printk(KERN_INFO "%s: Intel(R) PRO/10GbE Network Connection\n",
    		   netdev->name);
    	ixgb_check_options(adapter);
    	/* reset the hardware with the new settings */
    
    	ixgb_reset(adapter);
    
    	cards_found++;
    	return 0;
    
    /* unwind in reverse order of acquisition */
    err_register:
    err_sw_init:
    err_eeprom:
    	iounmap(adapter->hw.hw_addr);
    err_ioremap:
    	free_netdev(netdev);
    err_alloc_etherdev:
    	pci_release_regions(pdev);
    	return err;
    }
    

    注册的方式主要就是通过配置一些网络的参数。

    打开网卡接收数据

    如果我们使用工具使该网卡打开,此时就会执行如下流程。

    /* Bring the interface up: allocate tx/rx descriptor rings, then start
     * the adapter (ixgb_up registers the interrupt handler). On failure,
     * release whatever was acquired, in reverse order, and reset the HW. */
    static int
    ixgb_open(struct net_device *netdev)
    {
    	struct ixgb_adapter *adapter = netdev_priv(netdev);
    	int err;

    	/* allocate transmit descriptors */
    	err = ixgb_setup_tx_resources(adapter);
    	if (err)
    		goto err_setup_tx;

    	/* allocate receive descriptors */
    	err = ixgb_setup_rx_resources(adapter);
    	if (err)
    		goto err_setup_rx;

    	/* register the IRQ handler and start the adapter */
    	err = ixgb_up(adapter);
    	if (err)
    		goto err_up;

    	return 0;

    err_up:
    	ixgb_free_rx_resources(adapter);
    err_setup_rx:
    	ixgb_free_tx_resources(adapter);
    err_setup_tx:
    	ixgb_reset(adapter);
    	return err;
    }
    

    此时就会调用ixgb_open函数,进行发送和接收的资源设置,并通过ixgb_up注册网卡的中断回调函数。

    int
    ixgb_up(struct ixgb_adapter *adapter)
    {
    	struct net_device *netdev = adapter->netdev;
    	int err;
    	int max_frame = netdev->mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH;  // max frame = MTU + Ethernet header + FCS
    	struct ixgb_hw *hw = &adapter->hw;
    
    	/* hardware has been reset, we need to reload some things */
    
    	ixgb_set_multi(netdev);
    
    	ixgb_restore_vlan(adapter);
    
    	ixgb_configure_tx(adapter);  // program the transmit ring registers
    	ixgb_setup_rctl(adapter);
    	ixgb_configure_rx(adapter);  // program the receive ring registers
    	ixgb_alloc_rx_buffers(adapter);  // fill the rx ring with skb buffers
    
    #ifdef CONFIG_PCI_MSI
    	{
    	boolean_t pcix = (IXGB_READ_REG(&adapter->hw, STATUS) & 
    						  IXGB_STATUS_PCIX_MODE) ? TRUE : FALSE;
    	adapter->have_msi = TRUE;
    
    	if (!pcix)
    	   adapter->have_msi = FALSE;
    	else if((err = pci_enable_msi(adapter->pdev))) {
    		printk (KERN_ERR
    		 "Unable to allocate MSI interrupt Error: %d\n", err);
    		adapter->have_msi = FALSE;
    		/* proceed to try to request regular interrupt */
    	}
    	}
    
    #endif
    	if((err = request_irq(adapter->pdev->irq, &ixgb_intr,
    				  SA_SHIRQ | SA_SAMPLE_RANDOM,
    				  netdev->name, netdev)))  // register ixgb_intr as the handler for this NIC's IRQ line
    		return err;
    
    	/* disable interrupts and get the hardware into a known state */
    	IXGB_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
    
    	if((hw->max_frame_size != max_frame) ||
    		(hw->max_frame_size !=
    		(IXGB_READ_REG(hw, MFS) >> IXGB_MFS_SHIFT))) {
    
    		hw->max_frame_size = max_frame;
    
    		IXGB_WRITE_REG(hw, MFS, hw->max_frame_size << IXGB_MFS_SHIFT);
    
    		/* enable jumbo-frame support when the frame exceeds standard size */
    		if(hw->max_frame_size >
    		   IXGB_MAX_ENET_FRAME_SIZE_WITHOUT_FCS + ENET_FCS_LENGTH) {
    			uint32_t ctrl0 = IXGB_READ_REG(hw, CTRL0);
    
    			if(!(ctrl0 & IXGB_CTRL0_JFE)) {
    				ctrl0 |= IXGB_CTRL0_JFE;
    				IXGB_WRITE_REG(hw, CTRL0, ctrl0);
    			}
    		}
    	}
    
    	mod_timer(&adapter->watchdog_timer, jiffies);
    	ixgb_irq_enable(adapter);   // unmask interrupts on the hardware
    
    #ifdef CONFIG_IXGB_NAPI
    	netif_poll_enable(netdev);
    #endif
    	return 0;
    }
    

    主要是设置了接收数据的缓冲区,并注册了中断处理函数ixgb_intr,当数据到来之后就会调用ixgb_intr函数进行处理。

    static irqreturn_t
    ixgb_intr(int irq, void *data, struct pt_regs *regs)
    {
    	struct net_device *netdev = data;
    	struct ixgb_adapter *adapter = netdev_priv(netdev);
    	struct ixgb_hw *hw = &adapter->hw;
    	uint32_t icr = IXGB_READ_REG(hw, ICR);  /* reading ICR also acks the interrupt */
    #ifndef CONFIG_IXGB_NAPI
    	unsigned int i;
    #endif
    
    	if(unlikely(!icr))
    		return IRQ_NONE;  /* Not our interrupt */
    
    	/* link state changed / rx sequence error: kick the watchdog soon */
    	if(unlikely(icr & (IXGB_INT_RXSEQ | IXGB_INT_LSC))) {
    		mod_timer(&adapter->watchdog_timer, jiffies);
    	}
    
    #ifdef CONFIG_IXGB_NAPI
    	if(netif_rx_schedule_prep(netdev)) {  // NAPI: claim the device for polling if not already scheduled
    
    		/* Disable interrupts and register for poll. The flush 
    		  of the posted write is intentionally left out.
    		*/
    
    		atomic_inc(&adapter->irq_sem);
    		IXGB_WRITE_REG(&adapter->hw, IMC, ~0);
    		__netif_rx_schedule(netdev);
    	}
    #else
    	/* yes, that is actually a & and it is meant to make sure that
    	 * every pass through this for loop checks both receive and
    	 * transmit queues for completed descriptors, intended to
    	 * avoid starvation issues and assist tx/rx fairness. */
    	for(i = 0; i < IXGB_MAX_INTR; i++)  // non-NAPI: process rx/tx directly in interrupt context
    		if(!ixgb_clean_rx_irq(adapter) &
    		   !ixgb_clean_tx_irq(adapter))
    			break;
    #endif 
    	return IRQ_HANDLED;
    }
    

    此时就看配置的是网卡的哪种数据处理类型,一般有如下两种网卡数据处理流程。

    NAPI模式

    通过softirq软中断机制对网卡数据进行poll轮询处理,从而避免过多的中断处理开销,提升效率。

    static inline void __netif_rx_schedule(struct net_device *dev)
    {
    	unsigned long flags;
    
    	local_irq_save(flags);
    	dev_hold(dev);  /* hold a reference while the device sits on the poll list */
    	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); // queue the device on this CPU's poll list so the softirq keeps draining it
    	if (dev->quota < 0)
    		dev->quota += dev->weight;
    	else
    		dev->quota = dev->weight;
    	__raise_softirq_irqoff(NET_RX_SOFTIRQ);  // raise the network-receive softirq
    	local_irq_restore(flags);
    }
    

    此时就会调用在NET_RX_SOFTIRQ上注册的软中断处理函数。

    /* registered at boot: net_rx_action is the handler for NET_RX_SOFTIRQ */
    open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
    

    此时调用的就是网络接收软中断的处理函数net_rx_action。

    static void net_rx_action(struct softirq_action *h)
    {
    	struct softnet_data *queue = &__get_cpu_var(softnet_data);
    	unsigned long start_time = jiffies;
    	int budget = netdev_budget;  /* total packets this softirq run may process */
    	void *have;
    
    	local_irq_disable();
    
    	while (!list_empty(&queue->poll_list)) {  // any devices waiting to be polled?
    		struct net_device *dev;
    
    		/* bail out if the budget or the time slice (1 jiffy) is used up */
    		if (budget <= 0 || jiffies - start_time > 1)
    			goto softnet_break;
    
    		local_irq_enable();  // re-enable interrupts while polling
    
    		dev = list_entry(queue->poll_list.next,
    				 struct net_device, poll_list);  // next device on the poll list
    		have = netpoll_poll_lock(dev);    
    
    		if (dev->quota <= 0 || dev->poll(dev, &budget)) {  // poll the device; nonzero means it still has work
    			netpoll_poll_unlock(have);
    			local_irq_disable();
    			/* rotate the device to the list tail and refresh its quota */
    			list_del(&dev->poll_list);  
    			list_add_tail(&dev->poll_list, &queue->poll_list);
    			if (dev->quota < 0)
    				dev->quota += dev->weight;
    			else
    				dev->quota = dev->weight;
    		} else {
    			netpoll_poll_unlock(have);
    			dev_put(dev);  // device is done: drop the reference taken at schedule time
    			local_irq_disable();
    		}
    	}
    out:
    	local_irq_enable();  // re-enable interrupts before returning
    	return;
    
    softnet_break:
    	/* out of budget/time: account the squeeze and reschedule ourselves */
    	__get_cpu_var(netdev_rx_stat).time_squeeze++;
    	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
    	goto out;
    }
    

    通过调用dev的poll函数来进行数据的处理。ixgb对应的poll函数为ixgb_clean。

    #ifdef CONFIG_IXGB_NAPI
    /**
     * ixgb_clean - NAPI Rx polling callback
     * @adapter: board private structure
     **/
    
    static int
    ixgb_clean(struct net_device *netdev, int *budget)
    {
    	struct ixgb_adapter *adapter = netdev_priv(netdev);
    	int work_to_do = min(*budget, netdev->quota);  /* cap work by both global budget and device quota */
    	int tx_cleaned;
    	int work_done = 0;
    
    	tx_cleaned = ixgb_clean_tx_irq(adapter);
    	ixgb_clean_rx_irq(adapter, &work_done, work_to_do); // process up to work_to_do received packets
    
    	*budget -= work_done;
    	netdev->quota -= work_done;  // charge the processed packets against the device quota
    
    	/* if no Tx and not enough Rx work done, exit the polling mode */
    	if((!tx_cleaned && (work_done == 0)) || !netif_running(netdev)) {
    		netif_rx_complete(netdev);  // leave polling mode and re-enable interrupts
    		ixgb_irq_enable(adapter);
    		return 0;
    	}
    
    	return 1;  /* more work remains: stay on the poll list */
    }
    #endif
    

    其中ixgb_clean_rx_irq函数,也正是非NAPI模式下直接处理数据时所调用的函数。

    数据直接传递

    数据直接传递就是直接通过ixgb_clean_rx_irq函数来处理网络包。

    static boolean_t
    #ifdef CONFIG_IXGB_NAPI
    ixgb_clean_rx_irq(struct ixgb_adapter *adapter, int *work_done, int work_to_do)
    #else
    ixgb_clean_rx_irq(struct ixgb_adapter *adapter)
    #endif
    {
    	struct ixgb_desc_ring *rx_ring = &adapter->rx_ring;
    	struct net_device *netdev = adapter->netdev;
    	struct pci_dev *pdev = adapter->pdev;
    	struct ixgb_rx_desc *rx_desc, *next_rxd;
    	struct ixgb_buffer *buffer_info, *next_buffer, *next2_buffer;
    	uint32_t length;
    	unsigned int i, j;
    	boolean_t cleaned = FALSE;
    
    	i = rx_ring->next_to_clean;
    	rx_desc = IXGB_RX_DESC(*rx_ring, i);
    	buffer_info = &rx_ring->buffer_info[i];
    
    	/* walk the ring while the hardware has marked descriptors done (DD) */
    	while(rx_desc->status & IXGB_RX_DESC_STATUS_DD) {
    		struct sk_buff *skb, *next_skb;  // socket buffers for this and the next descriptor
    		u8 status;
    
    #ifdef CONFIG_IXGB_NAPI
    		if(*work_done >= work_to_do)
    			break;
    
    		(*work_done)++;
    #endif
    		status = rx_desc->status;
    		skb = buffer_info->skb;  // skb the hardware DMA'd the packet into
    
    		prefetch(skb->data);
    
    		if(++i == rx_ring->count) i = 0;  /* ring wrap-around */
    		next_rxd = IXGB_RX_DESC(*rx_ring, i);
    		prefetch(next_rxd);
    
    		if((j = i + 1) == rx_ring->count) j = 0;
    		next2_buffer = &rx_ring->buffer_info[j];  // prefetch two entries ahead in the ring
    		prefetch(next2_buffer);
    
    		next_buffer = &rx_ring->buffer_info[i];
    		next_skb = next_buffer->skb;
    		prefetch(next_skb);
    
    		cleaned = TRUE;
    
    		/* unmap the DMA buffer so the CPU owns the data */
    		pci_unmap_single(pdev,
    				 buffer_info->dma,
    				 buffer_info->length,
    				 PCI_DMA_FROMDEVICE);
    
    		length = le16_to_cpu(rx_desc->length);
    
    		if(unlikely(!(status & IXGB_RX_DESC_STATUS_EOP))) {
    
    			/* All receives must fit into a single buffer */
    
    			IXGB_DBG("Receive packet consumed multiple buffers "
    					 "length<%x>\n", length);
    
    			dev_kfree_skb_irq(skb);
    			goto rxdesc_done;
    		}
    
    		/* drop packets the hardware flagged with errors */
    		if (unlikely(rx_desc->errors
    			     & (IXGB_RX_DESC_ERRORS_CE | IXGB_RX_DESC_ERRORS_SE
    				| IXGB_RX_DESC_ERRORS_P |
    				IXGB_RX_DESC_ERRORS_RXE))) {
    
    			dev_kfree_skb_irq(skb);
    			goto rxdesc_done;
    		}
    
    		/* Good Receive */
    		skb_put(skb, length);   // set the skb's data length to the received length
    
    		/* Receive Checksum Offload */
    		ixgb_rx_checksum(adapter, rx_desc, skb); // propagate hardware checksum result
    
    		skb->protocol = eth_type_trans(skb, netdev);  // parse the Ethernet header, set protocol
    #ifdef CONFIG_IXGB_NAPI
    		if(adapter->vlgrp && (status & IXGB_RX_DESC_STATUS_VP)) {
    			vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
    				le16_to_cpu(rx_desc->special) &
    					IXGB_RX_DESC_SPECIAL_VLAN_MASK);
    		} else {
    			netif_receive_skb(skb);  // NAPI path: hand the skb straight to the stack
    		}
    #else /* CONFIG_IXGB_NAPI */
    		if(adapter->vlgrp && (status & IXGB_RX_DESC_STATUS_VP)) {
    			vlan_hwaccel_rx(skb, adapter->vlgrp,
    				le16_to_cpu(rx_desc->special) &
    					IXGB_RX_DESC_SPECIAL_VLAN_MASK);
    		} else {
    			netif_rx(skb);   // non-NAPI path: queue the skb for softirq processing
    		}
    #endif /* CONFIG_IXGB_NAPI */
    		netdev->last_rx = jiffies;
    
    rxdesc_done:
    		/* clean up descriptor, might be written over by hw */
    		rx_desc->status = 0;
    		buffer_info->skb = NULL;
    
    		/* use prefetched values */
    		rx_desc = next_rxd;
    		buffer_info = next_buffer;
    	}
    
    	rx_ring->next_to_clean = i;   
    
    	ixgb_alloc_rx_buffers(adapter);  /* refill the ring with fresh skbs */
    
    	return cleaned;
    }
    

    在非NAPI模式下,会调用netif_rx进入软中断的网络数据处理流程,只不过此时不再经过设备的poll方法处理。

    int netif_rx(struct sk_buff *skb)
    {
    	struct softnet_data *queue;
    	unsigned long flags;
    
    	/* if netpoll wants it, pretend we never saw it */
    	if (netpoll_rx(skb))
    		return NET_RX_DROP;
    
    	if (!skb->tstamp.off_sec)
    		net_timestamp(skb);
    
    	/*
    	 * The code is rearranged so that the path is the most
    	 * short when CPU is congested, but is still operating.
    	 */
    	local_irq_save(flags);
    	queue = &__get_cpu_var(softnet_data);  /* this CPU's input queue */
    
    	__get_cpu_var(netdev_rx_stat).total++;
    	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
    		if (queue->input_pkt_queue.qlen) {
    enqueue:
    			dev_hold(skb->dev);
    			__skb_queue_tail(&queue->input_pkt_queue, skb);
    			local_irq_restore(flags);
    			return NET_RX_SUCCESS;
    		}
    
    		netif_rx_schedule(&queue->backlog_dev);  // queue empty: schedule the backlog device so the softirq drains it
    		goto enqueue;
    	}
    
    	/* backlog full: account the drop and free the packet */
    	__get_cpu_var(netdev_rx_stat).dropped++;
    	local_irq_restore(flags);
    
    	kfree_skb(skb);
    	return NET_RX_DROP;
    }
    

    基本的两种模式下面的网络数据处理就基本分析完成。

    总结

    本文基于Linux-2.6系列内核源码,以ixgb网卡为例简单分析了网卡数据的接收流程:驱动加载时向PCI子系统注册probe等回调;当网卡被打开时注册中断处理函数,并根据是否启用NAPI走不同的处理路径。NAPI模式下通过poll函数在softirq上下文中批量处理网卡数据,效率上比非NAPI模式更高;非NAPI模式下最终也是交由softirq对收到的网络包进行处理,从而完成网卡数据的接收与处理。由于本人才疏学浅,如有错误请批评指正。

  • 相关阅读:
    前列腺特异抗原(PSA)介绍
    自动化测试——selenium(环境部署和元素定位)
    ThreeJS-截屏下载pdf或者图片时白屏
    前端 表单标签(二)
    数据结构(c语言版) 树的双亲表示法
    计算机毕业设计Java游泳馆管理平台(系统+程序+mysql数据库+Lw文档)
    labelme标注信息统计及使用方法
    Windows下安装RabbitMQ
    修炼k8s+flink+hdfs+dlink(七:flinkcdc)
    AbstractExecutorService 抽象类
  • 原文地址:https://blog.csdn.net/qq_33339479/article/details/126571894