• 了解ixgbe网卡驱动— 驱动注册(纯代码分享)


    1 ixgbe 网卡注册驱动

    和大部分设备驱动一样,网卡驱动是作为一个 module 注册到 kernel 的
    通过 module_init() -> ixgbe_init_module() -> pci_register_driver() 注册 ixgbe_driver
    通过 module_exit() -> ixgbe_exit_module() -> pci_unregister_driver() 注销 ixgbe_driver

    1.1 ixgbe_driver 结构体

    1. static struct pci_driver ixgbe_driver = { /* PCI driver descriptor handed to pci_register_driver()/pci_unregister_driver() */
    2. .name = ixgbe_driver_name,
    3. .id_table = ixgbe_pci_tbl, /* ixgbe_probe() receives the matching entry of this table as @ent */
    4. .probe = ixgbe_probe,// called once the system has detected an ixgbe NIC
    5. .remove = ixgbe_remove,
    6. #ifdef CONFIG_PM
    7. .suspend = ixgbe_suspend, /* suspend/resume hooks are only present when CONFIG_PM is defined */
    8. .resume = ixgbe_resume,
    9. #endif
    10. .shutdown = ixgbe_shutdown,
    11. .sriov_configure = ixgbe_pci_sriov_configure,
    12. .err_handler = &ixgbe_err_handler
    13. };

    1.2 ixgbe_driver 注册/注销

    1. /**
    2. * ixgbe_init_module - Driver Registration Routine
    3. *
    4. * ixgbe_init_module is the first routine called when the driver is
    5. * loaded. It creates the driver workqueue, initializes the debug
    *    support, registers with the PCI subsystem and, when CONFIG_IXGBE_DCA
    *    is defined, registers the DCA notifier.
    *
    *    Returns 0 on success, -ENOMEM if the workqueue cannot be created, or
    *    the error returned by pci_register_driver().
    6. **/
    7. static int __init ixgbe_init_module(void)
    8. {
    9. int ret;
    10. pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version);
    11. pr_info("%s\n", ixgbe_copyright);
    12. ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name);
    13. if (!ixgbe_wq) {
    14. pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name);
    15. return -ENOMEM;
    16. }
    17. ixgbe_dbg_init();
    18. ret = pci_register_driver(&ixgbe_driver); // register ixgbe_driver with the PCI subsystem
    19. if (ret) {
    20. destroy_workqueue(ixgbe_wq); /* registration failed: undo the workqueue and debug setup */
    21. ixgbe_dbg_exit();
    22. return ret;
    23. }
    24. #ifdef CONFIG_IXGBE_DCA
    25. dca_register_notify(&dca_notifier);
    26. #endif
    27. return 0;
    28. }
    29. module_init(ixgbe_init_module);
    30. /**
    31. * ixgbe_exit_module - Driver Exit Cleanup Routine
    32. *
    33. * ixgbe_exit_module is called just before the driver is removed
    34. * from memory. It undoes ixgbe_init_module(): DCA notifier (if
    *    configured), PCI driver registration, debug support and workqueue.
    35. **/
    36. static void __exit ixgbe_exit_module(void)
    37. {
    38. #ifdef CONFIG_IXGBE_DCA
    39. dca_unregister_notify(&dca_notifier);
    40. #endif
    41. pci_unregister_driver(&ixgbe_driver); // unregister ixgbe_driver
    42. ixgbe_dbg_exit();
    43. if (ixgbe_wq) {
    44. destroy_workqueue(ixgbe_wq);
    45. ixgbe_wq = NULL; /* clear the pointer so the workqueue is not destroyed twice */
    46. }
    47. }
    48. module_exit(ixgbe_exit_module);

    2 ixgbe 的 PCI 注册驱动流程 pci_register_driver()

    1. pci_register_driver() ->
    2. __pci_register_driver() ->
    3. driver_register() ->
    4. bus_add_driver() ->
    5. driver_attach() ->
    6. bus_for_each_dev() ->
    7. __driver_attach() ->
    8. driver_probe_device() ->
    9. really_probe() ->
    10. pci_device_probe() ->
    11. __pci_device_probe() ->
    12. pci_call_probe() ->
    13. local_pci_probe()
    14. static long local_pci_probe(void *_ddi)
    15. {
    16. ...
    17. rc = pci_drv->probe(pci_dev, ddi->id); // 系统探测到设备后调用设备驱动的probe
    18. ...
    19. }
    【文章福利】小编推荐自己的Linux内核技术交流群: 【977878001】整理一些个人觉得比较好的学习书籍、视频资料共享在群文件里面,有需要的可以自行添加哦!!!前100进群领取,额外赠送一份 价值699的内核资料包(含视频教程、电子书、实战项目及代码)

    内核资料直通车:Linux内核源码技术学习路线+视频教程代码资料

    学习直通车:Linux内核源码/内存调优/文件系统/进程管理/设备驱动/网络协议栈

    3 ixgbe 网卡探测 ixgbe_probe()【核心】

    1. /**
    2. * ixgbe_probe - Device Initialization Routine
    3. * @pdev: PCI device information struct
    4. * @ent: entry in ixgbe_pci_tbl
    5. *
    6. * Returns 0 on success, negative on failure
    7. *
    8. * ixgbe_probe initializes an adapter identified by a pci_dev structure.
    9. * The OS initialization, configuring of the adapter private structure,
    10. * and a hardware reset occur.
    11. **/
    12. static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
    13. {
    14. struct net_device *netdev;
    15. struct ixgbe_adapter *adapter = NULL;
    16. struct ixgbe_hw *hw;
    17. const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];// select the ixgbe_info for this board model (82598/82599/...) using ent->driver_data as index
    18. int i, err, pci_using_dac, expected_gts;
    19. unsigned int indices = MAX_TX_QUEUES;
    20. u8 part_str[IXGBE_PBANUM_LENGTH];
    21. bool disable_dev = false;
    22. #ifdef IXGBE_FCOE
    23. u16 device_caps;
    24. #endif
    25. u32 eec;
    26. /* Catch broken hardware that put the wrong VF device ID in
    27. * the PCIe SR-IOV capability.
    28. */
    29. if (pdev->is_virtfn) {
    30. WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
    31. pci_name(pdev), pdev->vendor, pdev->device);
    32. return -EINVAL;
    33. }
    34. /* pci_enable_device_mem() -> __pci_enable_device_flags() -> do_pci_enable_device()
    35. -> pcibios_enable_device() -> pci_enable_resources() -> pci_write_config_word()
    36. Per the author's call trace: writes PCI_COMMAND_MEMORY (0x2) into the Command configuration register (0x04) so that the driver may access the NIC's memory space */
    37. err = pci_enable_device_mem(pdev);
    38. if (err)
    39. return err;
    40. /* pci_set_dma_mask() -> dma_set_mask() -> dma_supported()
    41. Check and set the DMA address width: try 64-bit first, then fall back to 32-bit */
    42. if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
    43. pci_using_dac = 1;
    44. } else {
    45. err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
    46. if (err) {
    47. dev_err(&pdev->dev,
    48. "No usable DMA configuration, aborting\n");
    49. goto err_dma;
    50. }
    51. pci_using_dac = 0;
    52. }
    53. /* pci_request_mem_regions() -> pci_request_selected_regions() -> __pci_request_selected_regions()
    54. -> __pci_request_region()-> request_region()/__request_mem_region()
    55. -> __request_region() -> __request_resource()
    56. Per the author's call trace: claims the bus addresses held in the BARs (inserts the resources into the iomem_resource tree) */
    57. err = pci_request_mem_regions(pdev, ixgbe_driver_name);
    58. if (err) {
    59. dev_err(&pdev->dev,
    60. "pci_request_selected_regions failed 0x%x\n", err);
    61. goto err_pci_reg;
    62. }
    63. pci_enable_pcie_error_reporting(pdev);
    64. /* pci_set_master() -> __pci_set_master() -> pci_write_config_word()
    65. Per the author's call trace: writes PCI_COMMAND_MASTER (0x4) into the Command configuration register (0x04) so that the NIC may act as PCI bus master */
    66. pci_set_master(pdev);
    67. /* pci_save_state() -> pci_read_config_dword()
    68. Per the author's call trace: reads the configuration space and saves it in dev->saved_config_space */
    69. pci_save_state(pdev);
    70. if (ii->mac == ixgbe_mac_82598EB) {
    71. #ifdef CONFIG_IXGBE_DCB
    72. /* 8 TC w/ 4 queues per TC */
    73. indices = 4 * MAX_TRAFFIC_CLASS;
    74. #else
    75. indices = IXGBE_MAX_RSS_INDICES;
    76. #endif
    77. }
    78. // allocate the net_device together with the ixgbe_adapter private area; the TX queue count is indices
    79. netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
    80. if (!netdev) {
    81. err = -ENOMEM;
    82. goto err_alloc_etherdev;
    83. }
    84. SET_NETDEV_DEV(netdev, &pdev->dev);
    85. adapter = netdev_priv(netdev); // pointer to the ixgbe_adapter stored in the netdev private area
    86. adapter->netdev = netdev;
    87. adapter->pdev = pdev;
    88. hw = &adapter->hw; // pointer to the ixgbe_hw embedded in the adapter
    89. hw->back = adapter;
    90. adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
    91. // map the bus address range of BAR0 into kernel virtual address space and store it in hw->hw_addr, through which the driver accesses the NIC's BAR0 memory space
    92. hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
    93. pci_resource_len(pdev, 0));
    94. adapter->io_addr = hw->hw_addr;
    95. if (!hw->hw_addr) {
    96. err = -EIO;
    97. goto err_ioremap;
    98. }
    99. netdev->netdev_ops = &ixgbe_netdev_ops;// install ixgbe_netdev_ops as the netdev operations
    100. ixgbe_set_ethtool_ops(netdev);
    101. netdev->watchdog_timeo = 5 * HZ;
    102. strlcpy(netdev->name, pci_name(pdev), sizeof(netdev->name));
    103. /* Setup hw api */
    104. hw->mac.ops = *ii->mac_ops;
    105. hw->mac.type = ii->mac;
    106. hw->mvals = ii->mvals;
    107. if (ii->link_ops)
    108. hw->link.ops = *ii->link_ops;
    109. /* EEPROM */
    110. hw->eeprom.ops = *ii->eeprom_ops;
    111. eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));// read the IXGBE_EEC register from the BAR0 memory space
    112. if (ixgbe_removed(hw->hw_addr)) {
    113. err = -EIO;
    114. goto err_ioremap;
    115. }
    116. /* If EEPROM is valid (bit 8 = 1), use default otherwise use bit bang */
    117. if (!(eec & BIT(8)))
    118. hw->eeprom.ops.read = &ixgbe_read_eeprom_bit_bang_generic;
    119. /* PHY */
    120. hw->phy.ops = *ii->phy_ops;
    121. hw->phy.sfp_type = ixgbe_sfp_type_unknown;
    122. /* ixgbe_identify_phy_generic will set prtad and mmds properly */
    123. hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
    124. hw->phy.mdio.mmds = 0;
    125. hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
    126. hw->phy.mdio.dev = netdev;
    127. hw->phy.mdio.mdio_read = ixgbe_mdio_read;
    128. hw->phy.mdio.mdio_write = ixgbe_mdio_write;
    129. /* setup the private structure */
    130. /* Author's notes on ixgbe_sw_init() (its body is not shown here), which initializes ixgbe_adapter:
    131. sets adapter->tx/rx_ring_count to 1024 (default 1024, minimum 64, maximum 4096)
    132. sets adapter->ring_feature[RING_F_RSS].indices to min(number of CPUs, IXGBE_MAX_RSS_INDICES(16))
    133. sets adapter->ring_feature[RING_F_FDIR].indices to IXGBE_MAX_FDIR_INDICES(64)
    134. sets IXGBE_FLAG_RSS_ENABLED and IXGBE_FLAG_FDIR_HASH_CAPABLE in adapter->flags */
    135. err = ixgbe_sw_init(adapter, ii);
    136. if (err)
    137. goto err_sw_init;
    138. /* Make sure the SWFW semaphore is in a valid state */
    139. if (hw->mac.ops.init_swfw_sync)
    140. hw->mac.ops.init_swfw_sync(hw);
    141. /* Make it possible the adapter to be woken up via WOL */
    142. switch (adapter->hw.mac.type) {
    143. case ixgbe_mac_82599EB:
    144. case ixgbe_mac_X540:
    145. case ixgbe_mac_X550:
    146. case ixgbe_mac_X550EM_x:
    147. case ixgbe_mac_x550em_a:
    148. IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
    149. break;
    150. default:
    151. break;
    152. }
    153. /*
    154. * If there is a fan on this device and it has failed log the
    155. * failure.
    156. */
    157. if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
    158. u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
    159. if (esdp & IXGBE_ESDP_SDP1)
    160. e_crit(probe, "Fan has stopped, replace the adapter\n");
    161. }
    162. if (allow_unsupported_sfp)
    163. hw->allow_unsupported_sfp = allow_unsupported_sfp;
    164. /* reset_hw fills in the perm_addr as well */
    165. hw->phy.reset_if_overtemp = true;
    166. /* ixgbe_reset_hw_82599() -> ixgbe_get_mac_addr_generic()
    167. On 82599 the reset path reads the MAC address from receive address register 0 (RAH(0)/RAL(0)) and stores it in hw->mac.perm_addr; the article describes this as the address kept in the EEPROM */
    168. err = hw->mac.ops.reset_hw(hw);
    169. hw->phy.reset_if_overtemp = false;
    170. ixgbe_set_eee_capable(adapter);
    171. if (err == IXGBE_ERR_SFP_NOT_PRESENT) {
    172. err = 0;
    173. } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    174. e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n");
    175. e_dev_err("Reload the driver after installing a supported module.\n");
    176. goto err_sw_init;
    177. } else if (err) {
    178. e_dev_err("HW Init failed: %d\n", err);
    179. goto err_sw_init;
    180. }
    181. #ifdef CONFIG_PCI_IOV
    182. /* SR-IOV not supported on the 82598 */
    183. if (adapter->hw.mac.type == ixgbe_mac_82598EB)
    184. goto skip_sriov;
    185. /* Mailbox */
    186. ixgbe_init_mbx_params_pf(hw);
    187. hw->mbx.ops = ii->mbx_ops;
    188. pci_sriov_set_totalvfs(pdev, IXGBE_MAX_VFS_DRV_LIMIT);
    189. ixgbe_enable_sriov(adapter, max_vfs);
    190. skip_sriov:
    191. #endif
    192. netdev->features = NETIF_F_SG |
    193. NETIF_F_TSO |
    194. NETIF_F_TSO6 |
    195. NETIF_F_RXHASH |
    196. NETIF_F_RXCSUM |
    197. NETIF_F_HW_CSUM;
    198. #define IXGBE_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
    199. NETIF_F_GSO_GRE_CSUM | \
    200. NETIF_F_GSO_IPXIP4 | \
    201. NETIF_F_GSO_IPXIP6 | \
    202. NETIF_F_GSO_UDP_TUNNEL | \
    203. NETIF_F_GSO_UDP_TUNNEL_CSUM)
    204. netdev->gso_partial_features = IXGBE_GSO_PARTIAL_FEATURES;
    205. netdev->features |= NETIF_F_GSO_PARTIAL |
    206. IXGBE_GSO_PARTIAL_FEATURES;
    207. if (hw->mac.type >= ixgbe_mac_82599EB)
    208. netdev->features |= NETIF_F_SCTP_CRC;
    209. /* copy netdev features into list of user selectable features */
    210. netdev->hw_features |= netdev->features |
    211. NETIF_F_HW_VLAN_CTAG_FILTER |
    212. NETIF_F_HW_VLAN_CTAG_RX |
    213. NETIF_F_HW_VLAN_CTAG_TX |
    214. NETIF_F_RXALL |
    215. NETIF_F_HW_L2FW_DOFFLOAD;
    216. if (hw->mac.type >= ixgbe_mac_82599EB)
    217. netdev->hw_features |= NETIF_F_NTUPLE |
    218. NETIF_F_HW_TC;
    219. if (pci_using_dac)
    220. netdev->features |= NETIF_F_HIGHDMA;
    221. netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
    222. netdev->hw_enc_features |= netdev->vlan_features;
    223. netdev->mpls_features |= NETIF_F_SG |
    224. NETIF_F_TSO |
    225. NETIF_F_TSO6 |
    226. NETIF_F_HW_CSUM;
    227. netdev->mpls_features |= IXGBE_GSO_PARTIAL_FEATURES;
    228. /* set this bit last since it cannot be part of vlan_features */
    229. netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
    230. NETIF_F_HW_VLAN_CTAG_RX |
    231. NETIF_F_HW_VLAN_CTAG_TX;
    232. netdev->priv_flags |= IFF_UNICAST_FLT;
    233. netdev->priv_flags |= IFF_SUPP_NOFCS;
    234. /* MTU range: 68 - 9710 */
    235. netdev->min_mtu = ETH_MIN_MTU;
    236. netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
    237. #ifdef CONFIG_IXGBE_DCB
    238. if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE)
    239. netdev->dcbnl_ops = &ixgbe_dcbnl_ops;
    240. #endif
    241. #ifdef IXGBE_FCOE
    242. if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) {
    243. unsigned int fcoe_l;
    244. if (hw->mac.ops.get_device_caps) {
    245. hw->mac.ops.get_device_caps(hw, &device_caps);
    246. if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS)
    247. adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
    248. }
    249. fcoe_l = min_t(int, IXGBE_FCRETA_SIZE, num_online_cpus());
    250. adapter->ring_feature[RING_F_FCOE].limit = fcoe_l;
    251. netdev->features |= NETIF_F_FSO |
    252. NETIF_F_FCOE_CRC;
    253. netdev->vlan_features |= NETIF_F_FSO |
    254. NETIF_F_FCOE_CRC |
    255. NETIF_F_FCOE_MTU;
    256. }
    257. #endif /* IXGBE_FCOE */
    258. if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
    259. netdev->hw_features |= NETIF_F_LRO;
    260. if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
    261. netdev->features |= NETIF_F_LRO;
    262. /* make sure the EEPROM is good */
    263. if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) {
    264. e_dev_err("The EEPROM Checksum Is Not Valid\n");
    265. err = -EIO;
    266. goto err_sw_init;
    267. }
    268. eth_platform_get_mac_address(&adapter->pdev->dev,
    269. adapter->hw.mac.perm_addr);
    270. memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len);
    271. if (!is_valid_ether_addr(netdev->dev_addr)) {
    272. e_dev_err("invalid MAC address\n");
    273. err = -EIO;
    274. goto err_sw_init;
    275. }
    276. /* Set hw->mac.addr to permanent MAC address */
    277. ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
    278. ixgbe_mac_set_default_filter(adapter);
    279. setup_timer(&adapter->service_timer, &ixgbe_service_timer,
    280. (unsigned long) adapter);
    281. if (ixgbe_removed(hw->hw_addr)) {
    282. err = -EIO;
    283. goto err_sw_init;
    284. }
    285. INIT_WORK(&adapter->service_task, ixgbe_service_task);
    286. set_bit(__IXGBE_SERVICE_INITED, &adapter->state);
    287. clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);
    288. /* ixgbe_init_interrupt_scheme() -> ixgbe_set_num_queues() -> ixgbe_set_fdir_queues()/ixgbe_set_rss_queues()
    289. ixgbe_set_interrupt_capability() -> ixgbe_acquire_msix_vectors() -> pci_enable_msix()
    290. ixgbe_alloc_q_vectors()
    291. Author's summary of this call: sets adapter->num_tx/rx_queues according to FDIR/RSS,
    292. requests interrupts from the PCI subsystem,
    293. sets the poll function, allocates the ixgbe_q_vector structures, initializes NAPI and adds it to napi_list,
    294. and allocates the TX/RX ring arrays */
    295. err = ixgbe_init_interrupt_scheme(adapter);
    296. if (err)
    297. goto err_sw_init;
    298. for (i = 0; i < adapter->num_rx_queues; i++)
    299. u64_stats_init(&adapter->rx_ring[i]->syncp);
    300. for (i = 0; i < adapter->num_tx_queues; i++)
    301. u64_stats_init(&adapter->tx_ring[i]->syncp);
    302. for (i = 0; i < adapter->num_xdp_queues; i++)
    303. u64_stats_init(&adapter->xdp_ring[i]->syncp);
    304. /* WOL not supported for all devices */
    305. adapter->wol = 0;
    306. hw->eeprom.ops.read(hw, 0x2c, &adapter->eeprom_cap);
    307. hw->wol_enabled = ixgbe_wol_supported(adapter, pdev->device,
    308. pdev->subsystem_device);
    309. if (hw->wol_enabled)
    310. adapter->wol = IXGBE_WUFC_MAG;
    311. device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
    312. /* save off EEPROM version number */
    313. hw->eeprom.ops.read(hw, 0x2e, &adapter->eeprom_verh);
    314. hw->eeprom.ops.read(hw, 0x2d, &adapter->eeprom_verl);
    315. /* pick up the PCI bus settings for reporting later */
    316. if (ixgbe_pcie_from_parent(hw))
    317. ixgbe_get_parent_bus_info(adapter);
    318. else
    319. hw->mac.ops.get_bus_info(hw);
    320. /* calculate the expected PCIe bandwidth required for optimal
    321. * performance. Note that some older parts will never have enough
    322. * bandwidth due to being older generation PCIe parts. We clamp these
    323. * parts to ensure no warning is displayed if it can't be fixed.
    324. */
    325. switch (hw->mac.type) {
    326. case ixgbe_mac_82598EB:
    327. expected_gts = min(ixgbe_enumerate_functions(adapter) * 10, 16);
    328. break;
    329. default:
    330. expected_gts = ixgbe_enumerate_functions(adapter) * 10;
    331. break;
    332. }
    333. /* don't check link if we failed to enumerate functions */
    334. if (expected_gts > 0)
    335. ixgbe_check_minimum_link(adapter, expected_gts);
    336. err = ixgbe_read_pba_string_generic(hw, part_str, sizeof(part_str));
    337. if (err)
    338. strlcpy(part_str, "Unknown", sizeof(part_str));
    339. if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
    340. e_dev_info("MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
    341. hw->mac.type, hw->phy.type, hw->phy.sfp_type,
    342. part_str);
    343. else
    344. e_dev_info("MAC: %d, PHY: %d, PBA No: %s\n",
    345. hw->mac.type, hw->phy.type, part_str);
    346. e_dev_info("%pM\n", netdev->dev_addr);
    347. /* reset the hardware with the new settings */
    348. err = hw->mac.ops.start_hw(hw);
    349. if (err == IXGBE_ERR_EEPROM_VERSION) {
    350. /* We are running on a pre-production device, log a warning */
    351. e_dev_warn("This device is a pre-production adapter/LOM. "
    352. "Please be aware there may be issues associated "
    353. "with your hardware. If you are experiencing "
    354. "problems please contact your Intel or hardware "
    355. "representative who provided you with this "
    356. "hardware.\n");
    357. }
    358. strcpy(netdev->name, "eth%d");
    359. pci_set_drvdata(pdev, adapter);
    360. err = register_netdev(netdev);// register the net_device with the networking core
    361. if (err)
    362. goto err_register;
    363. /* power down the optics for 82599 SFP+ fiber */
    364. if (hw->mac.ops.disable_tx_laser)
    365. hw->mac.ops.disable_tx_laser(hw);
    366. /* carrier off reporting is important to ethtool even BEFORE open */
    367. netif_carrier_off(netdev);
    368. #ifdef CONFIG_IXGBE_DCA
    369. if (dca_add_requester(&pdev->dev) == 0) {
    370. adapter->flags |= IXGBE_FLAG_DCA_ENABLED;
    371. ixgbe_setup_dca(adapter);
    372. }
    373. #endif
    374. if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
    375. e_info(probe, "IOV is enabled with %d VFs\n", adapter->num_vfs);
    376. for (i = 0; i < adapter->num_vfs; i++)
    377. ixgbe_vf_configuration(pdev, (i | 0x10000000));
    378. }
    379. /* firmware requires driver version to be 0xFFFFFFFF
    380. * since os does not support feature
    381. */
    382. if (hw->mac.ops.set_fw_drv_ver)
    383. hw->mac.ops.set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF,
    384. sizeof(ixgbe_driver_version) - 1,
    385. ixgbe_driver_version);
    386. /* add san mac addr to netdev */
    387. ixgbe_add_sanmac_netdev(netdev);
    388. e_dev_info("%s\n", ixgbe_default_device_descr);
    389. #ifdef CONFIG_IXGBE_HWMON
    390. if (ixgbe_sysfs_init(adapter))
    391. e_err(probe, "failed to allocate sysfs resources\n");
    392. #endif /* CONFIG_IXGBE_HWMON */
    393. ixgbe_dbg_adapter_init(adapter);
    394. /* setup link for SFP devices with MNG FW, else wait for IXGBE_UP */
    395. if (ixgbe_mng_enabled(hw) && ixgbe_is_sfp(hw) && hw->mac.ops.setup_link)
    396. hw->mac.ops.setup_link(hw,
    397. IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL,
    398. true);
    399. return 0;
    /* Error unwinding: each label falls through into the labels below it. */
    400. err_register: /* reached when register_netdev() fails */
    401. ixgbe_release_hw_control(adapter);
    402. ixgbe_clear_interrupt_scheme(adapter);
    403. err_sw_init: /* failures from ixgbe_sw_init() onwards: unmap BAR0 and free adapter tables */
    404. ixgbe_disable_sriov(adapter);
    405. adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
    406. iounmap(adapter->io_addr);
    407. kfree(adapter->jump_tables[0]);
    408. kfree(adapter->mac_table);
    409. kfree(adapter->rss_key);
    410. err_ioremap: /* note: jumps straight to this label skip the iounmap() above */
    411. disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
    412. free_netdev(netdev);
    413. err_alloc_etherdev:
    414. pci_release_mem_regions(pdev);
    415. err_pci_reg:
    416. err_dma:
    417. if (!adapter || disable_dev) /* adapter is still NULL if the failure happened before the netdev was allocated */
    418. pci_disable_device(pdev);
    419. return err;
    420. }

    3.1 ixgbe_info 选取

    根据网卡型号(82598/82599/X540/X550 等),以 ent->driver_data 为下标在 ixgbe_info_tbl 数组中选择对应的 ixgbe_info

    1. const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data]; /* lookup as done at the top of ixgbe_probe() */
    2. static const struct ixgbe_info *ixgbe_info_tbl[] = { /* per-board ixgbe_info pointers, indexed by enum ixgbe_boards values */
    3. [board_82598] = &ixgbe_82598_info,
    4. [board_82599] = &ixgbe_82599_info,
    5. [board_X540] = &ixgbe_X540_info,
    6. [board_X550] = &ixgbe_X550_info,
    7. [board_X550EM_x] = &ixgbe_X550EM_x_info,
    8. [board_x550em_x_fw] = &ixgbe_x550em_x_fw_info,
    9. [board_x550em_a] = &ixgbe_x550em_a_info,
    10. [board_x550em_a_fw] = &ixgbe_x550em_a_fw_info,
    11. };
    12. enum ixgbe_boards { /* board identifiers; used as designated indices into ixgbe_info_tbl[] */
    13. board_82598,
    14. board_82599,
    15. board_X540,
    16. board_X550,
    17. board_X550EM_x,
    18. board_x550em_x_fw,
    19. board_x550em_a,
    20. board_x550em_a_fw,
    21. };
    22. const struct ixgbe_info ixgbe_82599_info = { /* ixgbe_info_tbl[board_82599]; ixgbe_probe() copies these ops tables into hw */
    23. .mac = ixgbe_mac_82599EB, /* becomes hw->mac.type */
    24. .get_invariants = &ixgbe_get_invariants_82599,
    25. .mac_ops = &mac_ops_82599, /* copied into hw->mac.ops */
    26. .eeprom_ops = &eeprom_ops_82599, /* copied into hw->eeprom.ops */
    27. .phy_ops = &phy_ops_82599, /* copied into hw->phy.ops */
    28. .mbx_ops = &mbx_ops_generic, /* assigned to hw->mbx.ops under CONFIG_PCI_IOV */
    29. .mvals = ixgbe_mvals_8259X, /* becomes hw->mvals */
    30. };

    3.2 net_device/ixgbe_adapter 分配

    1. netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), MAX_TX_QUEUES); /* call-site excerpt: private area sized for ixgbe_adapter */
    2. struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count) /* Ethernet wrapper around alloc_netdev_mq() */
    3. {
    4. return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count); /* name template "eth%d", setup callback ether_setup() */
    5. }
    /* Allocates a zeroed net_device plus sizeof_priv bytes of private data and
     * queue_count TX queues, runs setup() on it and copies name into dev->name.
     * Returns the device, or NULL if any allocation or dev_addr_init() fails. */
    6. struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
    7. void (*setup)(struct net_device *), unsigned int queue_count)
    8. {
    9. struct netdev_queue *tx;
    10. struct net_device *dev;
    11. size_t alloc_size;
    12. struct net_device *p;
    13. BUG_ON(strlen(name) >= sizeof(dev->name));
    14. alloc_size = sizeof(struct net_device); // size of the net_device itself
    15. if (sizeof_priv) {
    16. /* ensure 32-byte alignment of private area */
    17. alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
    18. alloc_size += sizeof_priv; // plus the size of the private data
    19. }
    20. /* ensure 32-byte alignment of whole construct */
    21. alloc_size += NETDEV_ALIGN - 1;
    22. p = kzalloc(alloc_size, GFP_KERNEL); // allocate net_device and private data in one zeroed block
    23. if (!p) {
    24. printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
    25. return NULL;
    26. }
    27. // allocate queue_count netdev_queue entries (the TX queue array); one netdev_queue per TX queue
    28. tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
    29. if (!tx) {
    30. printk(KERN_ERR "alloc_netdev: Unable to allocate "
    31. "tx qdiscs.\n");
    32. goto free_p;
    33. }
    34. dev = PTR_ALIGN(p, NETDEV_ALIGN);
    35. dev->padded = (char *)dev - (char *)p; /* remember the alignment offset from the raw allocation p */
    36. if (dev_addr_init(dev))
    37. goto free_tx;
    38. dev_unicast_init(dev);
    39. dev_net_set(dev, &init_net);
    40. dev->_tx = tx; // keep the TX queue array
    41. dev->num_tx_queues = queue_count; // number of allocated TX queues
    42. dev->real_num_tx_queues = queue_count; // number of TX queues actually in use
    43. dev->gso_max_size = GSO_MAX_SIZE;
    44. netdev_init_queues(dev); // point dev->_tx[i].dev and dev->rx_queue.dev back at dev
    45. INIT_LIST_HEAD(&dev->napi_list);
    46. dev->priv_flags = IFF_XMIT_DST_RELEASE;
    47. setup(dev); // ether_setup() when called through alloc_etherdev_mq()
    48. strcpy(dev->name, name);
    49. return dev;
    50. free_tx:
    51. kfree(tx);
    52. free_p:
    53. kfree(p);
    54. return NULL;
    55. }
    56. static void netdev_init_queues(struct net_device *dev) /* back-links the RX queue and every TX queue to dev, then initializes tx_global_lock */
    57. {
    58. netdev_init_one_queue(dev, &dev->rx_queue, NULL);
    59. netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
    60. spin_lock_init(&dev->tx_global_lock);
    61. }
    62. static void netdev_init_one_queue(struct net_device *dev, /* per-queue callback: records the owning device in the queue */
    63. struct netdev_queue *queue,
    64. void *_unused) /* unused; present to match the netdev_for_each_tx_queue() callback signature */
    65. {
    66. queue->dev = dev;
    67. }
    68. static inline void netdev_for_each_tx_queue(struct net_device *dev, /* calls f(dev, queue, arg) for each of dev's num_tx_queues TX queues */
    69. void (*f)(struct net_device *,
    70. struct netdev_queue *,
    71. void *),
    72. void *arg)
    73. {
    74. unsigned int i;
    75. for (i = 0; i < dev->num_tx_queues; i++)
    76. f(dev, &dev->_tx[i], arg);
    77. }
    78. void ether_setup(struct net_device *dev) /* fills in the Ethernet defaults of a net_device; passed as setup callback by alloc_etherdev_mq() */
    79. {
    80. dev->header_ops = &eth_header_ops;
    81. dev->type = ARPHRD_ETHER; // Ethernet hardware type
    82. dev->hard_header_len = ETH_HLEN; // 14
    83. dev->mtu = ETH_DATA_LEN; // 1500
    84. dev->addr_len = ETH_ALEN; // 6
    85. dev->tx_queue_len = 1000; /* Ethernet wants good queues */
    86. dev->flags = IFF_BROADCAST|IFF_MULTICAST;
    87. memset(dev->broadcast, 0xFF, ETH_ALEN); /* broadcast address is all 0xFF bytes */
    88. }

    3.3 读取eeprom中的mac地址,写入hw->mac.perm_addr

    1. struct ixgbe_info ixgbe_82599_info = { /* NOTE(review): this variant lacks the const, .mbx_ops and .mvals of the definition shown in section 3.1 — presumably from an older driver version */
    2. .mac = ixgbe_mac_82599EB,
    3. .get_invariants = &ixgbe_get_invariants_82599,
    4. .mac_ops = &mac_ops_82599, /* MAC operations table listed below */
    5. .eeprom_ops = &eeprom_ops_82599,
    6. .phy_ops = &phy_ops_82599,
    7. };
    8. static struct ixgbe_mac_operations mac_ops_82599 = { /* MAC operation table referenced by ixgbe_82599_info.mac_ops */
    9. .init_hw = &ixgbe_init_hw_generic,
    10. .reset_hw = &ixgbe_reset_hw_82599, /* reached from ixgbe_probe() through hw->mac.ops.reset_hw() */
    11. .start_hw = &ixgbe_start_hw_82599,
    12. .clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic,
    13. .get_media_type = &ixgbe_get_media_type_82599,
    14. .get_supported_physical_layer = &ixgbe_get_supported_physical_layer_82599,
    15. .enable_rx_dma = &ixgbe_enable_rx_dma_82599,
    16. .get_mac_addr = &ixgbe_get_mac_addr_generic, /* called by ixgbe_reset_hw_82599() to fill hw->mac.perm_addr */
    17. .get_san_mac_addr = &ixgbe_get_san_mac_addr_82599,
    18. .get_device_caps = &ixgbe_get_device_caps_82599,
    19. .stop_adapter = &ixgbe_stop_adapter_generic,
    20. .get_bus_info = &ixgbe_get_bus_info_generic,
    21. .set_lan_id = &ixgbe_set_lan_id_multi_port_pcie,
    22. .read_analog_reg8 = &ixgbe_read_analog_reg8_82599,
    23. .write_analog_reg8 = &ixgbe_write_analog_reg8_82599,
    24. .setup_link = &ixgbe_setup_mac_link_82599,
    25. .check_link = &ixgbe_check_mac_link_82599,
    26. .get_link_capabilities = &ixgbe_get_link_capabilities_82599,
    27. .led_on = &ixgbe_led_on_generic,
    28. .led_off = &ixgbe_led_off_generic,
    29. .blink_led_start = &ixgbe_blink_led_start_generic,
    30. .blink_led_stop = &ixgbe_blink_led_stop_generic,
    31. .set_rar = &ixgbe_set_rar_generic,
    32. .clear_rar = &ixgbe_clear_rar_generic,
    33. .set_vmdq = &ixgbe_set_vmdq_82599,
    34. .clear_vmdq = &ixgbe_clear_vmdq_82599,
    35. .init_rx_addrs = &ixgbe_init_rx_addrs_generic,
    36. .update_uc_addr_list = &ixgbe_update_uc_addr_list_generic,
    37. .update_mc_addr_list = &ixgbe_update_mc_addr_list_generic,
    38. .enable_mc = &ixgbe_enable_mc_generic,
    39. .disable_mc = &ixgbe_disable_mc_generic,
    40. .clear_vfta = &ixgbe_clear_vfta_82599,
    41. .set_vfta = &ixgbe_set_vfta_82599,
    42. .fc_enable = &ixgbe_fc_enable_generic,
    43. .init_uta_tables = &ixgbe_init_uta_tables_82599,
    44. .setup_sfp = &ixgbe_setup_sfp_modules_82599,
    45. };
    /* Resets the 82599 MAC: stops the adapter, initializes PHY/SFP, issues a
     * software reset, restores AUTOC/AUTOC2, re-initializes the receive
     * addresses and stores the permanent and SAN MAC addresses in hw->mac.
     * Returns 0 or an IXGBE_ERR_* status. */
    46. static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
    47. {
    48. s32 status = 0;
    49. u32 ctrl, ctrl_ext;
    50. u32 i;
    51. u32 autoc;
    52. u32 autoc2;
    53. /* Call adapter stop to disable tx/rx and clear interrupts */
    54. hw->mac.ops.stop_adapter(hw);
    55. /* PHY ops must be identified and initialized prior to reset */
    56. /* Init PHY and function pointers, perform SFP setup */
    57. status = hw->phy.ops.init(hw);
    58. if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
    59. goto reset_hw_out;
    60. /* Setup SFP module if there is one present. */
    61. if (hw->phy.sfp_setup_needed) {
    62. status = hw->mac.ops.setup_sfp(hw);
    63. hw->phy.sfp_setup_needed = false;
    64. }
    65. /* Reset PHY */
    66. if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL)
    67. hw->phy.ops.reset(hw);
    68. /*
    69. * Prevent the PCI-E bus from hanging by disabling PCI-E master
    70. * access and verify no pending requests before reset
    71. */
    72. status = ixgbe_disable_pcie_master(hw); /* note: overwrites any status from phy.ops.init()/setup_sfp() above */
    73. if (status != 0) {
    74. status = IXGBE_ERR_MASTER_REQUESTS_PENDING;
    75. hw_dbg(hw, "PCI-E Master disable polling has failed.\n");
    76. }
    77. /*
    78. * Issue global reset to the MAC. This needs to be a SW reset.
    79. * If link reset is used, it might reset the MAC when mng is using it
    80. */
    81. ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
    82. IXGBE_WRITE_REG(hw, IXGBE_CTRL, (ctrl | IXGBE_CTRL_RST));
    83. IXGBE_WRITE_FLUSH(hw);
    84. /* Poll for reset bit to self-clear indicating reset is complete */
    85. for (i = 0; i < 10; i++) {
    86. udelay(1);
    87. ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
    88. if (!(ctrl & IXGBE_CTRL_RST))
    89. break;
    90. }
    91. if (ctrl & IXGBE_CTRL_RST) {
    92. status = IXGBE_ERR_RESET_FAILED;
    93. hw_dbg(hw, "Reset polling failed to complete.\n");
    94. }
    95. /* Clear PF Reset Done bit so PF/VF Mail Ops can work */
    96. ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
    97. ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD;
    98. IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
    99. msleep(50);
    100. /*
    101. * Store the original AUTOC/AUTOC2 values if they have not been
    102. * stored off yet. Otherwise restore the stored original
    103. * values since the reset operation sets back to defaults.
    104. */
    105. autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
    106. autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
    107. if (hw->mac.orig_link_settings_stored == false) {
    108. hw->mac.orig_autoc = autoc;
    109. hw->mac.orig_autoc2 = autoc2;
    110. hw->mac.orig_link_settings_stored = true;
    111. } else {
    112. if (autoc != hw->mac.orig_autoc)
    113. IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc |
    114. IXGBE_AUTOC_AN_RESTART));
    115. if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) !=
    116. (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) {
    117. autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK;
    118. autoc2 |= (hw->mac.orig_autoc2 &
    119. IXGBE_AUTOC2_UPPER_MASK);
    120. IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2);
    121. }
    122. }
    123. /*
    124. * Store MAC address from RAR0, clear receive address registers, and
    125. * clear the multicast table. Also reset num_rar_entries to 128,
    126. * since we modify this value when programming the SAN MAC address.
    127. */
    128. hw->mac.num_rar_entries = 128;
    129. hw->mac.ops.init_rx_addrs(hw);
    130. /* Store the permanent mac address */
    131. hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); // for 82599 this is ixgbe_get_mac_addr_generic(): reads RAR0 and writes the address into hw->mac.perm_addr (the article describes it as the EEPROM MAC address)
    132. /* Store the permanent SAN mac address */
    133. hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
    134. /* Add the SAN MAC address to the RAR only if it's a valid address */
    135. if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
    136. hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
    137. hw->mac.san_addr, 0, IXGBE_RAH_AV);
    138. /* Reserve the last RAR for the SAN MAC address */
    139. hw->mac.num_rar_entries--;
    140. }
    141. reset_hw_out:
    142. return status;
    143. }
    144. s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr) /* copy the 6-byte MAC address held in RAR0 into mac_addr; always returns 0 */
    145. {
    146. u32 rar_high;
    147. u32 rar_low;
    148. u16 i;
    149. rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(0)); /* RAH(0): source of address bytes 4-5 */
    150. rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(0)); /* RAL(0): source of address bytes 0-3 */
    151. for (i = 0; i < 4; i++)
    152. mac_addr[i] = (u8)(rar_low >> (i*8)); /* least-significant byte of RAL goes to mac_addr[0] */
    153. for (i = 0; i < 2; i++)
    154. mac_addr[i+4] = (u8)(rar_high >> (i*8)); /* only the low 16 bits of RAH are used */
    155. return 0;
    156. }
    157. #define IXGBE_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
    158. (0x0A200 + ((_i) * 8))) /* RAL offset for entry _i: base 0x05400 for _i <= 15, base 0x0A200 otherwise; stride of 8 */
    159. #define IXGBE_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
    160. (0x0A204 + ((_i) * 8))) /* RAH offset for entry _i: base 0x05404 for _i <= 15, base 0x0A204 otherwise; stride of 8 */

    3.4 ixgbe_init_interrupt_scheme()

    1. /**
    2. * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme
    3. * @adapter: board private structure to initialize
    4. *
    5. * We determine which interrupt scheme to use based on...
    6. * - Kernel support (MSI, MSI-X)
    7. * - which can be user-defined (via MODULE_PARAM)
    8. * - Hardware queue count (num_*_queues)
    9. * - defined by miscellaneous hardware support/features (RSS, etc.)
    10. **/
    11. int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
    12. {
    13. int err;
    14. /* Number of supported queues */
    15. ixgbe_set_num_queues(adapter); // sets adapter->num_rx_queues / num_tx_queues from the enabled features (DCB, SR-IOV, RSS)
    16. /* Set interrupt mode */
    17. ixgbe_set_interrupt_capability(adapter); // tries MSI-X first, otherwise requests a single MSI interrupt
    18. err = ixgbe_alloc_q_vectors(adapter); // allocates the ixgbe_q_vector structures and registers ixgbe_poll as their NAPI poll function
    19. if (err) {
    20. e_dev_err("Unable to allocate memory for queue vectors\n");
    21. goto err_alloc_q_vectors;
    22. }
    23. ixgbe_cache_ring_register(adapter);// caches the register index (reg_idx) each descriptor ring is assigned to
    24. e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count = %u\n",
    25. (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
    26. adapter->num_rx_queues, adapter->num_tx_queues,
    27. adapter->num_xdp_queues);
    28. set_bit(__IXGBE_DOWN, &adapter->state);
    29. return 0;
    30. err_alloc_q_vectors:
    31. ixgbe_reset_interrupt_capability(adapter);
    32. return err;
    33. }

    3.4.1 设置收发队列 ixgbe_set_num_queues()

    1. /**
    2. * ixgbe_set_num_queues - Allocate queues for device, feature dependent
    3. * @adapter: board private structure to initialize
    4. *
    5. * This is the top level queue allocation routine. The order here is very
    6. * important, starting with the "most" number of features turned on at once,
    7. * and ending with the smallest set of features. This way large combinations
    8. * can be allocated if they're turned on, and smaller combinations are the
    9. * fallthrough conditions.
    10. *
    11. **/
    12. static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
    13. {
    14. /* Start with base case */
    15. adapter->num_rx_queues = 1;
    16. adapter->num_tx_queues = 1;
    17. adapter->num_xdp_queues = 0;
    18. adapter->num_rx_pools = adapter->num_rx_queues;
    19. adapter->num_rx_queues_per_pool = 1;
    20. #ifdef CONFIG_IXGBE_DCB
    21. if (ixgbe_set_dcb_sriov_queues(adapter)) /* tried first: largest feature combination; non-zero return ends the search */
    22. return;
    23. if (ixgbe_set_dcb_queues(adapter))
    24. return;
    25. #endif
    26. if (ixgbe_set_sriov_queues(adapter))
    27. return;
    28. ixgbe_set_rss_queues(adapter); /* last fallthrough: no earlier helper returned non-zero */
    29. }

    3.4.2 向PCI子系统请求中断 ixgbe_set_interrupt_capability()

    1. /**
    2. * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported
    3. * @adapter: board private structure to initialize
    4. *
    5. * Attempt to configure the interrupts using the best available
    6. * capabilities of the hardware and the kernel.
    7. **/
    8. static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
    9. {
    10. int err;
    11. /* We will try to get MSI-X interrupts first */
    12. if (!ixgbe_acquire_msix_vectors(adapter))
    13. return;
    14. /* At this point, we do not have MSI-X capabilities. We need to
    15. * reconfigure or disable various features which require MSI-X
    16. * capability.
    17. */
    18. /* Disable DCB unless we only have a single traffic class */
    19. if (netdev_get_num_tc(adapter->netdev) > 1) {
    20. e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n");
    21. netdev_reset_tc(adapter->netdev);
    22. if (adapter->hw.mac.type == ixgbe_mac_82598EB)
    23. adapter->hw.fc.requested_mode = adapter->last_lfc_mode;
    24. adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
    25. adapter->temp_dcb_cfg.pfc_mode_enable = false;
    26. adapter->dcb_cfg.pfc_mode_enable = false;
    27. }
    28. adapter->dcb_cfg.num_tcs.pg_tcs = 1;
    29. adapter->dcb_cfg.num_tcs.pfc_tcs = 1;
    30. /* Disable SR-IOV support */
    31. e_dev_warn("Disabling SR-IOV support\n");
    32. ixgbe_disable_sriov(adapter);
    33. /* Disable RSS */
    34. e_dev_warn("Disabling RSS support\n");
    35. adapter->ring_feature[RING_F_RSS].limit = 1;
    36. /* recalculate number of queues now that many features have been
    37. * changed or disabled.
    38. */
    39. ixgbe_set_num_queues(adapter);
    40. adapter->num_q_vectors = 1;
    41. err = pci_enable_msi(adapter->pdev); // ask the PCI subsystem for a single MSI interrupt
    42. if (err)
    43. e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n",
    44. err);
    45. else
    46. adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
    47. }

    3.4.3 申请中断向量表 ixgbe_alloc_q_vectors

    1. /**
    2. * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors
    3. * @adapter: board private structure to initialize
    4. *
    5. * We allocate one q_vector per queue interrupt. If allocation fails we
    6. * return -ENOMEM.
    7. **/
    8. static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
    9. {
    10. int q_vectors = adapter->num_q_vectors;
    11. int rxr_remaining = adapter->num_rx_queues;
    12. int txr_remaining = adapter->num_tx_queues;
    13. int xdp_remaining = adapter->num_xdp_queues;
    14. int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0;
    15. int err;
    16. /* only one q_vector if MSI-X is disabled. */
    17. // MSI-X = Message Signaled Interrupts (extended)
    18. // when the MSI-X flag is not set, the vector count is forced to 1 below
    19. if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
    20. q_vectors = 1;
    21. if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) {
    22. for (; rxr_remaining; v_idx++) {
    23. err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
    24. 0, 0, 0, 0, 1, rxr_idx); // allocate an ixgbe_q_vector carrying exactly one Rx ring (no Tx, no XDP)
    25. if (err)
    26. goto err_out;
    27. /* update counts and index */
    28. rxr_remaining--;
    29. rxr_idx++;
    30. }
    31. }
    32. for (; v_idx < q_vectors; v_idx++) {
    33. int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
    34. int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
    35. int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors - v_idx);
    36. err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
    37. tqpv, txr_idx,
    38. xqpv, xdp_idx,
    39. rqpv, rxr_idx);
    40. if (err)
    41. goto err_out;
    42. /* update counts and index */
    43. rxr_remaining -= rqpv;
    44. txr_remaining -= tqpv;
    45. xdp_remaining -= xqpv;
    46. rxr_idx++;
    47. txr_idx++;
    48. xdp_idx += xqpv;
    49. }
    50. return 0;
    51. err_out:
    52. adapter->num_tx_queues = 0;
    53. adapter->num_xdp_queues = 0;
    54. adapter->num_rx_queues = 0;
    55. adapter->num_q_vectors = 0;
    56. while (v_idx--)
    57. ixgbe_free_q_vector(adapter, v_idx);
    58. return -ENOMEM;
    59. }

    分配单个中断向量 ixgbe_alloc_q_vector(),并注册 NAPI 模式的 poll 函数(ixgbe_poll,每次 poll 最多处理 64 个数据包)

    1. /**
    2. * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector
    3. * @adapter: board private structure to initialize
    4. * @v_count: q_vectors allocated on adapter, used for ring interleaving
    5. * @v_idx: index of vector in adapter struct
    6. * @txr_count: total number of Tx rings to allocate
    7. * @txr_idx: index of first Tx ring to allocate
    8. * @xdp_count: total number of XDP rings to allocate
    9. * @xdp_idx: index of first XDP ring to allocate
    10. * @rxr_count: total number of Rx rings to allocate
    11. * @rxr_idx: index of first Rx ring to allocate
    12. *
    13. * We allocate one q_vector. If allocation fails we return -ENOMEM.
    14. **/
    15. static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
    16. int v_count, int v_idx,
    17. int txr_count, int txr_idx,
    18. int xdp_count, int xdp_idx,
    19. int rxr_count, int rxr_idx)
    20. {
    21. struct ixgbe_q_vector *q_vector;
    22. struct ixgbe_ring *ring;
    23. int node = NUMA_NO_NODE;
    24. int cpu = -1;
    25. int ring_count, size;
    26. u8 tcs = netdev_get_num_tc(adapter->netdev);
    27. ring_count = txr_count + rxr_count + xdp_count;
    28. size = sizeof(struct ixgbe_q_vector) +
    29. (sizeof(struct ixgbe_ring) * ring_count); /* one allocation holds the q_vector plus all of its rings */
    30. /* customize cpu for Flow Director mapping */
    31. if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
    32. u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
    33. if (rss_i > 1 && adapter->atr_sample_rate) {
    34. if (cpu_online(v_idx)) {
    35. cpu = v_idx;
    36. node = cpu_to_node(cpu);
    37. }
    38. }
    39. }
    40. /* allocate q_vector and rings */
    41. q_vector = kzalloc_node(size, GFP_KERNEL, node);
    42. if (!q_vector)
    43. q_vector = kzalloc(size, GFP_KERNEL); /* node-specific allocation failed: retry without a node constraint */
    44. if (!q_vector)
    45. return -ENOMEM;
    46. /* setup affinity mask and node */
    47. if (cpu != -1)
    48. cpumask_set_cpu(cpu, &q_vector->affinity_mask);
    49. q_vector->numa_node = node;
    50. #ifdef CONFIG_IXGBE_DCA
    51. /* initialize CPU for DCA */
    52. q_vector->cpu = -1;
    53. #endif
    54. /* Register q_vector->napi with adapter->netdev through netif_napi_add();
    55. ixgbe_poll() is the poll callback and 64 is passed as its weight argument */
    56. /* initialize NAPI */
    57. netif_napi_add(adapter->netdev, &q_vector->napi,
    58. ixgbe_poll, 64);
    59. /* tie q_vector and adapter together */
    60. adapter->q_vector[v_idx] = q_vector;// store the pointer in adapter->q_vector[v_idx]
    61. q_vector->adapter = adapter;
    62. q_vector->v_idx = v_idx;
    63. /* initialize work limits */
    64. q_vector->tx.work_limit = adapter->tx_work_limit;
    65. /* initialize pointer to rings */
    66. ring = q_vector->ring;
    67. /* initialize ITR */
    68. if (txr_count && !rxr_count) {
    69. /* tx only vector */
    70. if (adapter->tx_itr_setting == 1)
    71. q_vector->itr = IXGBE_12K_ITR;
    72. else
    73. q_vector->itr = adapter->tx_itr_setting;
    74. } else {
    75. /* rx or rx/tx vector */
    76. if (adapter->rx_itr_setting == 1)
    77. q_vector->itr = IXGBE_20K_ITR;
    78. else
    79. q_vector->itr = adapter->rx_itr_setting;
    80. }
    81. while (txr_count) {
    82. /* assign generic ring traits */
    83. ring->dev = &adapter->pdev->dev;
    84. ring->netdev = adapter->netdev;
    85. /* configure backlink on ring */
    86. ring->q_vector = q_vector;
    87. /* update q_vector Tx values */
    88. ixgbe_add_ring(ring, &q_vector->tx);
    89. /* apply Tx specific ring traits */
    90. ring->count = adapter->tx_ring_count;
    91. if (adapter->num_rx_pools > 1)
    92. ring->queue_index =
    93. txr_idx % adapter->num_rx_queues_per_pool;
    94. else
    95. ring->queue_index = txr_idx;
    96. /* assign ring to adapter */
    97. WRITE_ONCE(adapter->tx_ring[txr_idx], ring);
    98. /* update count and index */
    99. txr_count--;
    100. txr_idx += v_count; /* step by v_count so Tx rings interleave across the vectors */
    101. /* push pointer to next ring */
    102. ring++;
    103. }
    104. while (xdp_count) {
    105. /* assign generic ring traits */
    106. ring->dev = &adapter->pdev->dev;
    107. ring->netdev = adapter->netdev;
    108. /* configure backlink on ring */
    109. ring->q_vector = q_vector;
    110. /* update q_vector Tx values */
    111. ixgbe_add_ring(ring, &q_vector->tx);
    112. /* apply Tx specific ring traits */
    113. ring->count = adapter->tx_ring_count;
    114. ring->queue_index = xdp_idx;
    115. set_ring_xdp(ring);
    116. /* assign ring to adapter */
    117. WRITE_ONCE(adapter->xdp_ring[xdp_idx], ring);
    118. /* update count and index */
    119. xdp_count--;
    120. xdp_idx++;
    121. /* push pointer to next ring */
    122. ring++;
    123. }
    124. while (rxr_count) {
    125. /* assign generic ring traits */
    126. ring->dev = &adapter->pdev->dev;
    127. ring->netdev = adapter->netdev;
    128. /* configure backlink on ring */
    129. ring->q_vector = q_vector;
    130. /* update q_vector Rx values */
    131. ixgbe_add_ring(ring, &q_vector->rx);
    132. /*
    133. * 82599 errata, UDP frames with a 0 checksum
    134. * can be marked as checksum errors.
    135. */
    136. if (adapter->hw.mac.type == ixgbe_mac_82599EB)
    137. set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state);
    138. #ifdef IXGBE_FCOE
    139. if (adapter->netdev->features & NETIF_F_FCOE_MTU) {
    140. struct ixgbe_ring_feature *f;
    141. f = &adapter->ring_feature[RING_F_FCOE];
    142. if ((rxr_idx >= f->offset) &&
    143. (rxr_idx < f->offset + f->indices))
    144. set_bit(__IXGBE_RX_FCOE, &ring->state);
    145. }
    146. #endif /* IXGBE_FCOE */
    147. /* apply Rx specific ring traits */
    148. ring->count = adapter->rx_ring_count;
    149. if (adapter->num_rx_pools > 1)
    150. ring->queue_index =
    151. rxr_idx % adapter->num_rx_queues_per_pool;
    152. else
    153. ring->queue_index = rxr_idx;
    154. /* assign ring to adapter */
    155. WRITE_ONCE(adapter->rx_ring[rxr_idx], ring);
    156. /* update count and index */
    157. rxr_count--;
    158. rxr_idx += v_count; /* step by v_count so Rx rings interleave across the vectors */
    159. /* push pointer to next ring */
    160. ring++;
    161. }
    162. return 0;
    163. }

    3.4.4 缓存 rx/tx 描述符 ring 对应的寄存器索引 ixgbe_cache_ring_register()

    1. /**
    2. * ixgbe_cache_ring_register - Descriptor ring to register mapping
    3. * @adapter: board private structure to initialize
    4. *
    5. * Once we know the feature-set enabled for the device, we'll cache
    6. * the register offset the descriptor ring is assigned to.
    7. *
    8. * Note, the order the various feature calls is important. It must start with
    9. * the "most" features enabled at the same time, then trickle down to the
    10. * least amount of features turned on at once.
    11. **/
    12. static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
    13. {
    14. /* start with default case */
    15. adapter->rx_ring[0]->reg_idx = 0;
    16. adapter->tx_ring[0]->reg_idx = 0;
    17. #ifdef CONFIG_IXGBE_DCB
    18. if (ixgbe_cache_ring_dcb_sriov(adapter)) /* tried first: largest feature combination; non-zero return ends the search */
    19. return;
    20. if (ixgbe_cache_ring_dcb(adapter))
    21. return;
    22. #endif
    23. if (ixgbe_cache_ring_sriov(adapter))
    24. return;
    25. ixgbe_cache_ring_rss(adapter); /* last fallthrough: no earlier helper returned non-zero */
    26. }

     

  • 相关阅读:
    MySQL数据类型之JSON
    2022亚太C题详细思路
    鸿鹄工程项目管理系统em Spring Cloud+Spring Boot+前后端分离构建工程项目管理系统
    阿里影业+大麦,开启大文娱新纪元?
    基于python-CNN深度学习的食物识别-含数据集+pyqt界面
    1000+已成功入职的软件测试工程师简历范文模板(含真实简历),教你软件测试工程师简历如何编写?
    半年总结 -要有松弛感的慢生活
    StringUtils 系列之 StringUtils.isBlank() 和 StringUtils.isNotBlank() 的区别、CollectionUtils.isEmpty()
    浅谈Java内部锁synchronized
    Java用log4j写日志
  • 原文地址:https://blog.csdn.net/m0_74282605/article/details/127936488