程式人生 > ixgbe網卡驅動(一)

ixgbe網卡驅動(一)

註冊/註銷網卡驅動

模組載入時,通過ixgbe_init_module() -> pci_register_driver()註冊ixgbe_driver
模組卸載時,通過ixgbe_exit_module() -> pci_unregister_driver()註銷ixgbe_driver

/* PCI driver descriptor for ixgbe (excerpt; other members are elided). */
static struct pci_driver ixgbe_driver = {
   ...
   .probe    = ixgbe_probe, /* ixgbe_probe() is called once an ixgbe adapter is detected */
   ...
};

static int __init ixgbe_init_module(void
) { ... ret = pci_register_driver(&ixgbe_driver); // 註冊ixgbe_driver ... } module_init(ixgbe_init_module); static void __exit ixgbe_exit_module(void) { ... pci_unregister_driver(&ixgbe_driver); // 登出ixgbe_driver ... } module_exit(ixgbe_exit_module);

ixgbe_probe()

static const
struct net_device_ops ixgbe_netdev_ops = { ... .ndo_open = ixgbe_open, // ixgbe網絡卡啟動時呼叫ixgbe_open() ... }; static int __devinit ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; struct ixgbe_adapter *adapter = NULL; struct
ixgbe_hw *hw; const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data]; // 根據網絡卡型號(82598/82599)選擇ixgbe_info static int cards_found; int i, err, pci_using_dac; #ifdef IXGBE_FCOE u16 device_caps; #endif u32 part_num, eec; /* pci_enable_device_mem() -> __pci_enable_device_flags() -> do_pci_enable_device() -> pcibios_enable_device() -> pci_enable_resources() -> pci_write_config_word() 向配置暫存器Command(0x04)中寫入PCI_COMMAND_MEMORY(0x2),允許網絡卡驅動訪問網絡卡的Memory空間 */ err = pci_enable_device_mem(pdev); if (err) return err; /* pci_set_dma_mask() -> dma_set_mask() -> dma_supported() 檢查並設定PCI匯流排地址位數 */ if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { pci_using_dac = 1; } else { err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "No usable DMA " "configuration, aborting\n"); goto err_dma; } } pci_using_dac = 0; } /* pci_request_selected_regions() -> __pci_request_selected_regions() -> __pci_request_region() -> request_region()/__request_mem_region() -> __request_region() -> __request_resource() 登記BAR中的匯流排地址(將resource插入iomem_resource資源樹) */ err = pci_request_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM), ixgbe_driver_name); if (err) { dev_err(&pdev->dev, "pci_request_selected_regions failed 0x%x\n", err); goto err_pci_reg; } pci_enable_pcie_error_reporting(pdev); /* pci_set_master() -> __pci_set_master() -> pci_write_config_word() 向配置暫存器Command(0x04)中寫入PCI_COMMAND_MASTER(0x4),允許網絡卡申請PCI匯流排控制權 */ pci_set_master(pdev); /* pci_save_state() -> pci_read_config_dword() 讀取並儲存配置空間到dev->saved_config_space */ pci_save_state(pdev); // 分配net_device和ixgbe_adapter,傳送佇列數為MAX_TX_QUEUES(128) netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), MAX_TX_QUEUES); if (!netdev) { err = -ENOMEM; goto err_alloc_etherdev; } SET_NETDEV_DEV(netdev, &pdev->dev); pci_set_drvdata(pdev, netdev); adapter = netdev_priv(netdev); // 
得到ixgbe_adapter的指標 adapter->netdev = netdev; adapter->pdev = pdev; hw = &adapter->hw; // 得到ixgbe_hw的指標 hw->back = adapter; adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1; // 將BAR0中的匯流排地址對映成記憶體地址,賦給hw->hw_addr,允許網絡卡驅動通過hw->hw_addr訪問網絡卡的BAR0對應的Memory空間 hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); if (!hw->hw_addr) { err = -EIO; goto err_ioremap; } for (i = 1; i <= 5; i++) { if (pci_resource_len(pdev, i) == 0) continue; } netdev->netdev_ops = &ixgbe_netdev_ops; // 註冊ixgbe_netdev_ops ixgbe_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; strcpy(netdev->name, pci_name(pdev)); adapter->bd_number = cards_found; // 設定adapter->bd_number為0 /* Setup hw api */ memcpy(&hw->mac.ops, ii->mac_ops, sizeof(hw->mac.ops)); hw->mac.type = ii->mac; /* EEPROM */ memcpy(&hw->eeprom.ops, ii->eeprom_ops, sizeof(hw->eeprom.ops)); eec = IXGBE_READ_REG(hw, IXGBE_EEC); // 讀取BAR0對應的Memory空間的IXGBE_EEC /* If EEPROM is valid (bit 8 = 1), use default otherwise use bit bang */ if (!(eec & (1 << 8))) hw->eeprom.ops.read = &ixgbe_read_eeprom_bit_bang_generic; /* PHY */ memcpy(&hw->phy.ops, ii->phy_ops, sizeof(hw->phy.ops)); hw->phy.sfp_type = ixgbe_sfp_type_unknown; /* ixgbe_identify_phy_generic will set prtad and mmds properly */ hw->phy.mdio.prtad = MDIO_PRTAD_NONE; hw->phy.mdio.mmds = 0; hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22; hw->phy.mdio.dev = netdev; hw->phy.mdio.mdio_read = ixgbe_mdio_read; hw->phy.mdio.mdio_write = ixgbe_mdio_write; /* set up this timer and work struct before calling get_invariants * which might start the timer */ init_timer(&adapter->sfp_timer); adapter->sfp_timer.function = &ixgbe_sfp_timer; adapter->sfp_timer.data = (unsigned long) adapter; INIT_WORK(&adapter->sfp_task, ixgbe_sfp_task); /* multispeed fiber has its own tasklet, called from GPI SDP1 context */ INIT_WORK(&adapter->multispeed_fiber_task, ixgbe_multispeed_fiber_task); /* a new SFP+ module arrival, called from GPI SDP2 context */ 
INIT_WORK(&adapter->sfp_config_module_task, ixgbe_sfp_config_module_task); /* ixgbe_get_invariants_82599() -> ixgbe_get_pcie_msix_count_82599() 設定hw->mac->max_tx/rx_queues為IXGBE_82599_MAX_TX/RX_QUEUES(128) 讀取並儲存EEPROM的MSI_X_N(0x3F = 63)到hw->mac->max_msix_vectors */ ii->get_invariants(hw); /* setup the private structure */ /* 初始化ixgbe_adapter: 設定adapter->tx/rx_ring_count為1024(預設1024,最小64,最大4096) 設定adapter->ring_feature[RING_F_RSS].indices為min(CPU數, IXGBE_MAX_RSS_INDICES(16)) 設定adapter->ring_feature[RING_F_FDIR].indices為IXGBE_MAX_FDIR_INDICES(64) 設定adapter->flags的IXGBE_FLAG_RSS_ENABLED和IXGBE_FLAG_FDIR_HASH_CAPABLE */ err = ixgbe_sw_init(adapter); if (err) goto err_sw_init; /* * If there is a fan on this device and it has failed log the * failure. */ if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) { u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); if (esdp & IXGBE_ESDP_SDP1) DPRINTK(PROBE, CRIT, "Fan has stopped, replace the adapter\n"); } /* reset_hw fills in the perm_addr as well */ /* ixgbe_reset_hw_82599() -> ixgbe_get_mac_addr_generic() 讀取eeprom中的mac地址,寫入hw->mac.perm_addr */ err = hw->mac.ops.reset_hw(hw); if (err == IXGBE_ERR_SFP_NOT_PRESENT && hw->mac.type == ixgbe_mac_82598EB) { /* * Start a kernel thread to watch for a module to arrive. * Only do this for 82598, since 82599 will generate * interrupts on module arrival. 
*/ set_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state); mod_timer(&adapter->sfp_timer, round_jiffies(jiffies + (2 * HZ))); err = 0; } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { dev_err(&adapter->pdev->dev, "failed to initialize because " "an unsupported SFP+ module type was detected.\n" "Reload the driver after installing a supported " "module.\n"); goto err_sw_init; } else if (err) { dev_err(&adapter->pdev->dev, "HW Init failed: %d\n", err); goto err_sw_init; } netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; netdev->features |= NETIF_F_IPV6_CSUM; netdev->features |= NETIF_F_TSO; netdev->features |= NETIF_F_TSO6; netdev->features |= NETIF_F_GRO; if (adapter->hw.mac.type == ixgbe_mac_82599EB) netdev->features |= NETIF_F_SCTP_CSUM; netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_IP_CSUM; netdev->vlan_features |= NETIF_F_IPV6_CSUM; netdev->vlan_features |= NETIF_F_SG; if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; #ifdef CONFIG_IXGBE_DCB netdev->dcbnl_ops = &dcbnl_ops; #endif #ifdef IXGBE_FCOE if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) { if (hw->mac.ops.get_device_caps) { hw->mac.ops.get_device_caps(hw, &device_caps); if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS) adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE; } } #endif /* IXGBE_FCOE */ if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) netdev->features |= NETIF_F_LRO; /* make sure the EEPROM is good */ if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) { dev_err(&pdev->dev, "The EEPROM Checksum Is Not Valid\n"); err = -EIO; goto err_eeprom; } memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len); // 將mac地址賦給netdev->dev_addr memcpy(netdev->perm_addr, hw->mac.perm_addr, netdev->addr_len); if (ixgbe_validate_mac_addr(netdev->perm_addr)) { dev_err(&pdev->dev, "invalid MAC 
address\n"); err = -EIO; goto err_eeprom; } init_timer(&adapter->watchdog_timer); adapter->watchdog_timer.function = &ixgbe_watchdog; adapter->watchdog_timer.data = (unsigned long)adapter; INIT_WORK(&adapter->reset_task, ixgbe_reset_task); INIT_WORK(&adapter->watchdog_task, ixgbe_watchdog_task); /* ixgbe_init_interrupt_scheme() -> ixgbe_set_num_queues() -> ixgbe_set_fdir_queues()/ixgbe_set_rss_queues() ixgbe_set_interrupt_capability() -> ixgbe_acquire_msix_vectors() -> pci_enable_msix() ixgbe_alloc_q_vectors() ixgbe_alloc_queues() 根據FDIR/RSS設定adapter->num_tx/rx_queues 向PCI子系統請求中斷 設定poll函式,分配ixgbe_q_vector,初始化napi並加入napi_list 分配發送/接收ring陣列 */ err = ixgbe_init_interrupt_scheme(adapter); if (err) goto err_sw_init; switch (pdev->device) { case IXGBE_DEV_ID_82599_KX4: adapter->wol = (IXGBE_WUFC_MAG | IXGBE_WUFC_EX | IXGBE_WUFC_MC | IXGBE_WUFC_BC); /* Enable ACPI wakeup in GRC */ IXGBE_WRITE_REG(hw, IXGBE_GRC, (IXGBE_READ_REG(hw, IXGBE_GRC) & ~IXGBE_GRC_APME)); break; default: adapter->wol = 0; break; } device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); /* pick up the PCI bus settings for reporting later */ hw->mac.ops.get_bus_info(hw); /* print bus type/speed/width info */ dev_info(&pdev->dev, "(PCI Express:%s:%s) %pM\n", ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s": (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"), ((hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == ixgbe_bus_width_pcie_x1) ? 
"Width x1" : "Unknown"), netdev->dev_addr); ixgbe_read_pba_num_generic(hw, &part_num); if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present) dev_info(&pdev->dev, "MAC: %d, PHY: %d, SFP+: %d, PBA No: %06x-%03x\n", hw->mac.type, hw->phy.type, hw->phy.sfp_type, (part_num >> 8), (part_num & 0xff)); else dev_info(&pdev->dev, "MAC: %d, PHY: %d, PBA No: %06x-%03x\n", hw->mac.type, hw->phy.type, (part_num >> 8), (part_num & 0xff)); if (hw->bus.width <= ixgbe_bus_width_pcie_x4) { dev_warn(&pdev->dev, "PCI-Express bandwidth available for " "this card is not sufficient for optimal " "performance.\n"); dev_warn(&pdev->dev, "For optimal performance a x8 " "PCI-Express slot is required.\n"); } /* save off EEPROM version number */ hw->eeprom.ops.read(hw, 0x29, &adapter->eeprom_version); /* reset the hardware with the new settings */ err = hw->mac.ops.start_hw(hw); if (err == IXGBE_ERR_EEPROM_VERSION) { /* We are running on a pre-production device, log a warning */ dev_warn(&pdev->dev, "This device is a pre-production " "adapter/LOM. Please be aware there may be issues " "associated with your hardware. 
If you are " "experiencing problems please contact your Intel or " "hardware representative who provided you with this " "hardware.\n"); } strcpy(netdev->name, "eth%d"); err = register_netdev(netdev); // 註冊netdev if (err) goto err_register; /* carrier off reporting is important to ethtool even BEFORE open */ netif_carrier_off(netdev); if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE || adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) INIT_WORK(&adapter->fdir_reinit_task, ixgbe_fdir_reinit_task); #ifdef CONFIG_IXGBE_DCA if (dca_add_requester(&pdev->dev) == 0) { adapter->flags |= IXGBE_FLAG_DCA_ENABLED; ixgbe_setup_dca(adapter); } #endif /* add san mac addr to netdev */ ixgbe_add_sanmac_netdev(netdev); dev_info(&pdev->dev, "Intel(R) 10 Gigabit Network Connection\n"); cards_found++; return 0; err_register: ixgbe_release_hw_control(adapter); ixgbe_clear_interrupt_scheme(adapter); err_sw_init: err_eeprom: clear_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state); del_timer_sync(&adapter->sfp_timer); cancel_work_sync(&adapter->sfp_task); cancel_work_sync(&adapter->multispeed_fiber_task); cancel_work_sync(&adapter->sfp_config_module_task); iounmap(hw->hw_addr); err_ioremap: free_netdev(netdev); err_alloc_etherdev: pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); err_pci_reg: err_dma: pci_disable_device(pdev); return err; }

主要步驟

根據網絡卡型號(82598/82599)選擇ixgbe_info

const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];

/*
 * Per-board ixgbe_info descriptors, indexed by enum ixgbe_boards.
 * ixgbe_probe() indexes this table with ent->driver_data.
 */
static const struct ixgbe_info *ixgbe_info_tbl[] = {
   [board_82598] = &ixgbe_82598_info,
   [board_82599] = &ixgbe_82599_info,
};

/* Board families; the values are used as indices into ixgbe_info_tbl. */
enum ixgbe_boards {
   board_82598 = 0,
   board_82599 = 1
};

分配net_device和ixgbe_adapter

netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), MAX_TX_QUEUES);

/*
 * Allocate an Ethernet net_device with queue_count Tx queues and
 * sizeof_priv bytes of private data.  Thin wrapper around
 * alloc_netdev_mq() that supplies the "eth%d" name template and
 * ether_setup() as the setup callback.  Returns whatever
 * alloc_netdev_mq() returns.
 */
struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
{
   struct net_device *ethdev;

   ethdev = alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count);
   return ethdev;
}

/*
 * alloc_netdev_mq - allocate a net_device with its private area and Tx queues.
 * @sizeof_priv: size of the private data area to append (0 for none)
 * @name:        device name / name template copied into dev->name
 * @setup:       callback that initialises type-specific fields
 * @queue_count: number of netdev_queue (Tx queue) entries to allocate
 *
 * Returns the NETDEV_ALIGN-aligned net_device, or NULL when one of the
 * allocations or dev_addr_init() fails.
 */
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
      void (*setup)(struct net_device *), unsigned int queue_count)
{
   struct netdev_queue *txqs;
   struct net_device *ndev;
   struct net_device *raw;
   size_t total;

   BUG_ON(strlen(name) >= sizeof(ndev->name));

   /* net_device itself, optionally followed by the aligned private area */
   total = sizeof(struct net_device);
   if (sizeof_priv)
      total = ALIGN(total, NETDEV_ALIGN) + sizeof_priv;

   /* slack so that the whole construct can be aligned to NETDEV_ALIGN */
   total += NETDEV_ALIGN - 1;

   raw = kzalloc(total, GFP_KERNEL);
   if (!raw) {
      printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
      return NULL;
   }

   /* one netdev_queue per Tx queue */
   txqs = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
   if (!txqs) {
      printk(KERN_ERR "alloc_netdev: Unable to allocate "
             "tx qdiscs.\n");
      kfree(raw);
      return NULL;
   }

   /* aligned view of the allocation; remember the offset in ->padded */
   ndev = PTR_ALIGN(raw, NETDEV_ALIGN);
   ndev->padded = (char *)ndev - (char *)raw;

   if (dev_addr_init(ndev)) {
      kfree(txqs);
      kfree(raw);
      return NULL;
   }

   dev_unicast_init(ndev);

   dev_net_set(ndev, &init_net);

   /* publish the Tx queue array and its size */
   ndev->_tx = txqs;
   ndev->num_tx_queues = queue_count;
   ndev->real_num_tx_queues = queue_count;

   ndev->gso_max_size = GSO_MAX_SIZE;

   /* point every queue (Tx array and rx_queue) back at the device */
   netdev_init_queues(ndev);

   INIT_LIST_HEAD(&ndev->napi_list);
   ndev->priv_flags = IFF_XMIT_DST_RELEASE;
   setup(ndev); /* ether_setup() for Ethernet devices */
   strcpy(ndev->name, name);

   return ndev;
}

/*
 * Initialise the Rx queue and every Tx queue of dev so that each points
 * back at dev, then initialise the global Tx lock.
 */
static void netdev_init_queues(struct net_device *dev)
{
   unsigned int q;

   netdev_init_one_queue(dev, &dev->rx_queue, NULL);

   for (q = 0; q < dev->num_tx_queues; q++)
      netdev_init_one_queue(dev, &dev->_tx[q], NULL);

   spin_lock_init(&dev->tx_global_lock);
}

/*
 * Per-queue initialiser with the callback signature expected by
 * netdev_for_each_tx_queue(): records the owning device in the queue.
 * The third argument is ignored.
 */
static void netdev_init_one_queue(struct net_device *dev,
              struct netdev_queue *queue,
              void *_unused)
{
   (void)_unused;

   queue->dev = dev;
}

/*
 * Invoke f(dev, queue, arg) once for each of the dev->num_tx_queues
 * entries of dev->_tx, in index order.
 */
static inline void netdev_for_each_tx_queue(struct net_device *dev,
                   void (*f)(struct net_device *,
                        struct netdev_queue *,
                        void *),
                   void *arg)
{
   unsigned int idx = 0;

   while (idx < dev->num_tx_queues) {
      f(dev, dev->_tx + idx, arg);
      idx++;
   }
}

/*
 * Fill in the Ethernet-generic fields of a net_device: header ops,
 * link type, header/address/MTU sizes, Tx queue length, interface flags
 * and the all-ones broadcast address.
 */
void ether_setup(struct net_device *dev)
{
   /* link-layer identity */
   dev->type = ARPHRD_ETHER;
   dev->header_ops = &eth_header_ops;

   /* sizes */
   dev->addr_len = ETH_ALEN;            /* 6 */
   dev->hard_header_len = ETH_HLEN;     /* 14 */
   dev->mtu = ETH_DATA_LEN;             /* 1500 */

   /* Ethernet wants good queues */
   dev->tx_queue_len = 1000;

   dev->flags = IFF_BROADCAST | IFF_MULTICAST;

   /* broadcast address: ff:ff:ff:ff:ff:ff */
   memset(dev->broadcast, 0xFF, ETH_ALEN);
}

讀取RAR0(IXGBE_RAL(0)/IXGBE_RAH(0))暫存器中的mac地址,寫入hw->mac.perm_addr

err = hw->mac.ops.reset_hw(hw);

/*
 * Board descriptor for the 82599: MAC type, the invariants hook and the
 * MAC/EEPROM/PHY operation tables that ixgbe_probe() copies into ixgbe_hw.
 */
struct ixgbe_info ixgbe_82599_info = {
   .mac            = ixgbe_mac_82599EB,
   .get_invariants = ixgbe_get_invariants_82599,
   .mac_ops        = &mac_ops_82599,
   .eeprom_ops     = &eeprom_ops_82599,
   .phy_ops        = &phy_ops_82599,
};

static struct ixgbe_mac_operations mac_ops_82599 = {
   .init_hw                = &ixgbe_init_hw_generic,
   .reset_hw               = &ixgbe_reset_hw_82599,
   .start_hw               = &ixgbe_start_hw_82599,
   .clear_hw_cntrs         = &ixgbe_clear_hw_cntrs_generic,
   .get_media_type         = &ixgbe_get_media_type_82599,
   .get_supported_physical_layer = &ixgbe_get_supported_physical_layer_82599,
   .enable_rx_dma          = &ixgbe_enable_rx_dma_82599,
   .get_mac_addr           = &ixgbe_get_mac_addr_generic,
   .get_san_mac_addr       = &ixgbe_get_san_mac_addr_82599,
   .get_device_caps        = &ixgbe_get_device_caps_82599,
   .stop_adapter           = &ixgbe_stop_adapter_generic,
   .get_bus_info           = &ixgbe_get_bus_info_generic,
   .set_lan_id             = &ixgbe_set_lan_id_multi_port_pcie,
   .read_analog_reg8       = &ixgbe_read_analog_reg8_82599,
   .write_analog_reg8      = &ixgbe_write_analog_reg8_82599,
   .setup_link             = &ixgbe_setup_mac_link_82599,
   .check_link             = &ixgbe_check_mac_link_82599,
   .get_link_capabilities  = &ixgbe_get_link_capabilities_82599,
   .led_on                 = &ixgbe_led_on_generic,
   .led_off                = &ixgbe_led_off_generic,
   .blink_led_start        = &ixgbe_blink_led_start_generic,
   .blink_led_stop         = &ixgbe_blink_led_stop_generic,
   .set_rar                = &ixgbe_set_rar_generic,
   .clear_rar              = &ixgbe_clear_rar_generic,
   .set_vmdq               = &ixgbe_set_vmdq_82599,
   .clear_vmdq             = &ixgbe_clear_vmdq_82599,
   .init_rx_addrs          = &ixgbe_init_rx_addrs_generic,
   .update_uc_addr_list    = &ixgbe_update_uc_addr_list_generic,
   .update_mc_addr_list    = &ixgbe_update_mc_addr_list_generic,
   .enable_mc              = &ixgbe_enable_mc_generic,
   .disable_mc             = &ixgbe_disable_mc_generic,
   .clear_vfta             = &ixgbe_clear_vfta_82599,
   .set_vfta               = &ixgbe_set_vfta_82599,
   .fc_enable               = &ixgbe_fc_enable_generic,
   .init_uta_tables        = &ixgbe_init_uta_tables_82599,
   .setup_sfp              = &ixgbe_setup_sfp_modules_82599,
};

/*
 * ixgbe_reset_hw_82599 - stop the adapter, reset the MAC and re-read addresses.
 * @hw: hardware structure of the adapter
 *
 * Stops the adapter, initialises the PHY (and SFP module if needed),
 * disables PCIe master access, issues a software reset through
 * IXGBE_CTRL, stores or restores the AUTOC/AUTOC2 link settings,
 * re-initialises the receive address registers and stores the permanent
 * MAC and SAN MAC addresses in hw->mac.
 *
 * Returns 0 on success, IXGBE_ERR_SFP_NOT_SUPPORTED when PHY init reports
 * it, IXGBE_ERR_MASTER_REQUESTS_PENDING when PCIe master disable fails, or
 * IXGBE_ERR_RESET_FAILED when the reset bit does not self-clear.
 *
 * NOTE(review): any other status returned by phy.ops.init() or
 * mac.ops.setup_sfp() is overwritten by the result of
 * ixgbe_disable_pcie_master() below; confirm that this is intended before
 * changing the ordering.
 */
static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
{
   s32 status = 0;
   u32 ctrl, ctrl_ext;
   u32 i;
   u32 autoc;
   u32 autoc2;

   /* Call adapter stop to disable tx/rx and clear interrupts */
   hw->mac.ops.stop_adapter(hw);

   /* PHY ops must be identified and initialized prior to reset */

   /* Init PHY and function pointers, perform SFP setup */
   status = hw->phy.ops.init(hw);

   /* An unsupported SFP module aborts the reset immediately */
   if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
      goto reset_hw_out;

   /* Setup SFP module if there is one present. */
   if (hw->phy.sfp_setup_needed) {
      status = hw->mac.ops.setup_sfp(hw);
      hw->phy.sfp_setup_needed = false;
   }

   /* Reset PHY */
   if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL)
      hw->phy.ops.reset(hw);

   /*
    * Prevent the PCI-E bus from hanging by disabling PCI-E master
    * access and verify no pending requests before reset
    */
   status = ixgbe_disable_pcie_master(hw);
   if (status != 0) {
      status = IXGBE_ERR_MASTER_REQUESTS_PENDING;
      hw_dbg(hw, "PCI-E Master disable polling has failed.\n");
   }

   /*
    * Issue global reset to the MAC.  This needs to be a SW reset.
    * If link reset is used, it might reset the MAC when mng is using it
    */
   ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
   IXGBE_WRITE_REG(hw, IXGBE_CTRL, (ctrl | IXGBE_CTRL_RST));
   IXGBE_WRITE_FLUSH(hw);

   /*
    * Poll for reset bit to self-clear indicating reset is complete
    * (at most 10 polls, 1us apart)
    */
   for (i = 0; i < 10; i++) {
      udelay(1);
      ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
      if (!(ctrl & IXGBE_CTRL_RST))
         break;
   }
   if (ctrl & IXGBE_CTRL_RST) {
      status = IXGBE_ERR_RESET_FAILED;
      hw_dbg(hw, "Reset polling failed to complete.\n");
   }
   /*
    * Set the PF Reset Done bit (IXGBE_CTRL_EXT_PFRSTD) so PF/VF Mail Ops
    * can work
    */
   ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
   ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD;
   IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

   msleep(50);



   /*
    * Store the original AUTOC/AUTOC2 values if they have not been
    * stored off yet.  Otherwise restore the stored original
    * values since the reset operation sets back to defaults.
    */
   autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
   autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
   if (hw->mac.orig_link_settings_stored == false) {
      hw->mac.orig_autoc = autoc;
      hw->mac.orig_autoc2 = autoc2;
      hw->mac.orig_link_settings_stored = true;
   } else {
      if (autoc != hw->mac.orig_autoc)
         IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc |
                         IXGBE_AUTOC_AN_RESTART));

      /* only the bits under IXGBE_AUTOC2_UPPER_MASK are restored */
      if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) !=
          (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) {
         autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK;
         autoc2 |= (hw->mac.orig_autoc2 &
                    IXGBE_AUTOC2_UPPER_MASK);
         IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2);
      }
   }

   /*
    * Store MAC address from RAR0, clear receive address registers, and
    * clear the multicast table.  Also reset num_rar_entries to 128,
    * since we modify this value when programming the SAN MAC address.
    */
   hw->mac.num_rar_entries = 128;
   hw->mac.ops.init_rx_addrs(hw);

   /*
    * Store the permanent mac address.  For the 82599 get_mac_addr is
    * ixgbe_get_mac_addr_generic(), which reads the address from the
    * RAL(0)/RAH(0) registers into hw->mac.perm_addr.
    */
   hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);

   /* Store the permanent SAN mac address */
   hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);

   /* Add the SAN MAC address to the RAR only if it's a valid address */
   if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
      hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
                          hw->mac.san_addr, 0, IXGBE_RAH_AV);

      /* Reserve the last RAR for the SAN MAC address */
      hw->mac.num_rar_entries--;
   }

reset_hw_out:
   return status;
}

/*
 * ixgbe_get_mac_addr_generic - read the MAC address held in RAR0.
 * @hw:       hardware structure used for register access
 * @mac_addr: output buffer; six bytes are written
 *
 * Bytes 0..3 are taken from RAL(0), least significant byte first, and
 * bytes 4..5 from the low 16 bits of RAH(0).  Always returns 0.
 */
s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr)
{
   const u32 rah0 = IXGBE_READ_REG(hw, IXGBE_RAH(0));
   const u32 ral0 = IXGBE_READ_REG(hw, IXGBE_RAL(0));

   /* low register: address bytes 0..3 */
   mac_addr[0] = (u8)(ral0 >> 0);
   mac_addr[1] = (u8)(ral0 >> 8);
   mac_addr[2] = (u8)(ral0 >> 16);
   mac_addr[3] = (u8)(ral0 >> 24);

   /* high register: address bytes 4..5 */
   mac_addr[4] = (u8)(rah0 >> 0);
   mac_addr[5] = (u8)(rah0 >> 8);

   return 0;
}

/*
 * Register offsets of Receive Address Low/High entry _i.  Entries 0..15
 * are based at 0x05400/0x05404, higher entries at 0x0A200/0x0A204; each
 * entry is 8 bytes apart.  Note that _i is evaluated twice.
 */
#define IXGBE_RAL(_i)   ((((_i) <= 15) ? 0x05400 : 0x0A200) + ((_i) * 8))
#define IXGBE_RAH(_i)   ((((_i) <= 15) ? 0x05404 : 0x0A204) + ((_i) * 8))

ixgbe_init_interrupt_scheme()

/*
 * ixgbe_init_interrupt_scheme - choose queue counts and set up interrupts.
 * @adapter: board private structure
 *
 * Determines the number of queues, requests interrupt resources, allocates
 * the queue vectors and the Tx/Rx ring arrays, then marks the adapter as
 * __IXGBE_DOWN.  On failure everything set up by the earlier steps is
 * undone and the failing step's error code is returned; 0 on success.
 */
int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
{
   int rc;

   /* Number of supported queues (driven by FDIR/RSS settings) */
   ixgbe_set_num_queues(adapter);

   /* request interrupts from the PCI subsystem */
   rc = ixgbe_set_interrupt_capability(adapter);
   if (rc) {
      DPRINTK(PROBE, ERR, "Unable to setup interrupt capabilities\n");
      return rc;
   }

   /* allocate the ixgbe_q_vectors and register their napi contexts */
   rc = ixgbe_alloc_q_vectors(adapter);
   if (rc) {
      DPRINTK(PROBE, ERR, "Unable to allocate memory for queue "
              "vectors\n");
      ixgbe_reset_interrupt_capability(adapter);
      return rc;
   }

   /* allocate the Tx/Rx ring arrays */
   rc = ixgbe_alloc_queues(adapter);
   if (rc) {
      DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n");
      ixgbe_free_q_vectors(adapter);
      ixgbe_reset_interrupt_capability(adapter);
      return rc;
   }

   DPRINTK(DRV, INFO, "Multiqueue %s: Rx Queue count = %u, "
           "Tx Queue count = %u\n",
           (adapter->num_rx_queues > 1) ? "Enabled" :
           "Disabled", adapter->num_rx_queues, adapter->num_tx_queues);

   set_bit(__IXGBE_DOWN, &adapter->state);

   return 0;
}

ixgbe_set_num_queues()

static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
   if (ixgbe_set_fcoe_queues(adapter))
      goto done;

#endif /* IXGBE_FCOE */
#ifdef CONFIG_IXGBE_DCB
   if (ixgbe_set_dcb_queues(adapter))
      goto done;

#endif
   if (ixgbe_set_fdir_queues(adapter))
      goto done;

   if (ixgbe_set_rss_queues(adapter))
      goto done;

   /* fallback to base case */
   adapter->num_rx_queues = 1;
   adapter->num_tx_queues = 1;

done:
   /* Notify the stack of the (possibly) reduced Tx Queue count. */
   adapter->netdev->real_num_tx_queues = adapter->num_tx_queues; // 設定實際傳送佇列數
}

/*
 * ixgbe_set_fdir_queues - size the queues for Flow Director.
 * @adapter: board private structure
 *
 * Caps ring_feature[RING_F_FDIR].indices at the number of online CPUs and
 * clears its mask.  When RSS is enabled and the adapter is Flow Director
 * capable (hash or perfect filters), both queue counts are set to that
 * value and true is returned.  Otherwise both Flow Director capability
 * flags are cleared and false is returned.
 */
static inline bool ixgbe_set_fdir_queues(struct ixgbe_adapter *adapter)
{
   bool ret = false;
   struct ixgbe_ring_feature *f_fdir = &adapter->ring_feature[RING_F_FDIR];

   /* never use more Flow Director queues than there are online CPUs */
   f_fdir->indices = min((int)num_online_cpus(), f_fdir->indices);
   f_fdir->mask = 0;

   /* Flow Director must have RSS enabled */
   if (adapter->flags & IXGBE_FLAG_RSS_ENABLED &&
       ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE ||
        (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)))) {
      adapter->num_tx_queues = f_fdir->indices;
      adapter->num_rx_queues = f_fdir->indices;
      ret = true;
   } else {
      adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
      adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
   }
   return ret;
}

/*
 * ixgbe_set_rss_queues - size the queues for RSS.
 * @adapter: board private structure
 *
 * When RSS is enabled, sets the RSS mask to 0xF, sets both queue counts
 * to ring_feature[RING_F_RSS].indices and returns true; otherwise leaves
 * the adapter untouched and returns false.
 */
static inline bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
{
   struct ixgbe_ring_feature *rss = &adapter->ring_feature[RING_F_RSS];

   if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
      return false;

   rss->mask = 0xF;
   adapter->num_rx_queues = rss->indices;
   adapter->num_tx_queues = rss->indices;

   return true;
}

ixgbe_set_interrupt_capability()

static int ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
{
   struct ixgbe_hw *hw = &adapter->hw;
   int err = 0;
   int vector, v_budget;

   /*
    * It's easy to be greedy for MSI-X vectors, but it really
    * doesn't do us much good if we have a lot more vectors
    * than CPU's.  So let's be conservative and only ask for
    * (roughly) twice the number of vectors as there are CPU's.
    */
   // 計算ixgbe0的msix中斷數,NON_Q_VECTORS對應的misx中斷(LSC等)繫結ixgbe0所在NUMA的所有CPU
   v_budget = min(adapter->num_rx_queues + adapter->num_tx_queues,
                  (int)(num_online_cpus() * 2)) + NON_Q_VECTORS;

   /*
    * At the same time, hardware can only support a maximum of
    * hw.mac->max_msix_vectors vectors.  With features
    * such as RSS and VMDq, we can easily surpass the number of Rx and Tx
    * descriptor queues supported by our device.  Thus, we cap it off in
    * those rare cases where the cpu count also exceeds our vector limit.
    */
   v_budget = min(v_budget, (int)hw->mac.max_msix_vectors);

   /* A failure in MSI-X entry allocation isn't fatal, but it does
    * mean we disable MSI-X capabilities of the adapter. */
   // 分配v_budget個msix_entry,地址賦給adapter->msix_entries
   adapter->msix_entries = kcalloc(v_budget,
                                   sizeof(struct msix_entry), GFP_KERNEL);
   if (adapter->msix_entries) {
      for (vector = 0; vector < v_budget; vector++)
         adapter->msix_entries[vector].entry = vector;

      ixgbe_acquire_msix_vectors(adapter, v_budget); // 向PCI子系統請求v_budget個msix中斷

      if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
         goto out;
   }

   adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
   adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
   adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
   adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
   adapter->atr_sample_rate = 0;
   ixgbe_set_num_queues(adapter);

   err = pci_enable_msi(adapter->pdev); // 向PCI子系統請求1個msi中斷
   if (!err) {
      adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
   } else {
      DPRINTK(HW, DEBUG, "Unable to allocate MSI interrupt, "
              "falling back to legacy.  Error: %d\n", err);
      /* reset err */
      err = 0;
   }

out:
   return err;
}

/*
 * ixgbe_acquire_msix_vectors - try to enable MSI-X with up to @vectors vectors.
 * @adapter: board private structure
 * @vectors: number of vectors to request initially
 *
 * Retries pci_enable_msix() with the smaller count it suggests until it
 * succeeds or the count drops below MIN_MSIX_COUNT.  On success
 * IXGBE_FLAG_MSIX_ENABLED is set and adapter->num_msix_vectors recorded;
 * on failure the flag is cleared and adapter->msix_entries is freed and
 * set to NULL.
 */
static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
                                       int vectors)
{
   /* We'll want at least 3 (MIN_MSIX_COUNT):
    * 1) TxQ[0] Cleanup
    * 2) RxQ[0] Cleanup
    * 3) Other (Link Status Change, etc.)
    * 4) TCP Timer (optional)
    */
   const int min_vectors = MIN_MSIX_COUNT;
   int rc;

   /* The more we get, the more we will assign to Tx/Rx Cleanup
    * for the separate queues...where Rx Cleanup >= Tx Cleanup.
    * Right now, we simply care about how many we'll get; we'll
    * set them up later while requesting irq's.
    */
   while (vectors >= min_vectors) {
      /* on success the vector numbers land in msix_entries[i].vector */
      rc = pci_enable_msix(adapter->pdev, adapter->msix_entries,
                           vectors);
      if (rc == 0)
         break; /* got everything we asked for */

      /* rc < 0: hard failure, give up; rc > 0: count to retry with */
      vectors = (rc < 0) ? 0 : rc;
   }

   if (vectors >= min_vectors) {
      adapter->flags |= IXGBE_FLAG_MSIX_ENABLED; /* Woot! */
      /*
       * Adjust for only the vectors we'll use, which is minimum
       * of max_msix_q_vectors + NON_Q_VECTORS, or the number of
       * vectors we were allocated.
       */
      adapter->num_msix_vectors = min(vectors,
                         adapter->max_msix_q_vectors + NON_Q_VECTORS);
      return;
   }

   /* Can't allocate enough MSI-X interrupts?  Oh well.
    * This just means we'll go with either a single MSI
    * vector or fall back to legacy interrupts.
    */
   DPRINTK(HW, DEBUG, "Unable to allocate MSI-X interrupts\n");
   adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
   kfree(adapter->msix_entries);
   adapter->msix_entries = NULL;
}

ixgbe_alloc_q_vectors()和ixgbe_alloc_queues()

/*
 * ixgbe_alloc_q_vectors - allocate one ixgbe_q_vector per interrupt vector.
 * @adapter: board private structure
 *
 * With MSI-X there is one q_vector per MSI-X vector except the
 * NON_Q_VECTORS ones, polled by ixgbe_clean_rxtx_many(); otherwise a
 * single q_vector polled by ixgbe_poll() is used.  Every q_vector gets a
 * napi context registered on adapter->netdev with a weight of 64.
 *
 * Returns 0 on success.  On allocation failure all q_vectors allocated so
 * far are released again and -ENOMEM is returned.
 */
static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
   int q_idx, num_q_vectors;
   struct ixgbe_q_vector *q_vector;
   int (*poll)(struct napi_struct *, int);

   if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
      /* MSI-X: the non-queue vectors (LSC etc.) get no q_vector */
      num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
      poll = &ixgbe_clean_rxtx_many;
   } else {
      /* MSI or legacy interrupt: a single q_vector */
      num_q_vectors = 1;
      poll = &ixgbe_poll;
   }

   for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
      q_vector = kzalloc(sizeof(struct ixgbe_q_vector), GFP_KERNEL);
      if (!q_vector)
         goto err_out;
      q_vector->adapter = adapter;
      /*
       * The q_vector was just zero-allocated, so txr_count is 0 and the
       * former "Tx-only vector" test could never be true: the Rx
       * parameter is always the one used here.
       */
      q_vector->eitr = adapter->rx_eitr_param;
      q_vector->v_idx = q_idx;
      /* register the napi context: poll callback, weight 64 */
      netif_napi_add(adapter->netdev, &q_vector->napi, (*poll), 64);
      adapter->q_vector[q_idx] = q_vector;
   }

   return 0;

err_out:
   /* unwind the q_vectors that were set up before the failure */
   while (q_idx) {
      q_idx--;
      q_vector = adapter->q_vector[q_idx];
      netif_napi_del(&q_vector->napi);
      kfree(q_vector);
      adapter->q_vector[q_idx] = NULL;
   }
   return -ENOMEM;
}

static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
{
   int i;

   // 分配num_tx_queues個ixgbe_ring(傳送ring陣列),地址賦給adapter->tx_ring
   adapter->tx_ring = kcalloc(adapter->num_tx_queues,
                              sizeof(struct ixgbe_ring), GFP_KERNEL);
   if (!adapter->tx_ring)
      goto err_tx_ring_allocation;

   // 分配num_rx_queues個ixgbe_ring(接收ring陣列),地址賦給adapter->rx_ring
   adapter->rx_ring = kcalloc(adapter->num_rx_queues,
                              sizeof(struct ixgbe_ring), GFP_KERNEL);
   if (!adapter->rx_ring)
      goto err_rx_ring_allocation;

   for (i = 0; i < adapter->num_tx_queues; i++) {
      adapter->tx_ring[i].count = adapter->tx_ring_count; // 設定tx_ring[i].count
      adapter->tx_ring[i].queue_index = i; // 設定tx_ring[i].queue_index
   }

   for (i = 0; i < adapter->num_rx_queues; i++) {
      adapter->rx_ring[i].count = adapter->rx_ring_count; // 設定rx_ring[i].count
      adapter->rx_ring[i].queue_index = i; // 設定rx_ring[i].queue_index
   }

   ixgbe_cache_ring_register(adapter); // 設定tx/rx_ring[i].reg_idx

   return 0;

err_rx_ring_allocation:
   kfree(adapter->tx_ring);
err_tx_ring_allocation:
   return -ENOMEM;
}

netif_napi_add()

初始化napi,加入dev->napi_list

/*
 * netif_napi_add - initialise a napi context and attach it to a device.
 * @dev:    network device the context belongs to
 * @napi:   napi context to initialise
 * @poll:   poll callback
 * @weight: stored as napi->weight (maximum packets handled per poll)
 *
 * The context is added to dev->napi_list and starts out with
 * NAPI_STATE_SCHED set.
 */
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
          int (*poll)(struct napi_struct *, int), int weight)
{
   /* polling parameters */
   napi->poll = poll;
   napi->weight = weight;

   /* no GRO state and no cached skb yet */
   napi->gro_list = NULL;
   napi->gro_count = 0;
   napi->skb = NULL;

   INIT_LIST_HEAD(&napi->poll_list);

   /* link the context to its device */
   list_add(&napi->dev_list, &dev->napi_list);
   napi->dev = dev;
#ifdef CONFIG_NETPOLL
   spin_lock_init(&napi->poll_lock);
   napi->poll_owner = -1;
#endif
   /* start in the "scheduled" state */
   set_bit(NAPI_STATE_SCHED, &napi->state);
}