vnic_main.c

/*
 * Copyright(c) 2017 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE 1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

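/* setup_vnic_ctxt - allocate the receive queues and enable the receive context */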
static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
{
        unsigned int rcvctrl_ops = 0;
        int ret;

        uctxt->do_interrupt = &handle_receive_interrupt;

        /* Now allocate the RcvHdr queue and eager buffers. */
        ret = hfi1_create_rcvhdrq(dd, uctxt);
        if (ret)
                goto done;

        ret = hfi1_setup_eagerbufs(uctxt);
        if (ret)
                goto done;

        if (uctxt->rcvhdrtail_kvaddr)
                clear_rcvhdrtail(uctxt);

        rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
        rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;

        if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
                rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
                rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
                rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
                rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;

        hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
done:
        return ret;
}

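/* allocate_vnic_ctxt - allocate a kernel receive context for vnic use */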
static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
                              struct hfi1_ctxtdata **vnic_ctxt)
{
        struct hfi1_ctxtdata *uctxt;
        int ret;

        if (dd->flags & HFI1_FROZEN)
                return -EIO;

        ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
        if (ret < 0) {
                dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
                return -ENOMEM;
        }

        uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
                        HFI1_CAP_KGET(NODROP_RHQ_FULL) |
                        HFI1_CAP_KGET(NODROP_EGR_FULL) |
                        HFI1_CAP_KGET(DMA_RTAIL);
        uctxt->seq_cnt = 1;
        uctxt->is_vnic = true;

        hfi1_set_vnic_msix_info(uctxt);

        hfi1_stats.sps_ctxts++;
        dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
        *vnic_ctxt = uctxt;

        return 0;
}

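/* deallocate_vnic_ctxt - disable and free a vnic receive context */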
static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
                                 struct hfi1_ctxtdata *uctxt)
{
        dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
        flush_wc();

        hfi1_reset_vnic_msix_info(uctxt);

        /*
         * Disable receive context and interrupt available, reset all
         * RcvCtxtCtrl bits to default values.
         */
        hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
                     HFI1_RCVCTRL_TIDFLOW_DIS |
                     HFI1_RCVCTRL_INTRAVAIL_DIS |
                     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
                     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
                     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);

        uctxt->event_flags = 0;

        hfi1_clear_tids(uctxt);
        hfi1_clear_ctxt_pkey(dd, uctxt);

        hfi1_stats.sps_ctxts--;

        hfi1_free_ctxt(uctxt);
}

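/* hfi1_vnic_setup - initialize the per-device virtual ethernet switch idr */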
void hfi1_vnic_setup(struct hfi1_devdata *dd)
{
        idr_init(&dd->vnic.vesw_idr);
}

void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
{
        idr_destroy(&dd->vnic.vesw_idr);
}

#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
                u64 *src64, *dst64;                            \
                for (src64 = &qstats->x_grp.unicast,           \
                        dst64 = &stats->x_grp.unicast;         \
                        dst64 <= &stats->x_grp.s_1519_max;) {  \
                        *dst64++ += *src64++;                  \
                }                                              \
        } while (0)

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
                                   struct opa_vnic_stats *stats)
{
        struct net_device *netdev = vinfo->netdev;
        u8 i;

        /* add tx counters on different queues */
        for (i = 0; i < vinfo->num_tx_q; i++) {
                struct opa_vnic_stats *qstats = &vinfo->stats[i];
                struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

                stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
                stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
                stats->tx_drop_state += qstats->tx_drop_state;
                stats->tx_dlid_zero += qstats->tx_dlid_zero;

                SUM_GRP_COUNTERS(stats, qstats, tx_grp);
                stats->netstats.tx_packets += qnstats->tx_packets;
                stats->netstats.tx_bytes += qnstats->tx_bytes;
        }

        /* add rx counters on different queues */
        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct opa_vnic_stats *qstats = &vinfo->stats[i];
                struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

                stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
                stats->netstats.rx_nohandler += qnstats->rx_nohandler;
                stats->rx_drop_state += qstats->rx_drop_state;
                stats->rx_oversize += qstats->rx_oversize;
                stats->rx_runt += qstats->rx_runt;

                SUM_GRP_COUNTERS(stats, qstats, rx_grp);
                stats->netstats.rx_packets += qnstats->rx_packets;
                stats->netstats.rx_bytes += qnstats->rx_bytes;
        }

        stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
                                    stats->netstats.tx_carrier_errors +
                                    stats->tx_drop_state + stats->tx_dlid_zero;
        stats->netstats.tx_dropped = stats->netstats.tx_errors;

        stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
                                    stats->netstats.rx_nohandler +
                                    stats->rx_drop_state + stats->rx_oversize +
                                    stats->rx_runt;
        stats->netstats.rx_dropped = stats->netstats.rx_errors;

        netdev->stats.tx_packets = stats->netstats.tx_packets;
        netdev->stats.tx_bytes = stats->netstats.tx_bytes;
        netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
        netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
        netdev->stats.tx_errors = stats->netstats.tx_errors;
        netdev->stats.tx_dropped = stats->netstats.tx_dropped;

        netdev->stats.rx_packets = stats->netstats.rx_packets;
        netdev->stats.rx_bytes = stats->netstats.rx_bytes;
        netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
        netdev->stats.multicast = stats->rx_grp.mcastbcast;
        netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
        netdev->stats.rx_errors = stats->netstats.rx_errors;
        netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
                                       int len)
{
        /* account for 4 byte FCS */
        if (len >= 1515)
                grp->s_1519_max++;
        else if (len >= 1020)
                grp->s_1024_1518++;
        else if (len >= 508)
                grp->s_512_1023++;
        else if (len >= 252)
                grp->s_256_511++;
        else if (len >= 124)
                grp->s_128_255++;
        else if (len >= 61)
                grp->s_65_127++;
        else
                grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
                                         u8 q_idx, struct sk_buff *skb, int err)
{
        struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
        struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
        struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
        u16 vlan_tci;

        stats->netstats.tx_packets++;
        stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

        update_len_counters(tx_grp, skb->len);

        /* rest of the counts are for good packets only */
        if (unlikely(err))
                return;

        if (is_multicast_ether_addr(mac_hdr->h_dest))
                tx_grp->mcastbcast++;
        else
                tx_grp->unicast++;

        if (!__vlan_get_tag(skb, &vlan_tci))
                tx_grp->vlan++;
        else
                tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
                                         u8 q_idx, struct sk_buff *skb, int err)
{
        struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
        struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
        struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
        u16 vlan_tci;

        stats->netstats.rx_packets++;
        stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

        update_len_counters(rx_grp, skb->len);

        /* rest of the counts are for good packets only */
        if (unlikely(err))
                return;

        if (is_multicast_ether_addr(mac_hdr->h_dest))
                rx_grp->mcastbcast++;
        else
                rx_grp->unicast++;

        if (!__vlan_get_tag(skb, &vlan_tci))
                rx_grp->vlan++;
        else
                rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
                                  struct rtnl_link_stats64 *stats)
{
        struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        hfi1_vnic_update_stats(vinfo, vstats);
}

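/* create_bypass_pbc - build the PBC for a bypass packet on the given VL */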
static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
        u64 pbc;

        pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
                | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
                | PBC_PACKET_BYPASS
                | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
                | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

        return pbc;
}

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
                                    u8 q_idx)
{
        netif_stop_subqueue(vinfo->netdev, q_idx);
        if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
                return;

        netif_start_subqueue(vinfo->netdev, q_idx);
}

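/* hfi1_netdev_start_xmit - hand an encapsulated skb to the vnic DMA send path */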
static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
                                          struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        u8 pad_len, q_idx = skb->queue_mapping;
        struct hfi1_devdata *dd = vinfo->dd;
        struct opa_vnic_skb_mdata *mdata;
        u32 pkt_len, total_len;
        int err = -EINVAL;
        u64 pbc;

        v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
        if (unlikely(!netif_oper_up(netdev))) {
                vinfo->stats[q_idx].tx_drop_state++;
                goto tx_finish;
        }

        /* take out meta data */
        mdata = (struct opa_vnic_skb_mdata *)skb->data;
        skb_pull(skb, sizeof(*mdata));
        if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
                vinfo->stats[q_idx].tx_dlid_zero++;
                goto tx_finish;
        }

        /* add tail padding (for 8 bytes size alignment) and icrc */
        pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
        pad_len += OPA_VNIC_ICRC_TAIL_LEN;

        /*
         * pkt_len is how much data we have to write, including header and
         * data. total_len is the packet length in dwords plus the PBC, and
         * should not include the CRC.
         */
        pkt_len = (skb->len + pad_len) >> 2;
        total_len = pkt_len + 2; /* PBC + packet */

        pbc = create_bypass_pbc(mdata->vl, total_len);

        skb_get(skb);
        v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
        err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
        if (unlikely(err)) {
                if (err == -ENOMEM)
                        vinfo->stats[q_idx].netstats.tx_fifo_errors++;
                else if (err != -EBUSY)
                        vinfo->stats[q_idx].netstats.tx_carrier_errors++;
        }

        /* remove the header before updating tx counters */
        skb_pull(skb, OPA_VNIC_HDR_LEN);

        if (unlikely(err == -EBUSY)) {
                hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
                dev_kfree_skb_any(skb);
                return NETDEV_TX_BUSY;
        }

tx_finish:
        /* update tx counters */
        hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
}

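/* hfi1_vnic_select_queue - map an skb to a tx queue via its SDMA engine */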
static u16 hfi1_vnic_select_queue(struct net_device *netdev,
                                  struct sk_buff *skb,
                                  struct net_device *sb_dev,
                                  select_queue_fallback_t fallback)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        struct opa_vnic_skb_mdata *mdata;
        struct sdma_engine *sde;

        mdata = (struct opa_vnic_skb_mdata *)skb->data;
        sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
        return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
                                      struct sk_buff *skb)
{
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
        int rc = -EFAULT;

        skb_pull(skb, OPA_VNIC_HDR_LEN);

        /* Validate Packet length */
        if (unlikely(skb->len > max_len))
                vinfo->stats[rxq->idx].rx_oversize++;
        else if (unlikely(skb->len < ETH_ZLEN))
                vinfo->stats[rxq->idx].rx_runt++;
        else
                rc = 0;
        return rc;
}

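/* hfi1_vnic_get_skb - dequeue the next skb and trim its tail padding and icrc */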
static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
{
        unsigned char *pad_info;
        struct sk_buff *skb;

        skb = skb_dequeue(&rxq->skbq);
        if (unlikely(!skb))
                return NULL;

        /* remove tail padding and icrc */
        pad_info = skb->data + skb->len - 1;
        skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
                       ((*pad_info) & 0x7)));

        return skb;
}

/* hfi1_vnic_handle_rx - handle skb receive */
static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
                                int *work_done, int work_to_do)
{
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        struct sk_buff *skb;
        int rc;

        while (1) {
                if (*work_done >= work_to_do)
                        break;

                skb = hfi1_vnic_get_skb(rxq);
                if (unlikely(!skb))
                        break;

                rc = hfi1_vnic_decap_skb(rxq, skb);
                /* update rx counters */
                hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
                if (unlikely(rc)) {
                        dev_kfree_skb_any(skb);
                        continue;
                }

                skb_checksum_none_assert(skb);
                skb->protocol = eth_type_trans(skb, rxq->netdev);

                napi_gro_receive(&rxq->napi, skb);
                (*work_done)++;
        }
}

/* hfi1_vnic_napi - napi receive polling callback function */
static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
{
        struct hfi1_vnic_rx_queue *rxq = container_of(napi,
                                              struct hfi1_vnic_rx_queue, napi);
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        int work_done = 0;

        v_dbg("napi %d budget %d\n", rxq->idx, budget);
        hfi1_vnic_handle_rx(rxq, &work_done, budget);

        v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
        if (work_done < budget)
                napi_complete(napi);

        return work_done;
}

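/* hfi1_vnic_bypass_rcv - demux a bypass packet to its vport rx queue and kick napi */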
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
        struct hfi1_devdata *dd = packet->rcd->dd;
        struct hfi1_vnic_vport_info *vinfo = NULL;
        struct hfi1_vnic_rx_queue *rxq;
        struct sk_buff *skb;
        int l4_type, vesw_id = -1;
        u8 q_idx;

        l4_type = hfi1_16B_get_l4(packet->ebuf);
        if (likely(l4_type == OPA_16B_L4_ETHR)) {
                vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
                vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);

                /*
                 * In case of invalid vesw id, count the error on
                 * the first available vport.
                 */
                if (unlikely(!vinfo)) {
                        struct hfi1_vnic_vport_info *vinfo_tmp;
                        int id_tmp = 0;

                        vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
                        if (vinfo_tmp) {
                                spin_lock(&vport_cntr_lock);
                                vinfo_tmp->stats[0].netstats.rx_nohandler++;
                                spin_unlock(&vport_cntr_lock);
                        }
                }
        }

        if (unlikely(!vinfo)) {
                dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
                            l4_type, vesw_id, packet->rcd->ctxt);
                return;
        }

        q_idx = packet->rcd->vnic_q_idx;
        rxq = &vinfo->rxq[q_idx];
        if (unlikely(!netif_oper_up(vinfo->netdev))) {
                vinfo->stats[q_idx].rx_drop_state++;
                skb_queue_purge(&rxq->skbq);
                return;
        }

        if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
                vinfo->stats[q_idx].netstats.rx_fifo_errors++;
                return;
        }

        skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
        if (unlikely(!skb)) {
                vinfo->stats[q_idx].netstats.rx_fifo_errors++;
                return;
        }

        memcpy(skb->data, packet->ebuf, packet->tlen);
        skb_put(skb, packet->tlen);

        skb_queue_tail(&rxq->skbq, skb);

        if (napi_schedule_prep(&rxq->napi)) {
                v_dbg("napi %d scheduling\n", q_idx);
                __napi_schedule(&rxq->napi);
        }
}

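/* hfi1_vnic_up - register the vesw id, enable napi and start the tx queues */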
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        struct net_device *netdev = vinfo->netdev;
        int i, rc;

        /* ensure virtual eth switch id is valid */
        if (!vinfo->vesw_id)
                return -EINVAL;

        rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
                       vinfo->vesw_id + 1, GFP_NOWAIT);
        if (rc < 0)
                return rc;

        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                skb_queue_head_init(&rxq->skbq);
                napi_enable(&rxq->napi);
        }

        netif_carrier_on(netdev);
        netif_tx_start_all_queues(netdev);
        set_bit(HFI1_VNIC_UP, &vinfo->flags);

        return 0;
}

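/* hfi1_vnic_down - stop the queues, unregister the vesw id and drain rx */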
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        u8 i;

        clear_bit(HFI1_VNIC_UP, &vinfo->flags);
        netif_carrier_off(vinfo->netdev);
        netif_tx_disable(vinfo->netdev);
        idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);

        /* ensure irqs see the change */
        hfi1_vnic_synchronize_irq(dd);

        /* remove unread skbs */
        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                napi_disable(&rxq->napi);
                skb_queue_purge(&rxq->skbq);
        }
}

static int hfi1_netdev_open(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        int rc;

        mutex_lock(&vinfo->lock);
        rc = hfi1_vnic_up(vinfo);
        mutex_unlock(&vinfo->lock);

        return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        mutex_lock(&vinfo->lock);
        if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
                hfi1_vnic_down(vinfo);
        mutex_unlock(&vinfo->lock);

        return 0;
}

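/* hfi1_vnic_allot_ctxt - allocate and set up one vnic receive context */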
static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
                                struct hfi1_ctxtdata **vnic_ctxt)
{
        int rc;

        rc = allocate_vnic_ctxt(dd, vnic_ctxt);
        if (rc) {
                dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
                return rc;
        }

        rc = setup_vnic_ctxt(dd, *vnic_ctxt);
        if (rc) {
                dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
                deallocate_vnic_ctxt(dd, *vnic_ctxt);
                *vnic_ctxt = NULL;
        }

        return rc;
}

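/* hfi1_vnic_init - allocate the device-wide tx resources and rx contexts for a vport */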
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        int i, rc = 0;

        mutex_lock(&hfi1_mutex);
        if (!dd->vnic.num_vports) {
                rc = hfi1_vnic_txreq_init(dd);
                if (rc)
                        goto txreq_fail;

                dd->vnic.msix_idx = dd->first_dyn_msix_idx;
        }

        for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
                rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
                if (rc)
                        break;
                hfi1_rcd_get(dd->vnic.ctxt[i]);
                dd->vnic.ctxt[i]->vnic_q_idx = i;
        }

        if (i < vinfo->num_rx_q) {
                /*
                 * If the required number of contexts could not be
                 * allocated, release the contexts that were allocated
                 * above.
                 */
                while (i-- > dd->vnic.num_ctxt) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
                        hfi1_rcd_put(dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }
                goto alloc_fail;
        }

        if (dd->vnic.num_ctxt != i) {
                dd->vnic.num_ctxt = i;
                hfi1_init_vnic_rsm(dd);
        }

        dd->vnic.num_vports++;
        hfi1_vnic_sdma_init(vinfo);

alloc_fail:
        if (!dd->vnic.num_vports)
                hfi1_vnic_txreq_deinit(dd);
txreq_fail:
        mutex_unlock(&hfi1_mutex);
        return rc;
}

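/* hfi1_vnic_deinit - release vnic contexts and tx resources with the last vport */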
static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        int i;

        mutex_lock(&hfi1_mutex);
        if (--dd->vnic.num_vports == 0) {
                for (i = 0; i < dd->vnic.num_ctxt; i++) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
                        hfi1_rcd_put(dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }
                hfi1_deinit_vnic_rsm(dd);
                dd->vnic.num_ctxt = 0;
                hfi1_vnic_txreq_deinit(dd);
        }
        mutex_unlock(&hfi1_mutex);
}

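/* hfi1_vnic_set_vesw_id - update the virtual ethernet switch id of a vport */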
static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        bool reopen = false;

        /*
         * If vesw_id is being changed, and if the vnic port is up,
         * reset the vnic port to ensure new vesw_id gets picked up
         */
        if (id != vinfo->vesw_id) {
                mutex_lock(&vinfo->lock);
                if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
                        hfi1_vnic_down(vinfo);
                        reopen = true;
                }

                vinfo->vesw_id = id;
                if (reopen)
                        hfi1_vnic_up(vinfo);

                mutex_unlock(&vinfo->lock);
        }
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
        .ndo_open = hfi1_netdev_open,
        .ndo_stop = hfi1_netdev_close,
        .ndo_start_xmit = hfi1_netdev_start_xmit,
        .ndo_select_queue = hfi1_vnic_select_queue,
        .ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        hfi1_vnic_deinit(vinfo);
        mutex_destroy(&vinfo->lock);
        free_netdev(netdev);
}

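/* hfi1_vnic_alloc_rn - allocate and initialize an rdma netdev for OPA vnic */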
struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
                                      u8 port_num,
                                      enum rdma_netdev_t type,
                                      const char *name,
                                      unsigned char name_assign_type,
                                      void (*setup)(struct net_device *))
{
        struct hfi1_devdata *dd = dd_from_ibdev(device);
        struct hfi1_vnic_vport_info *vinfo;
        struct net_device *netdev;
        struct rdma_netdev *rn;
        int i, size, rc;

        if (!dd->num_vnic_contexts)
                return ERR_PTR(-ENOMEM);

        if (!port_num || (port_num > dd->num_pports))
                return ERR_PTR(-EINVAL);

        if (type != RDMA_NETDEV_OPA_VNIC)
                return ERR_PTR(-EOPNOTSUPP);

        size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
        netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
                                  chip_sdma_engines(dd), dd->num_vnic_contexts);
        if (!netdev)
                return ERR_PTR(-ENOMEM);

        rn = netdev_priv(netdev);
        vinfo = opa_vnic_dev_priv(netdev);
        vinfo->dd = dd;
        vinfo->num_tx_q = chip_sdma_engines(dd);
        vinfo->num_rx_q = dd->num_vnic_contexts;
        vinfo->netdev = netdev;
        rn->free_rdma_netdev = hfi1_vnic_free_rn;
        rn->set_id = hfi1_vnic_set_vesw_id;

        netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
        netdev->hw_features = netdev->features;
        netdev->vlan_features = netdev->features;
        netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
        netdev->netdev_ops = &hfi1_netdev_ops;
        mutex_init(&vinfo->lock);

        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                rxq->idx = i;
                rxq->vinfo = vinfo;
                rxq->netdev = netdev;
                netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
        }

        rc = hfi1_vnic_init(vinfo);
        if (rc)
                goto init_fail;

        return netdev;

init_fail:
        mutex_destroy(&vinfo->lock);
        free_netdev(netdev);
        return ERR_PTR(rc);
}