vop_vringh.c

/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel Virtio Over PCIe (VOP) driver.
 *
 */
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/dma-mapping.h>

#include <linux/mic_common.h>
#include "../common/mic_dev.h"

#include <linux/mic_ioctl.h>
#include "vop_main.h"

/* Helper API to obtain the VOP PCIe device */
static inline struct device *vop_dev(struct vop_vdev *vdev)
{
	return vdev->vpdev->dev.parent;
}

/* Helper API to check if a virtio device is initialized */
static inline int vop_vdev_inited(struct vop_vdev *vdev)
{
	if (!vdev)
		return -EINVAL;
	/* Device has not been created yet */
	if (!vdev->dd || !vdev->dd->type) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -EINVAL);
		return -EINVAL;
	}
	/* Device has been removed/deleted */
	if (vdev->dd->type == -1) {
		dev_dbg(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -ENODEV);
		return -ENODEV;
	}
	return 0;
}
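
/*
 * vringh notify callback: ring the doorbell registered by the card
 * (h2c_vdev_db), if any, so the card knows the host has updated the vring.
 */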
static void _vop_notify(struct vringh *vrh)
{
	struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
	struct vop_vdev *vdev = vvrh->vdev;
	struct vop_device *vpdev = vdev->vpdev;
	s8 db = vdev->dc->h2c_vdev_db;

	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
}
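
/*
 * Called once the card has published the used ring addresses in the
 * device page: map each used ring so the host side vringh can update
 * it directly.
 */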
static void vop_virtio_init_post(struct vop_vdev *vdev)
{
	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
	struct vop_device *vpdev = vdev->vpdev;
	int i, used_size;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
				       sizeof(struct vring_used_elem) *
				       le16_to_cpu(vqconfig[i].num));
		if (!le64_to_cpu(vqconfig[i].used_address)) {
			dev_warn(vop_dev(vdev), "used_address zero??\n");
			continue;
		}
		vdev->vvr[i].vrh.vring.used =
			(void __force *)vpdev->hw_ops->ioremap(
			vpdev,
			le64_to_cpu(vqconfig[i].used_address),
			used_size);
	}

	vdev->dc->used_address_updated = 0;

	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
		 __func__, vdev->virtio_id);
}
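
/* Reset the host side vring state for every virtqueue of the device. */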
static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
{
	int i;

	dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
		__func__, vdev->dd->status, vdev->virtio_id);

	for (i = 0; i < vdev->dd->num_vq; i++)
		/*
		 * Avoid lockdep false positive. The + 1 is for the vop
		 * mutex which is held in the reset devices code path.
		 */
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	/* 0 status means "reset" */
	vdev->dd->status = 0;
	vdev->dc->vdev_reset = 0;
	vdev->dc->host_ack = 1;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vringh *vrh = &vdev->vvr[i].vrh;

		vdev->vvr[i].vring.info->avail_idx = 0;
		vrh->completed = 0;
		vrh->last_avail_idx = 0;
		vrh->last_used_idx = 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
}
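
/* Reset all virtio devices on the list and wake up any pollers. */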
static void vop_virtio_reset_devices(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		vop_virtio_device_reset(vdev);
		vdev->poll_wake = 1;
		wake_up(&vdev->waitq);
	}
}
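
/*
 * Bottom half scheduled from the virtio doorbell interrupt: handle used
 * ring address updates and device resets requested by the card, then
 * wake up anyone blocked in poll.
 */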
static void vop_bh_handler(struct work_struct *work)
{
	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
					     virtio_bh_work);

	if (vdev->dc->used_address_updated)
		vop_virtio_init_post(vdev);

	if (vdev->dc->vdev_reset)
		vop_virtio_device_reset(vdev);

	vdev->poll_wake = 1;
	wake_up(&vdev->waitq);
}
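
/* Per device doorbell interrupt handler; defers work to the bottom half. */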
static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
{
	struct vop_vdev *vdev = data;
	struct vop_device *vpdev = vdev->vpdev;

	vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
	schedule_work(&vdev->virtio_bh_work);
	return IRQ_HANDLED;
}
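
/*
 * Copy a new configuration into the device page config space, interrupt
 * the card and poll for up to 10 seconds for the guest to acknowledge
 * the change.
 */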
static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
	int ret = 0, retry, i;
	struct vop_device *vpdev = vdev->vpdev;
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db = bootparam->h2c_config_db;

	mutex_lock(&vi->vop_mutex);
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	if (db == -1 || vdev->dd->type == -1) {
		ret = -EIO;
		goto exit;
	}

	memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
	vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
	vpdev->hw_ops->send_intr(vpdev, db);

	for (retry = 100; retry--;) {
		ret = wait_event_timeout(wake, vdev->dc->guest_ack,
					 msecs_to_jiffies(100));
		if (ret)
			break;
	}

	dev_dbg(vop_dev(vdev),
		"%s %d retry: %d\n", __func__, __LINE__, retry);
	vdev->dc->config_change = 0;
	vdev->dc->guest_ack = 0;
exit:
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
	mutex_unlock(&vi->vop_mutex);
	return ret;
}
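
/*
 * Validate the requested vring sizes and copy the new device descriptor
 * into the first free slot in the device page. The descriptor type is
 * left clear here and published by the caller once setup completes.
 */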
static int vop_copy_dp_entry(struct vop_vdev *vdev,
			     struct mic_device_desc *argp, __u8 *type,
			     struct mic_device_desc **devpage)
{
	struct vop_device *vpdev = vdev->vpdev;
	struct mic_device_desc *devp;
	struct mic_vqconfig *vqconfig;
	int ret = 0, i;
	bool slot_found = false;

	vqconfig = mic_vq_config(argp);
	for (i = 0; i < argp->num_vq; i++) {
		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
			ret = -EINVAL;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto exit;
		}
	}

	/* Find the first free device page entry */
	for (i = sizeof(struct mic_bootparam);
	     i < MIC_DP_SIZE - mic_total_desc_size(argp);
	     i += mic_total_desc_size(devp)) {
		devp = vpdev->hw_ops->get_dp(vpdev) + i;
		if (devp->type == 0 || devp->type == -1) {
			slot_found = true;
			break;
		}
	}
	if (!slot_found) {
		ret = -EINVAL;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		goto exit;
	}
	/*
	 * Save off the type before doing the memcpy. Type will be set in
	 * the end after completing all initialization for the new device.
	 */
	*type = argp->type;
	argp->type = 0;
	memcpy(devp, argp, mic_desc_size(argp));

	*devpage = devp;
exit:
	return ret;
}
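
/* Initialize the device control fields that follow the descriptor. */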
static void vop_init_device_ctrl(struct vop_vdev *vdev,
				 struct mic_device_desc *devpage)
{
	struct mic_device_ctrl *dc;

	dc = (void *)devpage + mic_aligned_desc_size(devpage);

	dc->config_change = 0;
	dc->guest_ack = 0;
	dc->vdev_reset = 0;
	dc->host_ack = 0;
	dc->used_address_updated = 0;
	dc->c2h_vdev_db = -1;
	dc->h2c_vdev_db = -1;
	vdev->dc = dc;
}
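
/*
 * Add a new virtio device to the device page: allocate and DMA map the
 * vrings and the internal DMA bounce buffers, initialize the vringh
 * instances, register the doorbell interrupt, and finally publish the
 * device type so the card can discover the device.
 */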
static int vop_virtio_add_device(struct vop_vdev *vdev,
				 struct mic_device_desc *argp)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vi->vpdev;
	struct mic_device_desc *dd = NULL;
	struct mic_vqconfig *vqconfig;
	int vr_size, i, j, ret;
	u8 type = 0;
	s8 db = -1;
	char irqname[16];
	struct mic_bootparam *bootparam;
	u16 num;
	dma_addr_t vr_addr;

	bootparam = vpdev->hw_ops->get_dp(vpdev);
	init_waitqueue_head(&vdev->waitq);
	INIT_LIST_HEAD(&vdev->list);
	vdev->vpdev = vpdev;

	ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
	if (ret) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		return ret;
	}

	vop_init_device_ctrl(vdev, dd);

	vdev->dd = dd;
	vdev->virtio_id = type;
	vqconfig = mic_vq_config(dd);
	INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);

	for (i = 0; i < dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];
		struct mic_vring *vr = &vdev->vvr[i].vring;

		num = le16_to_cpu(vqconfig[i].num);
		mutex_init(&vvr->vr_mutex);
		vr_size = PAGE_ALIGN(round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4) +
			sizeof(struct _mic_vring_info));
		vr->va = (void *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					 get_order(vr_size));
		if (!vr->va) {
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vr->len = vr_size;
		vr->info = vr->va + round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4);
		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
					 DMA_BIDIRECTIONAL);
		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
			free_pages((unsigned long)vr->va, get_order(vr_size));
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vqconfig[i].address = cpu_to_le64(vr_addr);

		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
		ret = vringh_init_kern(&vvr->vrh,
				       *(u32 *)mic_vq_features(vdev->dd),
				       num, false, vr->vr.desc, vr->vr.avail,
				       vr->vr.used);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vringh_kiov_init(&vvr->riov, NULL, 0);
		vringh_kiov_init(&vvr->wiov, NULL, 0);
		vvr->head = USHRT_MAX;
		vvr->vdev = vdev;
		vvr->vrh.notify = _vop_notify;
		dev_dbg(&vpdev->dev,
			"%s %d index %d va %p info %p vr_size 0x%x\n",
			__func__, __LINE__, i, vr->va, vr->info, vr_size);
		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
					get_order(VOP_INT_DMA_BUF_SIZE));
		vvr->buf_da = dma_map_single(&vpdev->dev,
					     vvr->buf, VOP_INT_DMA_BUF_SIZE,
					     DMA_BIDIRECTIONAL);
	}

	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
		 vdev->virtio_id);
	vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
			_vop_virtio_intr_handler, irqname, vdev,
			vdev->virtio_db);
	if (IS_ERR(vdev->virtio_cookie)) {
		ret = PTR_ERR(vdev->virtio_cookie);
		dev_dbg(&vpdev->dev, "request irq failed\n");
		goto err;
	}

	vdev->dc->c2h_vdev_db = vdev->virtio_db;

	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	dd->type = type;
	argp->type = type;

	if (bootparam) {
		db = bootparam->h2c_config_db;
		if (db != -1)
			vpdev->hw_ops->send_intr(vpdev, db);
	}
	dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
	return 0;
err:
	vqconfig = mic_vq_config(dd);
	for (j = 0; j < i; j++) {
		struct vop_vringh *vvr = &vdev->vvr[j];

		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	return ret;
}
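
/*
 * Inform the card of a pending hot remove and wait up to 15 seconds for
 * the guest to acknowledge it before the device is torn down.
 */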
static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
			   struct vop_device *vpdev)
{
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db;
	int ret, retry;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);

	devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
	db = bootparam->h2c_config_db;
	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
	else
		goto done;
	for (retry = 15; retry--;) {
		ret = wait_event_timeout(wake, devp->guest_ack,
					 msecs_to_jiffies(1000));
		if (ret)
			break;
	}
done:
	devp->config_change = 0;
	devp->guest_ack = 0;
}
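
/*
 * Remove the device: free the irq, the vrings and the internal DMA
 * buffers, then mark the descriptor slot as deleted (type == -1) so the
 * card sees the device disappear and the slot can be reused.
 */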
static void vop_virtio_del_device(struct vop_vdev *vdev)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vdev->vpdev;
	int i;
	struct mic_vqconfig *vqconfig;
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);

	if (!bootparam)
		goto skip_hot_remove;
	vop_dev_remove(vi, vdev->dc, vpdev);
skip_hot_remove:
	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
	flush_work(&vdev->virtio_bh_work);
	vqconfig = mic_vq_config(vdev->dd);
	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		dma_unmap_single(&vpdev->dev,
				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
				 DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->buf,
			   get_order(VOP_INT_DMA_BUF_SIZE));
		vringh_kiov_cleanup(&vvr->riov);
		vringh_kiov_cleanup(&vvr->wiov);
		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	vdev->dd->type = -1;
}

/*
 * vop_sync_dma - Wrapper for synchronous DMAs.
 *
 * @vdev - The VOP virtio device which requested the DMA.
 * @dst - destination DMA address.
 * @src - source DMA address.
 * @len - size of the transfer.
 *
 * Returns 0 on success, a negative error code on failure.
 */
static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
			size_t len)
{
	int err = 0;
	struct dma_device *ddev;
	struct dma_async_tx_descriptor *tx;
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	struct dma_chan *vop_ch = vi->dma_ch;

	if (!vop_ch) {
		err = -EBUSY;
		goto error;
	}
	ddev = vop_ch->device;
	tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
		DMA_PREP_FENCE);
	if (!tx) {
		err = -ENOMEM;
		goto error;
	} else {
		dma_cookie_t cookie;

		cookie = tx->tx_submit(tx);
		if (dma_submit_error(cookie)) {
			err = -ENOMEM;
			goto error;
		}
		dma_async_issue_pending(vop_ch);
		err = dma_sync_wait(vop_ch, cookie);
	}
error:
	if (err)
		dev_err(&vi->vpdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
	return err;
}

#define VOP_USE_DMA true

/*
 * Initiates the copies across the PCIe bus from card memory to a user
 * space buffer. When transfers are done using DMA, source/destination
 * addresses and transfer length must follow the alignment requirements of
 * the MIC DMA engine.
 */
static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
				   size_t len, u64 daddr, size_t dlen,
				   int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
	size_t dma_offset, partlen;
	int err;

	if (!VOP_USE_DMA) {
		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		vdev->in_bytes += len;
		err = 0;
		goto err;
	}

	dma_offset = daddr - round_down(daddr, dma_alignment);
	daddr -= dma_offset;
	len += dma_offset;

	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
		err = vop_sync_dma(vdev, vvr->buf_da, daddr,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		if (copy_to_user(ubuf, vvr->buf + dma_offset,
				 partlen - dma_offset)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->in_bytes_dma += partlen;
		vdev->in_bytes += partlen;
		len -= partlen;
		dma_offset = 0;
	}
	err = 0;
err:
	vpdev->hw_ops->iounmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}

/*
 * Initiates copies across the PCIe bus from a user space buffer to card
 * memory. When transfers are done using DMA, source/destination addresses
 * and transfer length must follow the alignment requirements of the MIC
 * DMA engine.
 */
static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
				     size_t len, u64 daddr, size_t dlen,
				     int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
	size_t partlen;
	bool dma = VOP_USE_DMA;
	int err = 0;
	size_t offset = 0;

	if (daddr & (dma_alignment - 1)) {
		vdev->tx_dst_unaligned += len;
		dma = false;
	} else if (ALIGN(len, dma_alignment) > dlen) {
		vdev->tx_len_unaligned += len;
		dma = false;
	}

	if (!dma)
		goto memcpy;

	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);

		if (copy_from_user(vvr->buf, ubuf, partlen)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		err = vop_sync_dma(vdev, daddr, vvr->buf_da,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->out_bytes_dma += partlen;
		vdev->out_bytes += partlen;
		len -= partlen;
	}
memcpy:
	/*
	 * We are copying to IO below and should ideally use something
	 * like copy_from_user_toio(..) if it existed.
	 */
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);

		if (copy_from_user(vvr->buf, ubuf + offset, partlen)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		memcpy_toio(dbuf + offset, vvr->buf, partlen);
		offset += partlen;
		vdev->out_bytes += partlen;
		len -= partlen;
	}
	err = 0;
err:
	vpdev->hw_ops->iounmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}

#define MIC_VRINGH_READ true

/* Determine the total number of bytes consumed in a VRINGH KIOV */
static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
{
	int i;
	u32 total = iov->consumed;

	for (i = 0; i < iov->i; i++)
		total += iov->iov[i].iov_len;
	return total;
}

/*
 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
 * This API is heavily based on the vringh_iov_xfer(..) implementation
 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
 * and vringh_iov_push_kern(..) directly is because there is no
 * way to override the VRINGH xfer(..) routines as of v3.10.
 */
static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
			   void __user *ubuf, size_t len, bool read, int vr_idx,
			   size_t *out_len)
{
	int ret = 0;
	size_t partlen, tot_len = 0;

	while (len && iov->i < iov->used) {
		struct kvec *kiov = &iov->iov[iov->i];

		partlen = min(kiov->iov_len, len);
		if (read)
			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
						      (u64)kiov->iov_base,
						      kiov->iov_len,
						      vr_idx);
		else
			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
							(u64)kiov->iov_base,
							kiov->iov_len,
							vr_idx);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= partlen;
		ubuf += partlen;
		tot_len += partlen;
		iov->consumed += partlen;
		kiov->iov_len -= partlen;
		kiov->iov_base += partlen;
		if (!kiov->iov_len) {
			/* Fix up old iov element then increment. */
			kiov->iov_len = iov->consumed;
			kiov->iov_base -= iov->consumed;
			iov->consumed = 0;
			iov->i++;
		}
	}
	*out_len = tot_len;
	return ret;
}

/*
 * Use the standard VRINGH infrastructure in the kernel to fetch new
 * descriptors, initiate the copies and update the used ring.
 */
static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
{
	int ret = 0;
	u32 iovcnt = copy->iovcnt;
	struct iovec iov;
	struct iovec __user *u_iov = copy->iov;
	void __user *ubuf = NULL;
	struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
	struct vringh_kiov *riov = &vvr->riov;
	struct vringh_kiov *wiov = &vvr->wiov;
	struct vringh *vrh = &vvr->vrh;
	u16 *head = &vvr->head;
	struct mic_vring *vr = &vvr->vring;
	size_t len = 0, out_len;

	copy->out_len = 0;
	/* Fetch a new IOVEC if all previous elements have been processed */
	if (riov->i == riov->used && wiov->i == wiov->used) {
		ret = vringh_getdesc_kern(vrh, riov, wiov,
					  head, GFP_KERNEL);
		/* Check if there are available descriptors */
		if (ret <= 0)
			return ret;
	}
	while (iovcnt) {
		if (!len) {
			/* Copy over a new iovec from user space. */
			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
			if (ret) {
				ret = -EINVAL;
				dev_err(vop_dev(vdev), "%s %d err %d\n",
					__func__, __LINE__, ret);
				break;
			}
			len = iov.iov_len;
			ubuf = iov.iov_base;
		}
		/* Issue all the read descriptors first */
		ret = vop_vringh_copy(vdev, riov, ubuf, len,
				      MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		/* Issue the write descriptors next */
		ret = vop_vringh_copy(vdev, wiov, ubuf, len,
				      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		if (!len) {
			/* One user space iovec is now completed */
			iovcnt--;
			u_iov++;
		}
		/* Exit loop if all elements in KIOVs have been processed. */
		if (riov->i == riov->used && wiov->i == wiov->used)
			break;
	}
	/*
	 * Update the used ring if a descriptor was available and some data was
	 * copied in/out and the user asked for a used ring update.
	 */
	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
		u32 total = 0;

		/* Determine the total data consumed */
		total += vop_vringh_iov_consumed(riov);
		total += vop_vringh_iov_consumed(wiov);
		vringh_complete_kern(vrh, *head, total);
		*head = USHRT_MAX;
		if (vringh_need_notify_kern(vrh) > 0)
			vringh_notify(vrh);
		vringh_kiov_cleanup(riov);
		vringh_kiov_cleanup(wiov);
		/* Update avail idx for user space */
		vr->info->avail_idx = vrh->last_avail_idx;
	}
	return ret;
}

static inline int vop_verify_copy_args(struct vop_vdev *vdev,
				       struct mic_copy_desc *copy)
{
	if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
		return -EINVAL;
	return 0;
}

/* Copy a specified number of virtio descriptors in a chain */
static int vop_virtio_copy_desc(struct vop_vdev *vdev,
				struct mic_copy_desc *copy)
{
	int err;
	struct vop_vringh *vvr;

	err = vop_verify_copy_args(vdev, copy);
	if (err)
		return err;

	vvr = &vdev->vvr[copy->vr_idx];
	mutex_lock(&vvr->vr_mutex);
	if (!vop_vdevup(vdev)) {
		err = -ENODEV;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	err = _vop_virtio_copy(vdev, copy);
	if (err) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
	}
err:
	mutex_unlock(&vvr->vr_mutex);
	return err;
}
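
/*
 * Each open file handle holds at most one virtio device, which is created
 * later via the MIC_VIRTIO_ADD_DEVICE ioctl.
 */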
static int vop_open(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev;
	struct vop_info *vi = container_of(f->private_data,
		struct vop_info, miscdev);

	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
	if (!vdev)
		return -ENOMEM;
	vdev->vi = vi;
	mutex_init(&vdev->vdev_mutex);
	f->private_data = vdev;
	init_completion(&vdev->destroy);
	complete(&vdev->destroy);
	return 0;
}
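
/*
 * Delete the virtio device on release unless it was already torn down by
 * vop_host_uninit, in which case wait for that teardown to complete
 * before freeing the vdev.
 */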
static int vop_release(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev = f->private_data, *vdev_tmp;
	struct vop_info *vi = vdev->vi;
	struct list_head *pos, *tmp;
	bool found = false;

	mutex_lock(&vdev->vdev_mutex);
	if (vdev->deleted)
		goto unlock;
	mutex_lock(&vi->vop_mutex);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev_tmp = list_entry(pos, struct vop_vdev, list);
		if (vdev == vdev_tmp) {
			vop_virtio_del_device(vdev);
			list_del(pos);
			found = true;
			break;
		}
	}
	mutex_unlock(&vi->vop_mutex);
unlock:
	mutex_unlock(&vdev->vdev_mutex);
	if (!found)
		wait_for_completion(&vdev->destroy);
	f->private_data = NULL;
	kfree(vdev);
	return 0;
}
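
/*
 * ioctl entry point: supports adding a new virtio device, copying
 * descriptor chains to/from user space and updating the virtio
 * configuration space.
 */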
static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct vop_vdev *vdev = f->private_data;
	struct vop_info *vi = vdev->vi;
	void __user *argp = (void __user *)arg;
	int ret;

	switch (cmd) {
	case MIC_VIRTIO_ADD_DEVICE:
	{
		struct mic_device_desc dd, *dd_config;

		if (copy_from_user(&dd, argp, sizeof(dd)))
			return -EFAULT;

		if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
		    dd.num_vq > MIC_MAX_VRINGS)
			return -EINVAL;

		dd_config = kzalloc(mic_desc_size(&dd), GFP_KERNEL);
		if (!dd_config)
			return -ENOMEM;
		if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
			ret = -EFAULT;
			goto free_ret;
		}
		/* Ensure desc has not changed between the two reads */
		if (memcmp(&dd, dd_config, sizeof(dd))) {
			ret = -EINVAL;
			goto free_ret;
		}
		mutex_lock(&vdev->vdev_mutex);
		mutex_lock(&vi->vop_mutex);
		ret = vop_virtio_add_device(vdev, dd_config);
		if (ret)
			goto unlock_ret;
		list_add_tail(&vdev->list, &vi->vdev_list);
unlock_ret:
		mutex_unlock(&vi->vop_mutex);
		mutex_unlock(&vdev->vdev_mutex);
free_ret:
		kfree(dd_config);
		return ret;
	}
	case MIC_VIRTIO_COPY_DESC:
	{
		struct mic_copy_desc copy;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto _unlock_ret;

		if (copy_from_user(&copy, argp, sizeof(copy))) {
			ret = -EFAULT;
			goto _unlock_ret;
		}

		ret = vop_virtio_copy_desc(vdev, &copy);
		if (ret < 0)
			goto _unlock_ret;
		if (copy_to_user(
			&((struct mic_copy_desc __user *)argp)->out_len,
			&copy.out_len, sizeof(copy.out_len)))
			ret = -EFAULT;
_unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	case MIC_VIRTIO_CONFIG_CHANGE:
	{
		void *buf;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto __unlock_ret;
		buf = kzalloc(vdev->dd->config_len, GFP_KERNEL);
		if (!buf) {
			ret = -ENOMEM;
			goto __unlock_ret;
		}
		if (copy_from_user(buf, argp, vdev->dd->config_len)) {
			ret = -EFAULT;
			goto done;
		}
		ret = vop_virtio_config_change(vdev, buf);
done:
		kfree(buf);
__unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}

/*
 * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
 * not when previously enqueued buffers may be available. This means that
 * in the card->host (TX) path, when userspace is unblocked by poll it
 * must drain all available descriptors or it can stall.
 */
static unsigned int vop_poll(struct file *f, poll_table *wait)
{
	struct vop_vdev *vdev = f->private_data;
	int mask = 0;

	mutex_lock(&vdev->vdev_mutex);
	if (vop_vdev_inited(vdev)) {
		mask = POLLERR;
		goto done;
	}
	poll_wait(f, &vdev->waitq, wait);
	if (vop_vdev_inited(vdev)) {
		mask = POLLERR;
	} else if (vdev->poll_wake) {
		vdev->poll_wake = 0;
		mask = POLLIN | POLLOUT;
	}
done:
	mutex_unlock(&vdev->vdev_mutex);
	return mask;
}
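
/*
 * Translate an mmap offset into the physical address and length of the
 * backing region (the device page or one of the vrings).
 */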
static inline int
vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
		 unsigned long *size, unsigned long *pa)
{
	struct vop_device *vpdev = vdev->vpdev;
	unsigned long start = MIC_DP_SIZE;
	int i;

	/*
	 * MMAP interface is as follows:
	 * offset				region
	 * 0x0					virtio device_page
	 * 0x1000				first vring
	 * 0x1000 + size of 1st vring		second vring
	 * ....
	 */
	if (!offset) {
		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
		*size = MIC_DP_SIZE;
		return 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		if (offset == start) {
			*pa = virt_to_phys(vvr->vring.va);
			*size = vvr->vring.len;
			return 0;
		}
		start += vvr->vring.len;
	}
	return -1;
}

/*
 * Maps the device page and virtio rings to user space for readonly access.
 */
static int vop_mmap(struct file *f, struct vm_area_struct *vma)
{
	struct vop_vdev *vdev = f->private_data;
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
	int i, err;

	err = vop_vdev_inited(vdev);
	if (err)
		goto ret;
	if (vma->vm_flags & VM_WRITE) {
		err = -EACCES;
		goto ret;
	}
	while (size_rem) {
		i = vop_query_offset(vdev, offset, &size, &pa);
		if (i < 0) {
			err = -EINVAL;
			goto ret;
		}
		err = remap_pfn_range(vma, vma->vm_start + offset,
				      pa >> PAGE_SHIFT, size,
				      vma->vm_page_prot);
		if (err)
			goto ret;
		size_rem -= size;
		offset += size;
	}
ret:
	return err;
}

static const struct file_operations vop_fops = {
	.open = vop_open,
	.release = vop_release,
	.unlocked_ioctl = vop_ioctl,
	.poll = vop_poll,
	.mmap = vop_mmap,
	.owner = THIS_MODULE,
};
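
/* Register the misc character device through which user space drives VOP. */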
int vop_host_init(struct vop_info *vi)
{
	int rc;
	struct miscdevice *mdev;
	struct vop_device *vpdev = vi->vpdev;

	INIT_LIST_HEAD(&vi->vdev_list);
	vi->dma_ch = vpdev->dma_ch;
	mdev = &vi->miscdev;
	mdev->minor = MISC_DYNAMIC_MINOR;
	snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
	mdev->name = vi->name;
	mdev->fops = &vop_fops;
	mdev->parent = &vpdev->dev;

	rc = misc_register(mdev);
	if (rc)
		dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
	return rc;
}
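
/*
 * Tear down all remaining virtio devices and unregister the misc device.
 * The completion handshake lets an open file descriptor in vop_release
 * wait until the teardown here has finished.
 */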
void vop_host_uninit(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	mutex_lock(&vi->vop_mutex);
	vop_virtio_reset_devices(vi);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		list_del(pos);
		reinit_completion(&vdev->destroy);
		mutex_unlock(&vi->vop_mutex);
		mutex_lock(&vdev->vdev_mutex);
		vop_virtio_del_device(vdev);
		vdev->deleted = true;
		mutex_unlock(&vdev->vdev_mutex);
		complete(&vdev->destroy);
		mutex_lock(&vi->vop_mutex);
	}
	mutex_unlock(&vi->vop_mutex);
	misc_deregister(&vi->miscdev);
}