kern_physio.c 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. /* $OpenBSD: kern_physio.c,v 1.43 2015/03/14 03:38:50 jsg Exp $ */
  2. /* $NetBSD: kern_physio.c,v 1.28 1997/05/19 10:43:28 pk Exp $ */
  3. /*-
  4. * Copyright (c) 1994 Christopher G. Demetriou
  5. * Copyright (c) 1982, 1986, 1990, 1993
  6. * The Regents of the University of California. All rights reserved.
  7. * (c) UNIX System Laboratories, Inc.
  8. * All or some portions of this file are derived from material licensed
  9. * to the University of California by American Telephone and Telegraph
  10. * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  11. * the permission of UNIX System Laboratories, Inc.
  12. *
  13. * Redistribution and use in source and binary forms, with or without
  14. * modification, are permitted provided that the following conditions
  15. * are met:
  16. * 1. Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. * 2. Redistributions in binary form must reproduce the above copyright
  19. * notice, this list of conditions and the following disclaimer in the
  20. * documentation and/or other materials provided with the distribution.
  21. * 3. Neither the name of the University nor the names of its contributors
  22. * may be used to endorse or promote products derived from this software
  23. * without specific prior written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  26. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  27. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  28. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  29. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  30. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  31. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  32. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  33. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  34. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  35. * SUCH DAMAGE.
  36. *
  37. * @(#)kern_physio.c 8.1 (Berkeley) 6/10/93
  38. */
  39. #include <sys/param.h>
  40. #include <sys/systm.h>
  41. #include <sys/buf.h>
  42. #include <sys/pool.h>
  43. #include <uvm/uvm_extern.h>
  44. /*
  45. * The routines implemented in this file are described in:
  46. * Leffler, et al.: The Design and Implementation of the 4.3BSD
  47. * UNIX Operating System (Addison-Wesley, 1989)
  48. * on pages 231-233.
  49. */
  50. /*
  51. * Do "physical I/O" on behalf of a user. "Physical I/O" is I/O directly
  52. * from the raw device to user buffers, and bypasses the buffer cache.
  53. *
  54. * Comments in brackets are from Leffler, et al.'s pseudo-code implementation.
  55. */
  56. int
  57. physio(void (*strategy)(struct buf *), dev_t dev, int flags,
  58. void (*minphys)(struct buf *), struct uio *uio)
  59. {
  60. struct iovec *iovp;
  61. struct proc *p = curproc;
  62. int error, done, i, s, todo;
  63. struct buf *bp;
  64. if ((uio->uio_offset % DEV_BSIZE) != 0)
  65. return (EINVAL);
  66. error = 0;
  67. flags &= B_READ | B_WRITE;
  68. /* Create a buffer. */
  69. s = splbio();
  70. bp = pool_get(&bufpool, PR_WAITOK | PR_ZERO);
  71. /* [set up the fixed part of the buffer for a transfer] */
  72. bp->b_vnbufs.le_next = NOLIST;
  73. bp->b_dev = dev;
  74. bp->b_error = 0;
  75. bp->b_proc = p;
  76. bp->b_flags = B_BUSY;
  77. LIST_INIT(&bp->b_dep);
  78. splx(s);
  79. /*
  80. * [while there are data to transfer and no I/O error]
  81. * Note that I/O errors are handled with a 'goto' at the bottom
  82. * of the 'while' loop.
  83. */
  84. for (i = 0; i < uio->uio_iovcnt; i++) {
  85. iovp = &uio->uio_iov[i];
  86. while (iovp->iov_len > 0) {
  87. void *map = NULL;
  88. /*
  89. * [mark the buffer busy for physical I/O]
  90. * (i.e. set B_PHYS (because it's an I/O to user
  91. * memory), and B_RAW, because B_RAW is to be
  92. * "Set by physio for raw transfers.", in addition
  93. * to the "busy" and read/write flag.)
  94. */
  95. CLR(bp->b_flags, B_DONE | B_ERROR);
  96. bp->b_flags |= (B_BUSY | B_PHYS | B_RAW | flags);
  97. /* [set up the buffer for a maximum-sized transfer] */
  98. bp->b_blkno = btodb(uio->uio_offset);
  99. /*
  100. * Because iov_len is unsigned but b_bcount is signed,
  101. * an overflow is possible. Therefore bound to MAXPHYS
  102. * before calling minphys.
  103. */
  104. if (iovp->iov_len > MAXPHYS)
  105. bp->b_bcount = MAXPHYS;
  106. else
  107. bp->b_bcount = iovp->iov_len;
  108. /*
  109. * [call minphys to bound the transfer size]
  110. * and remember the amount of data to transfer,
  111. * for later comparison.
  112. */
  113. (*minphys)(bp);
  114. todo = bp->b_bcount;
  115. KASSERTMSG(todo >= 0, "minphys broken");
  116. KASSERTMSG(todo <= MAXPHYS, "minphys broken");
  117. /*
  118. * [lock the part of the user address space involved
  119. * in the transfer]
  120. * Beware vmapbuf(); it clobbers b_data and
  121. * saves it in b_saveaddr. However, vunmapbuf()
  122. * restores it.
  123. */
  124. error = uvm_vslock_device(p, iovp->iov_base, todo,
  125. (flags & B_READ) ?
  126. PROT_READ | PROT_WRITE : PROT_READ, &map);
  127. if (error)
  128. goto done;
  129. if (map) {
  130. bp->b_data = map;
  131. } else {
  132. bp->b_data = iovp->iov_base;
  133. vmapbuf(bp, todo);
  134. }
  135. /* [call strategy to start the transfer] */
  136. (*strategy)(bp);
  137. /*
  138. * Note that the raise/wait/lower/get error
  139. * steps below would be done by biowait(), but
  140. * we want to unlock the address space before
  141. * we lower the priority.
  142. *
  143. * [raise the priority level to splbio]
  144. */
  145. s = splbio();
  146. /* [wait for the transfer to complete] */
  147. while ((bp->b_flags & B_DONE) == 0)
  148. tsleep(bp, PRIBIO + 1, "physio", 0);
  149. /* Mark it busy again, so nobody else will use it. */
  150. bp->b_flags |= B_BUSY;
  151. /* [lower the priority level] */
  152. splx(s);
  153. /*
  154. * [unlock the part of the address space previously
  155. * locked]
  156. */
  157. if (!map)
  158. vunmapbuf(bp, todo);
  159. uvm_vsunlock_device(p, iovp->iov_base, todo, map);
  160. /* remember error value (save a splbio/splx pair) */
  161. if (bp->b_flags & B_ERROR)
  162. error = (bp->b_error ? bp->b_error : EIO);
  163. /*
  164. * [deduct the transfer size from the total number
  165. * of data to transfer]
  166. */
  167. done = bp->b_bcount - bp->b_resid;
  168. KASSERTMSG(done >= 0, "strategy broken");
  169. KASSERTMSG(done <= todo, "strategy broken");
  170. iovp->iov_len -= done;
  171. iovp->iov_base = (caddr_t)iovp->iov_base + done;
  172. uio->uio_offset += done;
  173. uio->uio_resid -= done;
  174. /*
  175. * Now, check for an error.
  176. * Also, handle weird end-of-disk semantics.
  177. */
  178. if (error || done < todo)
  179. goto done;
  180. }
  181. }
  182. done:
  183. /*
  184. * [clean up the state of the buffer]
  185. */
  186. s = splbio();
  187. /* XXXCDC: is this necessary? */
  188. if (bp->b_vp)
  189. brelvp(bp);
  190. splx(s);
  191. pool_put(&bufpool, bp);
  192. return (error);
  193. }
  194. /*
  195. * Leffler, et al., says on p. 231:
  196. * "The minphys() routine is called by physio() to adjust the
  197. * size of each I/O transfer before the latter is passed to
  198. * the strategy routine..."
  199. *
  200. * so, just adjust the buffer's count accounting to MAXPHYS here,
  201. * and return the new count;
  202. */
  203. void
  204. minphys(struct buf *bp)
  205. {
  206. if (bp->b_bcount > MAXPHYS)
  207. bp->b_bcount = MAXPHYS;
  208. }