buf.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. /* $OpenBSD: buf.h,v 1.99 2015/07/19 16:21:11 beck Exp $ */
  2. /* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */
  3. /*
  4. * Copyright (c) 1982, 1986, 1989, 1993
  5. * The Regents of the University of California. All rights reserved.
  6. * (c) UNIX System Laboratories, Inc.
  7. * All or some portions of this file are derived from material licensed
  8. * to the University of California by American Telephone and Telegraph
  9. * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  10. * the permission of UNIX System Laboratories, Inc.
  11. *
  12. * Redistribution and use in source and binary forms, with or without
  13. * modification, are permitted provided that the following conditions
  14. * are met:
  15. * 1. Redistributions of source code must retain the above copyright
  16. * notice, this list of conditions and the following disclaimer.
  17. * 2. Redistributions in binary form must reproduce the above copyright
  18. * notice, this list of conditions and the following disclaimer in the
  19. * documentation and/or other materials provided with the distribution.
  20. * 3. Neither the name of the University nor the names of its contributors
  21. * may be used to endorse or promote products derived from this software
  22. * without specific prior written permission.
  23. *
  24. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34. * SUCH DAMAGE.
  35. *
  36. * @(#)buf.h 8.7 (Berkeley) 1/21/94
  37. */
  38. #ifndef _SYS_BUF_H_
  39. #define _SYS_BUF_H_
  40. #include <sys/queue.h>
  41. #include <sys/tree.h>
  42. #include <sys/mutex.h>
  /* Sentinel (non-NULL) buffer pointer value; its users are outside this header. */
  #define NOLIST  ((struct buf *)0x87654321)

  /* Forward declarations: only pointers to these types are needed up front. */
  struct buf;
  struct vnode;
  struct buf_rb_bufs;

  /*
   * Prototypes for the red-black tree "buf_rb_bufs" of struct buf, linked
   * through the b_rbbufs member with rb_buf_compare as comparison function.
   */
  RB_PROTOTYPE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare);

  /* Head type for a list of buffers. */
  LIST_HEAD(bufhead, buf);

  /*
   * Head type for a list of struct worklist, declared here
   * to avoid including <ufs/ffs/softdep.h>.
   */
  LIST_HEAD(workhead, worklist);
  /*
   * Buffer queues.
   *
   * BUFQ_FIFO and BUFQ_NSCAN are the queue type identifiers; BUFQ_DEFAULT
   * selects BUFQ_NSCAN.
   * NOTE(review): BUFQ_HOWMANY is presumably the number of queue types and
   * BUFQ_NSCAN_N a tuning constant of the nscan queue -- confirm in the
   * bufq implementation.
   */
  #define BUFQ_NSCAN_N    128
  #define BUFQ_FIFO       0
  #define BUFQ_NSCAN      1
  #define BUFQ_DEFAULT    BUFQ_NSCAN
  #define BUFQ_HOWMANY    2

  /*
   * Write limits for a bufq: the high and low water marks bounding how
   * many kva slots any individual bufq may consume to parallelize writes
   * from the buffer cache.
   */
  #define BUFQ_HI         128
  #define BUFQ_LOW        64
  68. struct bufq_impl;
  69. struct bufq {
  70. SLIST_ENTRY(bufq) bufq_entries;
  71. struct mutex bufq_mtx;
  72. void *bufq_data;
  73. u_int bufq_outstanding;
  74. u_int bufq_hi;
  75. u_int bufq_low;
  76. int bufq_waiting;
  77. int bufq_stop;
  78. int bufq_type;
  79. const struct bufq_impl *bufq_impl;
  80. };
  /*
   * Buffer queue interface.  The definitions live outside this header.
   */
  int          bufq_init(struct bufq *, int);
  int          bufq_switch(struct bufq *, int);
  void         bufq_destroy(struct bufq *);

  void         bufq_queue(struct bufq *, struct buf *);
  struct buf  *bufq_dequeue(struct bufq *);
  void         bufq_requeue(struct bufq *, struct buf *);
  int          bufq_peek(struct bufq *);
  void         bufq_drain(struct bufq *);

  void         bufq_wait(struct bufq *);
  void         bufq_done(struct bufq *, struct buf *);
  void         bufq_quiesce(void);
  void         bufq_restart(void);
  93. /* fifo */
  94. SIMPLEQ_HEAD(bufq_fifo_head, buf);
  95. struct bufq_fifo {
  96. SIMPLEQ_ENTRY(buf) bqf_entries;
  97. };
  98. /* nscan */
  99. SIMPLEQ_HEAD(bufq_nscan_head, buf);
  100. struct bufq_nscan {
  101. SIMPLEQ_ENTRY(buf) bqf_entries;
  102. };
  103. /* bufq link in struct buf */
  104. union bufq_data {
  105. struct bufq_fifo bufq_data_fifo;
  106. struct bufq_nscan bufq_data_nscan;
  107. };
  /*
   * I/O hooks.  They are currently used only by the soft dependency code,
   * which is why a single global instance (bioops) is enough.  Should other
   * subsystems want these hooks, each buffer could carry a pointer to its
   * own set of bio_ops instead.
   *
   * The buf_start()/buf_complete()/... inline wrappers in this header
   * treat a null hook as "not installed".
   */
  extern struct bio_ops {
      void    (*io_start)(struct buf *);
      void    (*io_complete)(struct buf *);
      void    (*io_deallocate)(struct buf *);
      void    (*io_movedeps)(struct buf *, struct buf *);
      int     (*io_countdeps)(struct buf *, int, int);
  } bioops;
  121. /* The buffer header describes an I/O operation in the kernel. */
  122. struct buf {
  123. RB_ENTRY(buf) b_rbbufs; /* vnode "hash" tree */
  124. LIST_ENTRY(buf) b_list; /* All allocated buffers. */
  125. LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */
  126. TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */
  127. int cache; /* which cache are we in */
  128. struct proc *b_proc; /* Associated proc; NULL if kernel. */
  129. volatile long b_flags; /* B_* flags. */
  130. long b_bufsize; /* Allocated buffer size. */
  131. long b_bcount; /* Valid bytes in buffer. */
  132. size_t b_resid; /* Remaining I/O. */
  133. int b_error; /* Errno value. */
  134. dev_t b_dev; /* Device associated with buffer. */
  135. caddr_t b_data; /* associated data */
  136. void *b_saveaddr; /* Original b_data for physio. */
  137. TAILQ_ENTRY(buf) b_valist; /* LRU of va to reuse. */
  138. union bufq_data b_bufq;
  139. struct bufq *b_bq; /* What bufq this buf is on */
  140. struct uvm_object *b_pobj; /* Object containing the pages */
  141. off_t b_poffs; /* Offset within object */
  142. daddr_t b_lblkno; /* Logical block number. */
  143. daddr_t b_blkno; /* Underlying physical block number. */
  144. /* Function to call upon completion.
  145. * Will be called at splbio(). */
  146. void (*b_iodone)(struct buf *);
  147. struct vnode *b_vp; /* Device vnode. */
  148. int b_dirtyoff; /* Offset in buffer of dirty region. */
  149. int b_dirtyend; /* Offset of end of dirty region. */
  150. int b_validoff; /* Offset in buffer of valid region. */
  151. int b_validend; /* Offset of end of valid region. */
  152. struct workhead b_dep; /* List of filesystem dependencies. */
  153. };
  154. TAILQ_HEAD(bufqueue, buf);
  155. struct bufcache {
  156. int64_t hotbufpages;
  157. int64_t warmbufpages;
  158. int64_t cachepages;
  159. struct bufqueue hotqueue;
  160. struct bufqueue coldqueue;
  161. struct bufqueue warmqueue;
  162. };
  /* Device driver compatibility: b_active is an alias for b_bcount. */
  #define b_active        b_bcount        /* Driver queue head: drive active. */

  /*
   * Flag bits stored in the b_flags member of struct buf.
   * B_WRITE is a pseudo flag: its value is zero, so it cannot be tested
   * with a bitwise AND; a write is the absence of B_READ.
   */
  #define B_WRITE         0x00000000      /* Write buffer (pseudo flag). */
  #define B_AGE           0x00000001      /* Move to age queue when I/O done. */
  #define B_NEEDCOMMIT    0x00000002      /* Needs committing to stable storage. */
  #define B_ASYNC         0x00000004      /* Start I/O, do not wait. */
  #define B_BAD           0x00000008      /* Bad block revectoring in progress. */
  #define B_BUSY          0x00000010      /* I/O in progress. */
  #define B_CACHE         0x00000020      /* Bread found us in the cache. */
  #define B_CALL          0x00000040      /* Call b_iodone from biodone. */
  #define B_DELWRI        0x00000080      /* Delay I/O until buffer reused. */
  #define B_DONE          0x00000100      /* I/O completed. */
  #define B_EINTR         0x00000200      /* I/O was interrupted. */
  #define B_ERROR         0x00000400      /* I/O error occurred. */
  #define B_INVAL         0x00000800      /* Does not contain valid info. */
  #define B_NOCACHE       0x00001000      /* Do not cache block after use. */
  #define B_PHYS          0x00002000      /* I/O to user memory. */
  #define B_RAW           0x00004000      /* Set by physio for raw transfers. */
  #define B_READ          0x00008000      /* Read buffer. */
  #define B_WANTED        0x00010000      /* Process wants this buffer. */
  #define B_WRITEINPROG   0x00020000      /* Write in progress. */
  #define B_XXX           0x00040000      /* Debugging flag. */
  #define B_DEFERRED      0x00080000      /* Skipped over for cleaning. */
  #define B_SCANNED       0x00100000      /* Block already pushed during sync. */
  #define B_PDAEMON       0x00200000      /* I/O started by pagedaemon. */
  #define B_RELEASED      0x00400000      /* Free this buffer after its kvm. */
  #define B_WARM          0x00800000      /* Buffer is or has been on the warm queue. */
  #define B_COLD          0x01000000      /* Buffer is on the cold queue. */
  #define B_BC            0x02000000      /* Buffer is managed by the cache. */
  #define B_DMA           0x04000000      /* Buffer is DMA reachable. */

  /*
   * Names for the b_flags bits: a leading \20 followed by one
   * (octal bit number, name) pair per flag, in bit order.
   * NOTE(review): presumably consumed by the kernel's %b printf format.
   * Bit \026 (B_PDAEMON) is labelled "DAEMON" and bit \023 (B_XXX) is
   * labelled "XXX(FORMAT)"; both labels are kept exactly as they were.
   */
  #define B_BITS  "\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
                  "\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \
                  "\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \
                  "\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \
                  "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA"
  /*
   * Bookkeeping for one clustered I/O.  The structure is stored in the
   * b_saveaddr field of the buffer on which the I/O is done.  When the
   * I/O completes, the cluster callback uses it to parcel the I/O out to
   * the individual buffers and then frees the structure.
   */
  struct cluster_save {
      long            bs_bcount;      /* saved b_bcount */
      long            bs_bufsize;     /* saved b_bufsize */
      void           *bs_saveaddr;    /* saved buffer address */
      int             bs_nchildren;   /* number of associated buffers */
      struct buf    **bs_children;    /* the associated buffers */
  };
  /*
   * Zero out the buffer's data area: clears the first b_bcount bytes of
   * bp->b_data (the count is converted to u_int, as before) and resets
   * b_resid to 0.
   *
   * The body is wrapped in do { } while (0) so that "clrbuf(bp);" is a
   * single statement and can be used safely in an unbraced if/else.
   * Note that bp is evaluated more than once.
   */
  #define clrbuf(bp) do {                                         \
      bzero((bp)->b_data, (u_int)(bp)->b_bcount);                 \
      (bp)->b_resid = 0;                                          \
  } while (0)
  /*
   * Flags to low-level allocation routines.
   */
  #define B_CLRBUF    0x01    /* Request allocated buffer be cleared. */
  #define B_SYNC      0x02    /* Do all allocations synchronously. */
  224. struct cluster_info {
  225. daddr_t ci_lastr; /* last read (read-ahead) */
  226. daddr_t ci_lastw; /* last write (write cluster) */
  227. daddr_t ci_cstart; /* start block of cluster */
  228. daddr_t ci_lasta; /* last allocation */
  229. int ci_clen; /* length of current cluster */
  230. int ci_ralen; /* Read-ahead length */
  231. daddr_t ci_maxra; /* last readahead block */
  232. };
  233. #ifdef _KERNEL
  234. __BEGIN_DECLS
  /* Kva slots (each of size MAXPHYS) reserved for the syncer and cleaner. */
  #define RESERVE_SLOTS   4

  /* Buffer cache pages reserved for the syncer and cleaner. */
  #define RESERVE_PAGES   (RESERVE_SLOTS * MAXPHYS / PAGE_SIZE)

  /* Minimum size of the buffer cache, in pages: twice the reserve. */
  #define BCACHE_MIN      (RESERVE_PAGES * 2)

  /* Buffer pages that are not clean, computed from the bcstats counters. */
  #define UNCLEAN_PAGES   (bcstats.numbufpages - bcstats.numcleanpages)

  /* Globals defined outside this header. */
  extern struct proc      *cleanerproc;
  extern long              bufpages;     /* max number of pages for buffers' data */
  extern struct pool       bufpool;
  extern struct bufhead    bufhead;
  246. void bawrite(struct buf *);
  247. void bdwrite(struct buf *);
  248. void biodone(struct buf *);
  249. int biowait(struct buf *);
  250. int bread(struct vnode *, daddr_t, int, struct buf **);
  251. int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
  252. struct buf **);
  253. void brelse(struct buf *);
  254. #define bremfree bufcache_take
  255. void bufinit(void);
  256. void buf_dirty(struct buf *);
  257. void buf_undirty(struct buf *);
  258. int bwrite(struct buf *);
  259. struct buf *getblk(struct vnode *, daddr_t, int, int, int);
  260. struct buf *geteblk(int);
  261. struct buf *incore(struct vnode *, daddr_t);
  /*
   * bufcache functions
   */
  void         bufcache_take(struct buf *);
  void         bufcache_release(struct buf *);

  void         buf_flip_high(struct buf *);
  void         buf_flip_dma(struct buf *);

  struct buf  *bufcache_getcleanbuf(int);
  struct buf  *bufcache_getanycleanbuf(void);
  struct buf  *bufcache_getdirtybuf(void);
  272. /*
  273. * buf_kvm_init initializes the kvm handling for buffers.
  274. * buf_acquire sets the B_BUSY flag and ensures that the buffer is
  275. * mapped in the kvm.
  276. * buf_release clears the B_BUSY flag and allows the buffer to become
  277. * unmapped.
  278. * buf_unmap is for internal use only. Unmaps the buffer from kvm.
  279. */
  280. void buf_mem_init(vsize_t);
  281. void buf_acquire(struct buf *);
  282. void buf_acquire_unmapped(struct buf *);
  283. void buf_acquire_nomap(struct buf *);
  284. void buf_map(struct buf *);
  285. void buf_release(struct buf *);
  286. int buf_dealloc_mem(struct buf *);
  287. void buf_fix_mapping(struct buf *, vsize_t);
  288. void buf_alloc_pages(struct buf *, vsize_t);
  289. void buf_free_pages(struct buf *);
  290. void minphys(struct buf *bp);
  291. int physio(void (*strategy)(struct buf *), dev_t dev, int flags,
  292. void (*minphys)(struct buf *), struct uio *uio);
  293. void brelvp(struct buf *);
  294. void reassignbuf(struct buf *);
  295. void bgetvp(struct vnode *, struct buf *);
  296. void buf_replacevnode(struct buf *, struct vnode *);
  297. void buf_daemon(struct proc *);
  298. void buf_replacevnode(struct buf *, struct vnode *);
  299. void buf_daemon(struct proc *);
  300. int bread_cluster(struct vnode *, daddr_t, int, struct buf **);
  301. #ifdef DEBUG
  302. void buf_print(struct buf *);
  303. #endif
  304. static __inline void
  305. buf_start(struct buf *bp)
  306. {
  307. if (bioops.io_start)
  308. (*bioops.io_start)(bp);
  309. }
  310. static __inline void
  311. buf_complete(struct buf *bp)
  312. {
  313. if (bioops.io_complete)
  314. (*bioops.io_complete)(bp);
  315. }
  316. static __inline void
  317. buf_deallocate(struct buf *bp)
  318. {
  319. if (bioops.io_deallocate)
  320. (*bioops.io_deallocate)(bp);
  321. }
  322. static __inline void
  323. buf_movedeps(struct buf *bp, struct buf *bp2)
  324. {
  325. if (bioops.io_movedeps)
  326. (*bioops.io_movedeps)(bp, bp2);
  327. }
  328. static __inline int
  329. buf_countdeps(struct buf *bp, int i, int islocked)
  330. {
  331. if (bioops.io_countdeps)
  332. return ((*bioops.io_countdeps)(bp, i, islocked));
  333. else
  334. return (0);
  335. }
  336. void cluster_write(struct buf *, struct cluster_info *, u_quad_t);
  337. __END_DECLS
  338. #endif /* _KERNEL */
  339. #endif /* !_SYS_BUF_H_ */