bio.c

  1. /*
  2. * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License version 2 as
  6. * published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License
  14. * along with this program; if not, write to the Free Software
  15. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  16. *
  17. */
  18. #include <linux/mm.h>
  19. #include <linux/swap.h>
  20. #include <linux/bio.h>
  21. #include <linux/blkdev.h>
  22. #include <linux/uio.h>
  23. #include <linux/iocontext.h>
  24. #include <linux/slab.h>
  25. #include <linux/init.h>
  26. #include <linux/kernel.h>
  27. #include <linux/export.h>
  28. #include <linux/mempool.h>
  29. #include <linux/workqueue.h>
  30. #include <linux/cgroup.h>
  31. #include <linux/blk-cgroup.h>
  32. #include <trace/events/block.h>
  33. #include "blk.h"
  34. #include "blk-rq-qos.h"
  35. /*
  36. * Test patch to inline a certain number of bi_io_vec's inside the bio
  37. * itself, to shrink a bio data allocation from two mempool calls to one
  38. */
  39. #define BIO_INLINE_VECS 4
  40. /*
  41. * if you change this list, also change bvec_alloc or things will
  42. * break badly! cannot be bigger than what you can fit into an
  43. * unsigned short
  44. */
  45. #define BV(x, n) { .nr_vecs = x, .name = "biovec-"#n }
  46. static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
  47. BV(1, 1), BV(4, 4), BV(16, 16), BV(64, 64), BV(128, 128), BV(BIO_MAX_PAGES, max),
  48. };
  49. #undef BV
  50. /*
  51. * fs_bio_set is the bio_set containing bio and iovec memory pools used by
  52. * IO code that does not need private memory pools.
  53. */
  54. struct bio_set fs_bio_set;
  55. EXPORT_SYMBOL(fs_bio_set);
  56. /*
  57. * Our slab pool management
  58. */
  59. struct bio_slab {
  60. struct kmem_cache *slab;
  61. unsigned int slab_ref;
  62. unsigned int slab_size;
  63. char name[8];
  64. };
  65. static DEFINE_MUTEX(bio_slab_lock);
  66. static struct bio_slab *bio_slabs;
  67. static unsigned int bio_slab_nr, bio_slab_max;
  68. static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
  69. {
  70. unsigned int sz = sizeof(struct bio) + extra_size;
  71. struct kmem_cache *slab = NULL;
  72. struct bio_slab *bslab, *new_bio_slabs;
  73. unsigned int new_bio_slab_max;
  74. unsigned int i, entry = -1;
  75. mutex_lock(&bio_slab_lock);
  76. i = 0;
  77. while (i < bio_slab_nr) {
  78. bslab = &bio_slabs[i];
  79. if (!bslab->slab && entry == -1)
  80. entry = i;
  81. else if (bslab->slab_size == sz) {
  82. slab = bslab->slab;
  83. bslab->slab_ref++;
  84. break;
  85. }
  86. i++;
  87. }
  88. if (slab)
  89. goto out_unlock;
  90. if (bio_slab_nr == bio_slab_max && entry == -1) {
  91. new_bio_slab_max = bio_slab_max << 1;
  92. new_bio_slabs = krealloc(bio_slabs,
  93. new_bio_slab_max * sizeof(struct bio_slab),
  94. GFP_KERNEL);
  95. if (!new_bio_slabs)
  96. goto out_unlock;
  97. bio_slab_max = new_bio_slab_max;
  98. bio_slabs = new_bio_slabs;
  99. }
  100. if (entry == -1)
  101. entry = bio_slab_nr++;
  102. bslab = &bio_slabs[entry];
  103. snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
  104. slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
  105. SLAB_HWCACHE_ALIGN, NULL);
  106. if (!slab)
  107. goto out_unlock;
  108. bslab->slab = slab;
  109. bslab->slab_ref = 1;
  110. bslab->slab_size = sz;
  111. out_unlock:
  112. mutex_unlock(&bio_slab_lock);
  113. return slab;
  114. }
  115. static void bio_put_slab(struct bio_set *bs)
  116. {
  117. struct bio_slab *bslab = NULL;
  118. unsigned int i;
  119. mutex_lock(&bio_slab_lock);
  120. for (i = 0; i < bio_slab_nr; i++) {
  121. if (bs->bio_slab == bio_slabs[i].slab) {
  122. bslab = &bio_slabs[i];
  123. break;
  124. }
  125. }
  126. if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
  127. goto out;
  128. WARN_ON(!bslab->slab_ref);
  129. if (--bslab->slab_ref)
  130. goto out;
  131. kmem_cache_destroy(bslab->slab);
  132. bslab->slab = NULL;
  133. out:
  134. mutex_unlock(&bio_slab_lock);
  135. }
  136. unsigned int bvec_nr_vecs(unsigned short idx)
  137. {
  138. return bvec_slabs[--idx].nr_vecs;
  139. }
  140. void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx)
  141. {
  142. if (!idx)
  143. return;
  144. idx--;
  145. BIO_BUG_ON(idx >= BVEC_POOL_NR);
  146. if (idx == BVEC_POOL_MAX) {
  147. mempool_free(bv, pool);
  148. } else {
  149. struct biovec_slab *bvs = bvec_slabs + idx;
  150. kmem_cache_free(bvs->slab, bv);
  151. }
  152. }
  153. struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
  154. mempool_t *pool)
  155. {
  156. struct bio_vec *bvl;
  157. /*
  158. * see comment near bvec_array define!
  159. */
  160. switch (nr) {
  161. case 1:
  162. *idx = 0;
  163. break;
  164. case 2 ... 4:
  165. *idx = 1;
  166. break;
  167. case 5 ... 16:
  168. *idx = 2;
  169. break;
  170. case 17 ... 64:
  171. *idx = 3;
  172. break;
  173. case 65 ... 128:
  174. *idx = 4;
  175. break;
  176. case 129 ... BIO_MAX_PAGES:
  177. *idx = 5;
  178. break;
  179. default:
  180. return NULL;
  181. }
  182. /*
  183. * idx now points to the pool we want to allocate from. only the
  184. * 1-vec entry pool is mempool backed.
  185. */
  186. if (*idx == BVEC_POOL_MAX) {
  187. fallback:
  188. bvl = mempool_alloc(pool, gfp_mask);
  189. } else {
  190. struct biovec_slab *bvs = bvec_slabs + *idx;
  191. gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO);
  192. /*
  193. * Make this allocation restricted and don't dump info on
  194. * allocation failures, since we'll fallback to the mempool
  195. * in case of failure.
  196. */
  197. __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
  198. /*
  199. * Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
  200. * is set, retry with the 1-entry mempool
  201. */
  202. bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
  203. if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
  204. *idx = BVEC_POOL_MAX;
  205. goto fallback;
  206. }
  207. }
  208. (*idx)++;
  209. return bvl;
  210. }
  211. void bio_uninit(struct bio *bio)
  212. {
  213. bio_disassociate_task(bio);
  214. }
  215. EXPORT_SYMBOL(bio_uninit);
  216. static void bio_free(struct bio *bio)
  217. {
  218. struct bio_set *bs = bio->bi_pool;
  219. void *p;
  220. bio_uninit(bio);
  221. if (bs) {
  222. bvec_free(&bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
  223. /*
  224. * If we have front padding, adjust the bio pointer before freeing
  225. */
  226. p = bio;
  227. p -= bs->front_pad;
  228. mempool_free(p, &bs->bio_pool);
  229. } else {
  230. /* Bio was allocated by bio_kmalloc() */
  231. kfree(bio);
  232. }
  233. }
  234. /*
  235. * Users of this function have their own bio allocation. Subsequently,
  236. * they must remember to pair any call to bio_init() with bio_uninit()
  237. * when IO has completed, or when the bio is released.
  238. */
  239. void bio_init(struct bio *bio, struct bio_vec *table,
  240. unsigned short max_vecs)
  241. {
  242. memset(bio, 0, sizeof(*bio));
  243. atomic_set(&bio->__bi_remaining, 1);
  244. atomic_set(&bio->__bi_cnt, 1);
  245. bio->bi_io_vec = table;
  246. bio->bi_max_vecs = max_vecs;
  247. }
  248. EXPORT_SYMBOL(bio_init);
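/*
 * Editor's sketch, not part of bio.c: one way a driver might follow the
 * comment above by embedding a bio (plus a few bio_vecs) in its own request
 * structure and pairing bio_init() with bio_uninit().  "my_request" and the
 * helpers below are hypothetical names.
 */
struct my_request {
        struct bio      bio;
        struct bio_vec  inline_vecs[4];
};

static void my_request_init(struct my_request *req)
{
        bio_init(&req->bio, req->inline_vecs, ARRAY_SIZE(req->inline_vecs));
}

static void my_request_done(struct my_request *req)
{
        bio_uninit(&req->bio);          /* pairs with the bio_init() above */
}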
  249. /**
  250. * bio_reset - reinitialize a bio
  251. * @bio: bio to reset
  252. *
  253. * Description:
  254. * After calling bio_reset(), @bio will be in the same state as a freshly
  255. * allocated bio returned by bio_alloc_bioset() - the only fields that are
  256. * preserved are the ones that are initialized by bio_alloc_bioset(). See
  257. * comment in struct bio.
  258. */
  259. void bio_reset(struct bio *bio)
  260. {
  261. unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
  262. bio_uninit(bio);
  263. memset(bio, 0, BIO_RESET_BYTES);
  264. bio->bi_flags = flags;
  265. atomic_set(&bio->__bi_remaining, 1);
  266. }
  267. EXPORT_SYMBOL(bio_reset);
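/*
 * Editor's sketch, not part of bio.c: bio_reset() lets a long-lived bio be
 * reused for another submission, e.g. when a driver keeps a bio embedded in
 * one of its own structures.  Names below are hypothetical.
 */
static void my_reuse_bio(struct bio *bio, struct block_device *bdev,
                         sector_t sector)
{
        bio_reset(bio);                 /* back to a freshly-initialized state */
        bio_set_dev(bio, bdev);
        bio->bi_iter.bi_sector = sector;
        bio->bi_opf = REQ_OP_READ;
        /* the caller re-adds pages and submits the bio again */
}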
  268. static struct bio *__bio_chain_endio(struct bio *bio)
  269. {
  270. struct bio *parent = bio->bi_private;
  271. if (!parent->bi_status)
  272. parent->bi_status = bio->bi_status;
  273. bio_put(bio);
  274. return parent;
  275. }
  276. static void bio_chain_endio(struct bio *bio)
  277. {
  278. bio_endio(__bio_chain_endio(bio));
  279. }
  280. /**
  281. * bio_chain - chain bio completions
  282. * @bio: the target bio
  283. * @parent: the @bio's parent bio
  284. *
  285. * The caller won't have a bi_end_io called when @bio completes - instead,
  286. * @parent's bi_end_io won't be called until both @parent and @bio have
  287. * completed; the chained bio will also be freed when it completes.
  288. *
  289. * The caller must not set bi_private or bi_end_io in @bio.
  290. */
  291. void bio_chain(struct bio *bio, struct bio *parent)
  292. {
  293. BUG_ON(bio->bi_private || bio->bi_end_io);
  294. bio->bi_private = parent;
  295. bio->bi_end_io = bio_chain_endio;
  296. bio_inc_remaining(parent);
  297. }
  298. EXPORT_SYMBOL(bio_chain);
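/*
 * Editor's sketch, not part of bio.c: the usual bio_chain() pattern, as used
 * by stacking drivers that split a bio.  The remainder (@bio) only completes
 * once the split-off front portion has completed as well.  Hypothetical
 * helper; error handling omitted.
 */
static void my_split_and_submit(struct bio *bio, int sectors, struct bio_set *bs)
{
        struct bio *split = bio_split(bio, sectors, GFP_NOIO, bs);

        bio_chain(split, bio);          /* @bio becomes the parent of @split */
        generic_make_request(split);
        generic_make_request(bio);
}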
  299. static void bio_alloc_rescue(struct work_struct *work)
  300. {
  301. struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
  302. struct bio *bio;
  303. while (1) {
  304. spin_lock(&bs->rescue_lock);
  305. bio = bio_list_pop(&bs->rescue_list);
  306. spin_unlock(&bs->rescue_lock);
  307. if (!bio)
  308. break;
  309. generic_make_request(bio);
  310. }
  311. }
  312. static void punt_bios_to_rescuer(struct bio_set *bs)
  313. {
  314. struct bio_list punt, nopunt;
  315. struct bio *bio;
  316. if (WARN_ON_ONCE(!bs->rescue_workqueue))
  317. return;
  318. /*
  319. * In order to guarantee forward progress we must punt only bios that
  320. * were allocated from this bio_set; otherwise, if there was a bio on
  321. * there for a stacking driver higher up in the stack, processing it
  322. * could require allocating bios from this bio_set, and doing that from
  323. * our own rescuer would be bad.
  324. *
  325. * Since bio lists are singly linked, pop them all instead of trying to
  326. * remove from the middle of the list:
  327. */
  328. bio_list_init(&punt);
  329. bio_list_init(&nopunt);
  330. while ((bio = bio_list_pop(&current->bio_list[0])))
  331. bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
  332. current->bio_list[0] = nopunt;
  333. bio_list_init(&nopunt);
  334. while ((bio = bio_list_pop(&current->bio_list[1])))
  335. bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
  336. current->bio_list[1] = nopunt;
  337. spin_lock(&bs->rescue_lock);
  338. bio_list_merge(&bs->rescue_list, &punt);
  339. spin_unlock(&bs->rescue_lock);
  340. queue_work(bs->rescue_workqueue, &bs->rescue_work);
  341. }
  342. /**
  343. * bio_alloc_bioset - allocate a bio for I/O
  344. * @gfp_mask: the GFP_* mask given to the slab allocator
  345. * @nr_iovecs: number of iovecs to pre-allocate
  346. * @bs: the bio_set to allocate from.
  347. *
  348. * Description:
  349. * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
  350. * backed by the @bs's mempool.
  351. *
  352. * When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will
  353. * always be able to allocate a bio. This is due to the mempool guarantees.
  354. * To make this work, callers must never allocate more than 1 bio at a time
  355. * from this pool. Callers that need to allocate more than 1 bio must always
  356. * submit the previously allocated bio for IO before attempting to allocate
  357. * a new one. Failure to do so can cause deadlocks under memory pressure.
  358. *
  359. * Note that when running under generic_make_request() (i.e. any block
  360. * driver), bios are not submitted until after you return - see the code in
  361. * generic_make_request() that converts recursion into iteration, to prevent
  362. * stack overflows.
  363. *
  364. * This would normally mean allocating multiple bios under
  365. * generic_make_request() would be susceptible to deadlocks, but we have
  366. * deadlock avoidance code that resubmits any blocked bios from a rescuer
  367. * thread.
  368. *
  369. * However, we do not guarantee forward progress for allocations from other
  370. * mempools. Doing multiple allocations from the same mempool under
  371. * generic_make_request() should be avoided - instead, use bio_set's front_pad
  372. * for per bio allocations.
  373. *
  374. * RETURNS:
  375. * Pointer to new bio on success, NULL on failure.
  376. */
  377. struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
  378. struct bio_set *bs)
  379. {
  380. gfp_t saved_gfp = gfp_mask;
  381. unsigned front_pad;
  382. unsigned inline_vecs;
  383. struct bio_vec *bvl = NULL;
  384. struct bio *bio;
  385. void *p;
  386. if (!bs) {
  387. if (nr_iovecs > UIO_MAXIOV)
  388. return NULL;
  389. p = kmalloc(sizeof(struct bio) +
  390. nr_iovecs * sizeof(struct bio_vec),
  391. gfp_mask);
  392. front_pad = 0;
  393. inline_vecs = nr_iovecs;
  394. } else {
  395. /* should not use nobvec bioset for nr_iovecs > 0 */
  396. if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) &&
  397. nr_iovecs > 0))
  398. return NULL;
  399. /*
  400. * generic_make_request() converts recursion to iteration; this
  401. * means if we're running beneath it, any bios we allocate and
  402. * submit will not be submitted (and thus freed) until after we
  403. * return.
  404. *
  405. * This exposes us to a potential deadlock if we allocate
  406. * multiple bios from the same bio_set() while running
  407. * underneath generic_make_request(). If we were to allocate
  408. * multiple bios (say a stacking block driver that was splitting
  409. * bios), we would deadlock if we exhausted the mempool's
  410. * reserve.
  411. *
  412. * We solve this, and guarantee forward progress, with a rescuer
  413. * workqueue per bio_set. If we go to allocate and there are
  414. * bios on current->bio_list, we first try the allocation
  415. * without __GFP_DIRECT_RECLAIM; if that fails, we punt those
  416. * bios we would be blocking to the rescuer workqueue before
  417. * we retry with the original gfp_flags.
  418. */
  419. if (current->bio_list &&
  420. (!bio_list_empty(&current->bio_list[0]) ||
  421. !bio_list_empty(&current->bio_list[1])) &&
  422. bs->rescue_workqueue)
  423. gfp_mask &= ~__GFP_DIRECT_RECLAIM;
  424. p = mempool_alloc(&bs->bio_pool, gfp_mask);
  425. if (!p && gfp_mask != saved_gfp) {
  426. punt_bios_to_rescuer(bs);
  427. gfp_mask = saved_gfp;
  428. p = mempool_alloc(&bs->bio_pool, gfp_mask);
  429. }
  430. front_pad = bs->front_pad;
  431. inline_vecs = BIO_INLINE_VECS;
  432. }
  433. if (unlikely(!p))
  434. return NULL;
  435. bio = p + front_pad;
  436. bio_init(bio, NULL, 0);
  437. if (nr_iovecs > inline_vecs) {
  438. unsigned long idx = 0;
  439. bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
  440. if (!bvl && gfp_mask != saved_gfp) {
  441. punt_bios_to_rescuer(bs);
  442. gfp_mask = saved_gfp;
  443. bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
  444. }
  445. if (unlikely(!bvl))
  446. goto err_free;
  447. bio->bi_flags |= idx << BVEC_POOL_OFFSET;
  448. } else if (nr_iovecs) {
  449. bvl = bio->bi_inline_vecs;
  450. }
  451. bio->bi_pool = bs;
  452. bio->bi_max_vecs = nr_iovecs;
  453. bio->bi_io_vec = bvl;
  454. return bio;
  455. err_free:
  456. mempool_free(p, &bs->bio_pool);
  457. return NULL;
  458. }
  459. EXPORT_SYMBOL(bio_alloc_bioset);
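/*
 * Editor's sketch, not part of bio.c: the usage pattern the comment above
 * recommends - a private bio_set whose front_pad carries per-bio driver data,
 * so only one bio is allocated from the pool at a time.  "struct my_pad",
 * "my_bio_set" and the helpers are hypothetical.
 */
struct my_pad {
        void            *cookie;
        struct bio      bio;            /* must be the last member */
};

static struct bio_set my_bio_set;

static int my_driver_init(void)
{
        return bioset_init(&my_bio_set, 64, offsetof(struct my_pad, bio),
                           BIOSET_NEED_BVECS);
}

static struct bio *my_alloc_bio(unsigned int nr_vecs)
{
        /* GFP_NOIO includes __GFP_DIRECT_RECLAIM, so this cannot fail */
        struct bio *bio = bio_alloc_bioset(GFP_NOIO, nr_vecs, &my_bio_set);
        struct my_pad *pad = container_of(bio, struct my_pad, bio);

        pad->cookie = NULL;             /* front_pad sits just in front of the bio */
        return bio;
}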
  460. void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
  461. {
  462. unsigned long flags;
  463. struct bio_vec bv;
  464. struct bvec_iter iter;
  465. __bio_for_each_segment(bv, bio, iter, start) {
  466. char *data = bvec_kmap_irq(&bv, &flags);
  467. memset(data, 0, bv.bv_len);
  468. flush_dcache_page(bv.bv_page);
  469. bvec_kunmap_irq(data, &flags);
  470. }
  471. }
  472. EXPORT_SYMBOL(zero_fill_bio_iter);
  473. /**
  474. * bio_put - release a reference to a bio
  475. * @bio: bio to release reference to
  476. *
  477. * Description:
  478. * Put a reference to a &struct bio, either one you have gotten with
  479. * bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
  480. **/
  481. void bio_put(struct bio *bio)
  482. {
  483. if (!bio_flagged(bio, BIO_REFFED))
  484. bio_free(bio);
  485. else {
  486. BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
  487. /*
  488. * last put frees it
  489. */
  490. if (atomic_dec_and_test(&bio->__bi_cnt))
  491. bio_free(bio);
  492. }
  493. }
  494. EXPORT_SYMBOL(bio_put);
  495. inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
  496. {
  497. if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
  498. blk_recount_segments(q, bio);
  499. return bio->bi_phys_segments;
  500. }
  501. EXPORT_SYMBOL(bio_phys_segments);
  502. /**
  503. * __bio_clone_fast - clone a bio that shares the original bio's biovec
  504. * @bio: destination bio
  505. * @bio_src: bio to clone
  506. *
  507. * Clone a &bio. Caller will own the returned bio, but not
  508. * the actual data it points to. Reference count of returned
  509. * bio will be one.
  510. *
  511. * Caller must ensure that @bio_src is not freed before @bio.
  512. */
  513. void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
  514. {
  515. BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio));
  516. /*
  517. * most users will be overriding ->bi_disk with a new target,
  518. * so we don't set nor calculate new physical/hw segment counts here
  519. */
  520. bio->bi_disk = bio_src->bi_disk;
  521. bio->bi_partno = bio_src->bi_partno;
  522. bio_set_flag(bio, BIO_CLONED);
  523. if (bio_flagged(bio_src, BIO_THROTTLED))
  524. bio_set_flag(bio, BIO_THROTTLED);
  525. bio->bi_opf = bio_src->bi_opf;
  526. bio->bi_ioprio = bio_src->bi_ioprio;
  527. bio->bi_write_hint = bio_src->bi_write_hint;
  528. bio->bi_iter = bio_src->bi_iter;
  529. bio->bi_io_vec = bio_src->bi_io_vec;
  530. bio_clone_blkcg_association(bio, bio_src);
  531. }
  532. EXPORT_SYMBOL(__bio_clone_fast);
  533. /**
  534. * bio_clone_fast - clone a bio that shares the original bio's biovec
  535. * @bio: bio to clone
  536. * @gfp_mask: allocation priority
  537. * @bs: bio_set to allocate from
  538. *
  539. * Like __bio_clone_fast, only also allocates the returned bio
  540. */
  541. struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
  542. {
  543. struct bio *b;
  544. b = bio_alloc_bioset(gfp_mask, 0, bs);
  545. if (!b)
  546. return NULL;
  547. __bio_clone_fast(b, bio);
  548. if (bio_integrity(bio)) {
  549. int ret;
  550. ret = bio_integrity_clone(b, bio, gfp_mask);
  551. if (ret < 0) {
  552. bio_put(b);
  553. return NULL;
  554. }
  555. }
  556. return b;
  557. }
  558. EXPORT_SYMBOL(bio_clone_fast);
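/*
 * Editor's sketch, not part of bio.c: a stacking driver cloning a bio with
 * bio_clone_fast() and pointing the clone at a lower device.  The clone
 * shares @bio's biovec, so @bio must not be freed before the clone.
 * Hypothetical names; the caller is assumed to set bi_end_io/bi_private.
 */
static struct bio *my_clone_to_lower(struct bio *bio, struct bio_set *bs,
                                     struct block_device *lower)
{
        struct bio *clone = bio_clone_fast(bio, GFP_NOIO, bs);

        if (!clone)
                return NULL;
        bio_set_dev(clone, lower);
        return clone;
}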
  559. /**
  560. * bio_add_pc_page - attempt to add page to bio
  561. * @q: the target queue
  562. * @bio: destination bio
  563. * @page: page to add
  564. * @len: vec entry length
  565. * @offset: vec entry offset
  566. *
  567. * Attempt to add a page to the bio_vec maplist. This can fail for a
  568. * number of reasons, such as the bio being full or target block device
  569. * limitations. The target block device must allow bios up to PAGE_SIZE,
  570. * so it is always possible to add a single page to an empty bio.
  571. *
  572. * This should only be used by REQ_PC bios.
  573. */
  574. int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
  575. *page, unsigned int len, unsigned int offset)
  576. {
  577. int retried_segments = 0;
  578. struct bio_vec *bvec;
  579. /*
  580. * cloned bio must not modify vec list
  581. */
  582. if (unlikely(bio_flagged(bio, BIO_CLONED)))
  583. return 0;
  584. if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
  585. return 0;
  586. /*
  587. * For filesystems with a blocksize smaller than the pagesize
  588. * we will often be called with the same page as last time and
  589. * a consecutive offset. Optimize this special case.
  590. */
  591. if (bio->bi_vcnt > 0) {
  592. struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
  593. if (page == prev->bv_page &&
  594. offset == prev->bv_offset + prev->bv_len) {
  595. prev->bv_len += len;
  596. bio->bi_iter.bi_size += len;
  597. goto done;
  598. }
  599. /*
  600. * If the queue doesn't support SG gaps and adding this
  601. * offset would create a gap, disallow it.
  602. */
  603. if (bvec_gap_to_prev(q, prev, offset))
  604. return 0;
  605. }
  606. if (bio_full(bio))
  607. return 0;
  608. /*
  609. * setup the new entry, we might clear it again later if we
  610. * cannot add the page
  611. */
  612. bvec = &bio->bi_io_vec[bio->bi_vcnt];
  613. bvec->bv_page = page;
  614. bvec->bv_len = len;
  615. bvec->bv_offset = offset;
  616. bio->bi_vcnt++;
  617. bio->bi_phys_segments++;
  618. bio->bi_iter.bi_size += len;
  619. /*
  620. * Perform a recount if the number of segments is greater
  621. * than queue_max_segments(q).
  622. */
  623. while (bio->bi_phys_segments > queue_max_segments(q)) {
  624. if (retried_segments)
  625. goto failed;
  626. retried_segments = 1;
  627. blk_recount_segments(q, bio);
  628. }
  629. /* If we may be able to merge these biovecs, force a recount */
  630. if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
  631. bio_clear_flag(bio, BIO_SEG_VALID);
  632. done:
  633. return len;
  634. failed:
  635. bvec->bv_page = NULL;
  636. bvec->bv_len = 0;
  637. bvec->bv_offset = 0;
  638. bio->bi_vcnt--;
  639. bio->bi_iter.bi_size -= len;
  640. blk_recount_segments(q, bio);
  641. return 0;
  642. }
  643. EXPORT_SYMBOL(bio_add_pc_page);
  644. /**
  645. * __bio_try_merge_page - try appending data to an existing bvec.
  646. * @bio: destination bio
  647. * @page: page to add
  648. * @len: length of the data to add
  649. * @off: offset of the data in @page
  650. *
  651. * Try to add the data at @page + @off to the last bvec of @bio. This is a
  652. * useful optimisation for file systems with a block size smaller than the
  653. * page size.
  654. *
  655. * Return %true on success or %false on failure.
  656. */
  657. bool __bio_try_merge_page(struct bio *bio, struct page *page,
  658. unsigned int len, unsigned int off)
  659. {
  660. if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
  661. return false;
  662. if (bio->bi_vcnt > 0) {
  663. struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
  664. if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
  665. bv->bv_len += len;
  666. bio->bi_iter.bi_size += len;
  667. return true;
  668. }
  669. }
  670. return false;
  671. }
  672. EXPORT_SYMBOL_GPL(__bio_try_merge_page);
  673. /**
  674. * __bio_add_page - add page to a bio in a new segment
  675. * @bio: destination bio
  676. * @page: page to add
  677. * @len: length of the data to add
  678. * @off: offset of the data in @page
  679. *
  680. * Add the data at @page + @off to @bio as a new bvec. The caller must ensure
  681. * that @bio has space for another bvec.
  682. */
  683. void __bio_add_page(struct bio *bio, struct page *page,
  684. unsigned int len, unsigned int off)
  685. {
  686. struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
  687. WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
  688. WARN_ON_ONCE(bio_full(bio));
  689. bv->bv_page = page;
  690. bv->bv_offset = off;
  691. bv->bv_len = len;
  692. bio->bi_iter.bi_size += len;
  693. bio->bi_vcnt++;
  694. }
  695. EXPORT_SYMBOL_GPL(__bio_add_page);
  696. /**
  697. * bio_add_page - attempt to add page to bio
  698. * @bio: destination bio
  699. * @page: page to add
  700. * @len: vec entry length
  701. * @offset: vec entry offset
  702. *
  703. * Attempt to add a page to the bio_vec maplist. This will only fail
  704. * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
  705. */
  706. int bio_add_page(struct bio *bio, struct page *page,
  707. unsigned int len, unsigned int offset)
  708. {
  709. if (!__bio_try_merge_page(bio, page, len, offset)) {
  710. if (bio_full(bio))
  711. return 0;
  712. __bio_add_page(bio, page, len, offset);
  713. }
  714. return len;
  715. }
  716. EXPORT_SYMBOL(bio_add_page);
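/*
 * Editor's sketch, not part of bio.c: building a bio one page at a time with
 * bio_add_page(), stopping when the bio is full.  Assumes nr_pages is at most
 * BIO_MAX_PAGES; names are hypothetical.
 */
static struct bio *my_build_write_bio(struct block_device *bdev, sector_t sector,
                                      struct page **pages, unsigned int nr_pages)
{
        struct bio *bio = bio_alloc(GFP_KERNEL, nr_pages);
        unsigned int i;

        bio_set_dev(bio, bdev);
        bio->bi_iter.bi_sector = sector;
        bio->bi_opf = REQ_OP_WRITE;
        for (i = 0; i < nr_pages; i++)
                if (bio_add_page(bio, pages[i], PAGE_SIZE, 0) != PAGE_SIZE)
                        break;          /* bio full - submit it and start another */
        return bio;
}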
  717. /**
  718. * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
  719. * @bio: bio to add pages to
  720. * @iter: iov iterator describing the region to be mapped
  721. *
  722. * Pins pages from *iter and appends them to @bio's bvec array. The
  723. * pages will have to be released using put_page() when done.
  724. * For multi-segment *iter, this function only adds pages from the
  725. * next non-empty segment of the iov iterator.
  726. */
  727. static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
  728. {
  729. unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
  730. struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
  731. struct page **pages = (struct page **)bv;
  732. size_t offset;
  733. ssize_t size;
  734. size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
  735. if (unlikely(size <= 0))
  736. return size ? size : -EFAULT;
  737. idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
  738. /*
  739. * Deep magic below: We need to walk the pinned pages backwards
  740. * because we are abusing the space allocated for the bio_vecs
  741. * for the page array. Because the bio_vecs are larger than the
  742. * page pointers by definition this will always work. But it also
  743. * means we can't use bio_add_page, so any changes to its semantics
  744. * need to be reflected here as well.
  745. */
  746. bio->bi_iter.bi_size += size;
  747. bio->bi_vcnt += nr_pages;
  748. while (idx--) {
  749. bv[idx].bv_page = pages[idx];
  750. bv[idx].bv_len = PAGE_SIZE;
  751. bv[idx].bv_offset = 0;
  752. }
  753. bv[0].bv_offset += offset;
  754. bv[0].bv_len -= offset;
  755. bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size;
  756. iov_iter_advance(iter, size);
  757. return 0;
  758. }
  759. /**
  760. * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
  761. * @bio: bio to add pages to
  762. * @iter: iov iterator describing the region to be mapped
  763. *
  764. * Pins pages from *iter and appends them to @bio's bvec array. The
  765. * pages will have to be released using put_page() when done.
  766. * The function tries, but does not guarantee, to pin as many pages as
  767. * fit into the bio, or are requested in *iter, whichever is smaller.
  768. * If MM encounters an error pinning the requested pages, it stops.
  769. * Error is returned only if 0 pages could be pinned.
  770. */
  771. int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
  772. {
  773. unsigned short orig_vcnt = bio->bi_vcnt;
  774. do {
  775. int ret = __bio_iov_iter_get_pages(bio, iter);
  776. if (unlikely(ret))
  777. return bio->bi_vcnt > orig_vcnt ? 0 : ret;
  778. } while (iov_iter_count(iter) && !bio_full(bio));
  779. return 0;
  780. }
  781. EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
  782. static void submit_bio_wait_endio(struct bio *bio)
  783. {
  784. complete(bio->bi_private);
  785. }
  786. /**
  787. * submit_bio_wait - submit a bio, and wait until it completes
  788. * @bio: The &struct bio which describes the I/O
  789. *
  790. * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
  791. * bio_endio() on failure.
  792. *
  793. * WARNING: Unlike how submit_bio() is usually used, this function does not
  794. * result in the bio reference being consumed. The caller must drop the
  795. * reference on its own.
  796. */
  797. int submit_bio_wait(struct bio *bio)
  798. {
  799. DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
  800. bio->bi_private = &done;
  801. bio->bi_end_io = submit_bio_wait_endio;
  802. bio->bi_opf |= REQ_SYNC;
  803. submit_bio(bio);
  804. wait_for_completion_io(&done);
  805. return blk_status_to_errno(bio->bi_status);
  806. }
  807. EXPORT_SYMBOL(submit_bio_wait);
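/*
 * Editor's sketch, not part of bio.c: a synchronous single-page read using
 * submit_bio_wait().  Per the WARNING above, the bio reference is not
 * consumed, so the caller drops it with bio_put().  Hypothetical names.
 */
static int my_read_page_sync(struct block_device *bdev, sector_t sector,
                             struct page *page)
{
        struct bio *bio = bio_alloc(GFP_KERNEL, 1);
        int ret;

        bio_set_dev(bio, bdev);
        bio->bi_iter.bi_sector = sector;
        bio->bi_opf = REQ_OP_READ;
        bio_add_page(bio, page, PAGE_SIZE, 0);

        ret = submit_bio_wait(bio);     /* 0 on success, negative errno otherwise */
        bio_put(bio);
        return ret;
}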
  808. /**
  809. * bio_advance - increment/complete a bio by some number of bytes
  810. * @bio: bio to advance
  811. * @bytes: number of bytes to complete
  812. *
  813. * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
  814. * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
  815. * be updated on the last bvec as well.
  816. *
  817. * @bio will then represent the remaining, uncompleted portion of the io.
  818. */
  819. void bio_advance(struct bio *bio, unsigned bytes)
  820. {
  821. if (bio_integrity(bio))
  822. bio_integrity_advance(bio, bytes);
  823. bio_advance_iter(bio, &bio->bi_iter, bytes);
  824. }
  825. EXPORT_SYMBOL(bio_advance);
  826. void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
  827. struct bio *src, struct bvec_iter *src_iter)
  828. {
  829. struct bio_vec src_bv, dst_bv;
  830. void *src_p, *dst_p;
  831. unsigned bytes;
  832. while (src_iter->bi_size && dst_iter->bi_size) {
  833. src_bv = bio_iter_iovec(src, *src_iter);
  834. dst_bv = bio_iter_iovec(dst, *dst_iter);
  835. bytes = min(src_bv.bv_len, dst_bv.bv_len);
  836. src_p = kmap_atomic(src_bv.bv_page);
  837. dst_p = kmap_atomic(dst_bv.bv_page);
  838. memcpy(dst_p + dst_bv.bv_offset,
  839. src_p + src_bv.bv_offset,
  840. bytes);
  841. kunmap_atomic(dst_p);
  842. kunmap_atomic(src_p);
  843. flush_dcache_page(dst_bv.bv_page);
  844. bio_advance_iter(src, src_iter, bytes);
  845. bio_advance_iter(dst, dst_iter, bytes);
  846. }
  847. }
  848. EXPORT_SYMBOL(bio_copy_data_iter);
  849. /**
  850. * bio_copy_data - copy contents of data buffers from one bio to another
  851. * @src: source bio
  852. * @dst: destination bio
  853. *
  854. * Stops when it reaches the end of either @src or @dst - that is, copies
  855. * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
  856. */
  857. void bio_copy_data(struct bio *dst, struct bio *src)
  858. {
  859. struct bvec_iter src_iter = src->bi_iter;
  860. struct bvec_iter dst_iter = dst->bi_iter;
  861. bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
  862. }
  863. EXPORT_SYMBOL(bio_copy_data);
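/*
 * Editor's sketch, not part of bio.c: a WRITE being bounced through private
 * pages - before submitting the bounce bio, its data is filled from the
 * original bio with bio_copy_data().  Hypothetical name; both bios are
 * assumed to still be at their initial bi_iter positions.
 */
static void my_fill_bounce_bio(struct bio *bounce, struct bio *orig)
{
        bio_copy_data(bounce, orig);    /* destination first, then source */
}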
  864. /**
  865. * bio_list_copy_data - copy contents of data buffers from one chain of bios to
  866. * another
  867. * @src: source bio list
  868. * @dst: destination bio list
  869. *
  870. * Stops when it reaches the end of either the @src list or @dst list - that is,
  871. * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
  872. * bios).
  873. */
  874. void bio_list_copy_data(struct bio *dst, struct bio *src)
  875. {
  876. struct bvec_iter src_iter = src->bi_iter;
  877. struct bvec_iter dst_iter = dst->bi_iter;
  878. while (1) {
  879. if (!src_iter.bi_size) {
  880. src = src->bi_next;
  881. if (!src)
  882. break;
  883. src_iter = src->bi_iter;
  884. }
  885. if (!dst_iter.bi_size) {
  886. dst = dst->bi_next;
  887. if (!dst)
  888. break;
  889. dst_iter = dst->bi_iter;
  890. }
  891. bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
  892. }
  893. }
  894. EXPORT_SYMBOL(bio_list_copy_data);
  895. struct bio_map_data {
  896. int is_our_pages;
  897. struct iov_iter iter;
  898. struct iovec iov[];
  899. };
  900. static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
  901. gfp_t gfp_mask)
  902. {
  903. struct bio_map_data *bmd;
  904. if (data->nr_segs > UIO_MAXIOV)
  905. return NULL;
  906. bmd = kmalloc(sizeof(struct bio_map_data) +
  907. sizeof(struct iovec) * data->nr_segs, gfp_mask);
  908. if (!bmd)
  909. return NULL;
  910. memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
  911. bmd->iter = *data;
  912. bmd->iter.iov = bmd->iov;
  913. return bmd;
  914. }
  915. /**
  916. * bio_copy_from_iter - copy all pages from iov_iter to bio
  917. * @bio: The &struct bio which describes the I/O as destination
  918. * @iter: iov_iter as source
  919. *
  920. * Copy all pages from iov_iter to bio.
  921. * Returns 0 on success, or error on failure.
  922. */
  923. static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
  924. {
  925. int i;
  926. struct bio_vec *bvec;
  927. bio_for_each_segment_all(bvec, bio, i) {
  928. ssize_t ret;
  929. ret = copy_page_from_iter(bvec->bv_page,
  930. bvec->bv_offset,
  931. bvec->bv_len,
  932. iter);
  933. if (!iov_iter_count(iter))
  934. break;
  935. if (ret < bvec->bv_len)
  936. return -EFAULT;
  937. }
  938. return 0;
  939. }
  940. /**
  941. * bio_copy_to_iter - copy all pages from bio to iov_iter
  942. * @bio: The &struct bio which describes the I/O as source
  943. * @iter: iov_iter as destination
  944. *
  945. * Copy all pages from bio to iov_iter.
  946. * Returns 0 on success, or error on failure.
  947. */
  948. static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
  949. {
  950. int i;
  951. struct bio_vec *bvec;
  952. bio_for_each_segment_all(bvec, bio, i) {
  953. ssize_t ret;
  954. ret = copy_page_to_iter(bvec->bv_page,
  955. bvec->bv_offset,
  956. bvec->bv_len,
  957. &iter);
  958. if (!iov_iter_count(&iter))
  959. break;
  960. if (ret < bvec->bv_len)
  961. return -EFAULT;
  962. }
  963. return 0;
  964. }
  965. void bio_free_pages(struct bio *bio)
  966. {
  967. struct bio_vec *bvec;
  968. int i;
  969. bio_for_each_segment_all(bvec, bio, i)
  970. __free_page(bvec->bv_page);
  971. }
  972. EXPORT_SYMBOL(bio_free_pages);
  973. /**
  974. * bio_uncopy_user - finish previously mapped bio
  975. * @bio: bio being terminated
  976. *
  977. * Free pages allocated from bio_copy_user_iov() and write back data
  978. * to user space in case of a read.
  979. */
  980. int bio_uncopy_user(struct bio *bio)
  981. {
  982. struct bio_map_data *bmd = bio->bi_private;
  983. int ret = 0;
  984. if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
  985. /*
  986. * if we're in a workqueue, the request is orphaned, so
  987. * don't copy into a random user address space, just free
  988. * and return -EINTR so user space doesn't expect any data.
  989. */
  990. if (!current->mm)
  991. ret = -EINTR;
  992. else if (bio_data_dir(bio) == READ)
  993. ret = bio_copy_to_iter(bio, bmd->iter);
  994. if (bmd->is_our_pages)
  995. bio_free_pages(bio);
  996. }
  997. kfree(bmd);
  998. bio_put(bio);
  999. return ret;
  1000. }
  1001. /**
  1002. * bio_copy_user_iov - copy user data to bio
  1003. * @q: destination block queue
  1004. * @map_data: pointer to the rq_map_data holding pages (if necessary)
  1005. * @iter: iovec iterator
  1006. * @gfp_mask: memory allocation flags
  1007. *
  1008. * Prepares and returns a bio for indirect user io, bouncing data
  1009. * to/from kernel pages as necessary. Must be paired with
  1010. * a call to bio_uncopy_user() on io completion.
  1011. */
  1012. struct bio *bio_copy_user_iov(struct request_queue *q,
  1013. struct rq_map_data *map_data,
  1014. struct iov_iter *iter,
  1015. gfp_t gfp_mask)
  1016. {
  1017. struct bio_map_data *bmd;
  1018. struct page *page;
  1019. struct bio *bio;
  1020. int i = 0, ret;
  1021. int nr_pages;
  1022. unsigned int len = iter->count;
  1023. unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
  1024. bmd = bio_alloc_map_data(iter, gfp_mask);
  1025. if (!bmd)
  1026. return ERR_PTR(-ENOMEM);
  1027. /*
  1028. * We need to do a deep copy of the iov_iter including the iovecs.
  1029. * The caller provided iov might point to an on-stack or otherwise
  1030. * shortlived one.
  1031. */
  1032. bmd->is_our_pages = map_data ? 0 : 1;
  1033. nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
  1034. if (nr_pages > BIO_MAX_PAGES)
  1035. nr_pages = BIO_MAX_PAGES;
  1036. ret = -ENOMEM;
  1037. bio = bio_kmalloc(gfp_mask, nr_pages);
  1038. if (!bio)
  1039. goto out_bmd;
  1040. ret = 0;
  1041. if (map_data) {
  1042. nr_pages = 1 << map_data->page_order;
  1043. i = map_data->offset / PAGE_SIZE;
  1044. }
  1045. while (len) {
  1046. unsigned int bytes = PAGE_SIZE;
  1047. bytes -= offset;
  1048. if (bytes > len)
  1049. bytes = len;
  1050. if (map_data) {
  1051. if (i == map_data->nr_entries * nr_pages) {
  1052. ret = -ENOMEM;
  1053. break;
  1054. }
  1055. page = map_data->pages[i / nr_pages];
  1056. page += (i % nr_pages);
  1057. i++;
  1058. } else {
  1059. page = alloc_page(q->bounce_gfp | gfp_mask);
  1060. if (!page) {
  1061. ret = -ENOMEM;
  1062. break;
  1063. }
  1064. }
  1065. if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
  1066. if (!map_data)
  1067. __free_page(page);
  1068. break;
  1069. }
  1070. len -= bytes;
  1071. offset = 0;
  1072. }
  1073. if (ret)
  1074. goto cleanup;
  1075. if (map_data)
  1076. map_data->offset += bio->bi_iter.bi_size;
  1077. /*
  1078. * success
  1079. */
  1080. if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
  1081. (map_data && map_data->from_user)) {
  1082. ret = bio_copy_from_iter(bio, iter);
  1083. if (ret)
  1084. goto cleanup;
  1085. } else {
  1086. if (bmd->is_our_pages)
  1087. zero_fill_bio(bio);
  1088. iov_iter_advance(iter, bio->bi_iter.bi_size);
  1089. }
  1090. bio->bi_private = bmd;
  1091. if (map_data && map_data->null_mapped)
  1092. bio_set_flag(bio, BIO_NULL_MAPPED);
  1093. return bio;
  1094. cleanup:
  1095. if (!map_data)
  1096. bio_free_pages(bio);
  1097. bio_put(bio);
  1098. out_bmd:
  1099. kfree(bmd);
  1100. return ERR_PTR(ret);
  1101. }
  1102. /**
  1103. * bio_map_user_iov - map user iovec into bio
  1104. * @q: the struct request_queue for the bio
  1105. * @iter: iovec iterator
  1106. * @gfp_mask: memory allocation flags
  1107. *
  1108. * Map the user space address into a bio suitable for io to a block
  1109. * device. Returns an error pointer in case of error.
  1110. */
  1111. struct bio *bio_map_user_iov(struct request_queue *q,
  1112. struct iov_iter *iter,
  1113. gfp_t gfp_mask)
  1114. {
  1115. int j;
  1116. struct bio *bio;
  1117. int ret;
  1118. struct bio_vec *bvec;
  1119. if (!iov_iter_count(iter))
  1120. return ERR_PTR(-EINVAL);
  1121. bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
  1122. if (!bio)
  1123. return ERR_PTR(-ENOMEM);
  1124. while (iov_iter_count(iter)) {
  1125. struct page **pages;
  1126. ssize_t bytes;
  1127. size_t offs, added = 0;
  1128. int npages;
  1129. bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
  1130. if (unlikely(bytes <= 0)) {
  1131. ret = bytes ? bytes : -EFAULT;
  1132. goto out_unmap;
  1133. }
  1134. npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
  1135. if (unlikely(offs & queue_dma_alignment(q))) {
  1136. ret = -EINVAL;
  1137. j = 0;
  1138. } else {
  1139. for (j = 0; j < npages; j++) {
  1140. struct page *page = pages[j];
  1141. unsigned int n = PAGE_SIZE - offs;
  1142. unsigned short prev_bi_vcnt = bio->bi_vcnt;
  1143. if (n > bytes)
  1144. n = bytes;
  1145. if (!bio_add_pc_page(q, bio, page, n, offs))
  1146. break;
  1147. /*
  1148. * check if vector was merged with previous
  1149. * drop page reference if needed
  1150. */
  1151. if (bio->bi_vcnt == prev_bi_vcnt)
  1152. put_page(page);
  1153. added += n;
  1154. bytes -= n;
  1155. offs = 0;
  1156. }
  1157. iov_iter_advance(iter, added);
  1158. }
  1159. /*
  1160. * release the pages we didn't map into the bio, if any
  1161. */
  1162. while (j < npages)
  1163. put_page(pages[j++]);
  1164. kvfree(pages);
  1165. /* couldn't stuff something into bio? */
  1166. if (bytes)
  1167. break;
  1168. }
  1169. bio_set_flag(bio, BIO_USER_MAPPED);
  1170. /*
  1171. * subtle -- if bio_map_user_iov() ended up bouncing a bio,
  1172. * it would normally disappear when its bi_end_io is run.
  1173. * however, we need it for the unmap, so grab an extra
  1174. * reference to it
  1175. */
  1176. bio_get(bio);
  1177. return bio;
  1178. out_unmap:
  1179. bio_for_each_segment_all(bvec, bio, j) {
  1180. put_page(bvec->bv_page);
  1181. }
  1182. bio_put(bio);
  1183. return ERR_PTR(ret);
  1184. }
  1185. static void __bio_unmap_user(struct bio *bio)
  1186. {
  1187. struct bio_vec *bvec;
  1188. int i;
  1189. /*
  1190. * make sure we dirty pages we wrote to
  1191. */
  1192. bio_for_each_segment_all(bvec, bio, i) {
  1193. if (bio_data_dir(bio) == READ)
  1194. set_page_dirty_lock(bvec->bv_page);
  1195. put_page(bvec->bv_page);
  1196. }
  1197. bio_put(bio);
  1198. }
  1199. /**
  1200. * bio_unmap_user - unmap a bio
  1201. * @bio: the bio being unmapped
  1202. *
  1203. * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
  1204. * process context.
  1205. *
  1206. * bio_unmap_user() may sleep.
  1207. */
  1208. void bio_unmap_user(struct bio *bio)
  1209. {
  1210. __bio_unmap_user(bio);
  1211. bio_put(bio);
  1212. }
  1213. static void bio_map_kern_endio(struct bio *bio)
  1214. {
  1215. bio_put(bio);
  1216. }
  1217. /**
  1218. * bio_map_kern - map kernel address into bio
  1219. * @q: the struct request_queue for the bio
  1220. * @data: pointer to buffer to map
  1221. * @len: length in bytes
  1222. * @gfp_mask: allocation flags for bio allocation
  1223. *
  1224. * Map the kernel address into a bio suitable for io to a block
  1225. * device. Returns an error pointer in case of error.
  1226. */
  1227. struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
  1228. gfp_t gfp_mask)
  1229. {
  1230. unsigned long kaddr = (unsigned long)data;
  1231. unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
  1232. unsigned long start = kaddr >> PAGE_SHIFT;
  1233. const int nr_pages = end - start;
  1234. int offset, i;
  1235. struct bio *bio;
  1236. bio = bio_kmalloc(gfp_mask, nr_pages);
  1237. if (!bio)
  1238. return ERR_PTR(-ENOMEM);
  1239. offset = offset_in_page(kaddr);
  1240. for (i = 0; i < nr_pages; i++) {
  1241. unsigned int bytes = PAGE_SIZE - offset;
  1242. if (len <= 0)
  1243. break;
  1244. if (bytes > len)
  1245. bytes = len;
  1246. if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
  1247. offset) < bytes) {
  1248. /* we don't support partial mappings */
  1249. bio_put(bio);
  1250. return ERR_PTR(-EINVAL);
  1251. }
  1252. data += bytes;
  1253. len -= bytes;
  1254. offset = 0;
  1255. }
  1256. bio->bi_end_io = bio_map_kern_endio;
  1257. return bio;
  1258. }
  1259. EXPORT_SYMBOL(bio_map_kern);
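/*
 * Editor's sketch, not part of bio.c: writing a kernel buffer to a block
 * device via bio_map_kern().  Note that submit_bio_wait() replaces the
 * bi_end_io installed by bio_map_kern(), so the bio is dropped explicitly.
 * Hypothetical names.
 */
static int my_write_kernel_buf(struct block_device *bdev, sector_t sector,
                               void *buf, unsigned int len)
{
        struct bio *bio = bio_map_kern(bdev_get_queue(bdev), buf, len, GFP_KERNEL);
        int ret;

        if (IS_ERR(bio))
                return PTR_ERR(bio);
        bio_set_dev(bio, bdev);
        bio->bi_iter.bi_sector = sector;
        bio->bi_opf = REQ_OP_WRITE;
        ret = submit_bio_wait(bio);
        bio_put(bio);
        return ret;
}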
  1260. static void bio_copy_kern_endio(struct bio *bio)
  1261. {
  1262. bio_free_pages(bio);
  1263. bio_put(bio);
  1264. }
  1265. static void bio_copy_kern_endio_read(struct bio *bio)
  1266. {
  1267. char *p = bio->bi_private;
  1268. struct bio_vec *bvec;
  1269. int i;
  1270. bio_for_each_segment_all(bvec, bio, i) {
  1271. memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
  1272. p += bvec->bv_len;
  1273. }
  1274. bio_copy_kern_endio(bio);
  1275. }
  1276. /**
  1277. * bio_copy_kern - copy kernel address into bio
  1278. * @q: the struct request_queue for the bio
  1279. * @data: pointer to buffer to copy
  1280. * @len: length in bytes
  1281. * @gfp_mask: allocation flags for bio and page allocation
  1282. * @reading: data direction is READ
  1283. *
  1284. * copy the kernel address into a bio suitable for io to a block
  1285. * device. Returns an error pointer in case of error.
  1286. */
  1287. struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
  1288. gfp_t gfp_mask, int reading)
  1289. {
  1290. unsigned long kaddr = (unsigned long)data;
  1291. unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
  1292. unsigned long start = kaddr >> PAGE_SHIFT;
  1293. struct bio *bio;
  1294. void *p = data;
  1295. int nr_pages = 0;
  1296. /*
  1297. * Overflow, abort
  1298. */
  1299. if (end < start)
  1300. return ERR_PTR(-EINVAL);
  1301. nr_pages = end - start;
  1302. bio = bio_kmalloc(gfp_mask, nr_pages);
  1303. if (!bio)
  1304. return ERR_PTR(-ENOMEM);
  1305. while (len) {
  1306. struct page *page;
  1307. unsigned int bytes = PAGE_SIZE;
  1308. if (bytes > len)
  1309. bytes = len;
  1310. page = alloc_page(q->bounce_gfp | gfp_mask);
  1311. if (!page)
  1312. goto cleanup;
  1313. if (!reading)
  1314. memcpy(page_address(page), p, bytes);
  1315. if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
  1316. break;
  1317. len -= bytes;
  1318. p += bytes;
  1319. }
  1320. if (reading) {
  1321. bio->bi_end_io = bio_copy_kern_endio_read;
  1322. bio->bi_private = data;
  1323. } else {
  1324. bio->bi_end_io = bio_copy_kern_endio;
  1325. }
  1326. return bio;
  1327. cleanup:
  1328. bio_free_pages(bio);
  1329. bio_put(bio);
  1330. return ERR_PTR(-ENOMEM);
  1331. }
  1332. /*
  1333. * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
  1334. * for performing direct-IO in BIOs.
  1335. *
  1336. * The problem is that we cannot run set_page_dirty() from interrupt context
  1337. * because the required locks are not interrupt-safe. So what we can do is to
  1338. * mark the pages dirty _before_ performing IO. And in interrupt context,
  1339. * check that the pages are still dirty. If so, fine. If not, redirty them
  1340. * in process context.
  1341. *
  1342. * We special-case compound pages here: normally this means reads into hugetlb
  1343. * pages. The logic in here doesn't really work right for compound pages
  1344. * because the VM does not uniformly chase down the head page in all cases.
  1345. * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
  1346. * handle them at all. So we skip compound pages here at an early stage.
  1347. *
  1348. * Note that this code is very hard to test under normal circumstances because
  1349. * direct-io pins the pages with get_user_pages(). This makes
  1350. * is_page_cache_freeable return false, and the VM will not clean the pages.
  1351. * But other code (eg, flusher threads) could clean the pages if they are mapped
  1352. * pagecache.
  1353. *
  1354. * Simply disabling the call to bio_set_pages_dirty() is a good way to test the
  1355. * deferred bio dirtying paths.
  1356. */
  1357. /*
  1358. * bio_set_pages_dirty() will mark all the bio's pages as dirty.
  1359. */
  1360. void bio_set_pages_dirty(struct bio *bio)
  1361. {
  1362. struct bio_vec *bvec;
  1363. int i;
  1364. bio_for_each_segment_all(bvec, bio, i) {
  1365. if (!PageCompound(bvec->bv_page))
  1366. set_page_dirty_lock(bvec->bv_page);
  1367. }
  1368. }
  1369. EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
  1370. static void bio_release_pages(struct bio *bio)
  1371. {
  1372. struct bio_vec *bvec;
  1373. int i;
  1374. bio_for_each_segment_all(bvec, bio, i)
  1375. put_page(bvec->bv_page);
  1376. }
  1377. /*
  1378. * bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
  1379. * If they are, then fine. If, however, some pages are clean then they must
  1380. * have been written out during the direct-IO read. So we take another ref on
  1381. * the BIO and re-dirty the pages in process context.
  1382. *
  1383. * It is expected that bio_check_pages_dirty() will wholly own the BIO from
  1384. * here on. It will run one put_page() against each page and will run one
  1385. * bio_put() against the BIO.
  1386. */
  1387. static void bio_dirty_fn(struct work_struct *work);
  1388. static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
  1389. static DEFINE_SPINLOCK(bio_dirty_lock);
  1390. static struct bio *bio_dirty_list;
  1391. /*
  1392. * This runs in process context
  1393. */
  1394. static void bio_dirty_fn(struct work_struct *work)
  1395. {
  1396. struct bio *bio, *next;
  1397. spin_lock_irq(&bio_dirty_lock);
  1398. next = bio_dirty_list;
  1399. bio_dirty_list = NULL;
  1400. spin_unlock_irq(&bio_dirty_lock);
  1401. while ((bio = next) != NULL) {
  1402. next = bio->bi_private;
  1403. bio_set_pages_dirty(bio);
  1404. bio_release_pages(bio);
  1405. bio_put(bio);
  1406. }
  1407. }
  1408. void bio_check_pages_dirty(struct bio *bio)
  1409. {
  1410. struct bio_vec *bvec;
  1411. unsigned long flags;
  1412. int i;
  1413. bio_for_each_segment_all(bvec, bio, i) {
  1414. if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
  1415. goto defer;
  1416. }
  1417. bio_release_pages(bio);
  1418. bio_put(bio);
  1419. return;
  1420. defer:
  1421. spin_lock_irqsave(&bio_dirty_lock, flags);
  1422. bio->bi_private = bio_dirty_list;
  1423. bio_dirty_list = bio;
  1424. spin_unlock_irqrestore(&bio_dirty_lock, flags);
  1425. schedule_work(&bio_dirty_work);
  1426. }
  1427. EXPORT_SYMBOL_GPL(bio_check_pages_dirty);

void generic_start_io_acct(struct request_queue *q, int op,
			   unsigned long sectors, struct hd_struct *part)
{
	const int sgrp = op_stat_group(op);
	int cpu = part_stat_lock();

	part_round_stats(q, cpu, part);
	part_stat_inc(cpu, part, ios[sgrp]);
	part_stat_add(cpu, part, sectors[sgrp], sectors);
	part_inc_in_flight(q, part, op_is_write(op));

	part_stat_unlock();
}
EXPORT_SYMBOL(generic_start_io_acct);

void generic_end_io_acct(struct request_queue *q, int req_op,
			 struct hd_struct *part, unsigned long start_time)
{
	unsigned long duration = jiffies - start_time;
	const int sgrp = op_stat_group(req_op);
	int cpu = part_stat_lock();

	part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
	part_round_stats(q, cpu, part);
	part_dec_in_flight(q, part, op_is_write(req_op));

	part_stat_unlock();
}
EXPORT_SYMBOL(generic_end_io_acct);
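
/*
 * Illustrative sketch (not part of bio.c): the usual accounting pattern in a
 * bio-based driver's make_request function.  my_make_request() and
 * struct my_dev are hypothetical.
 *
 *	static blk_qc_t my_make_request(struct request_queue *q, struct bio *bio)
 *	{
 *		struct my_dev *dev = q->queuedata;
 *		unsigned long start = jiffies;
 *
 *		generic_start_io_acct(q, bio_op(bio), bio_sectors(bio),
 *				      &dev->disk->part0);
 *
 *		// ... perform the transfer ...
 *
 *		generic_end_io_acct(q, bio_op(bio), &dev->disk->part0, start);
 *		bio_endio(bio);
 *		return BLK_QC_T_NONE;
 *	}
 */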

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
void bio_flush_dcache_pages(struct bio *bi)
{
	struct bio_vec bvec;
	struct bvec_iter iter;

	bio_for_each_segment(bvec, bi, iter)
		flush_dcache_page(bvec.bv_page);
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif

static inline bool bio_remaining_done(struct bio *bio)
{
	/*
	 * If we're not chaining, then ->__bi_remaining is always 1 and
	 * we always end io on the first invocation.
	 */
	if (!bio_flagged(bio, BIO_CHAIN))
		return true;

	BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);

	if (atomic_dec_and_test(&bio->__bi_remaining)) {
		bio_clear_flag(bio, BIO_CHAIN);
		return true;
	}

	return false;
}

/**
 * bio_endio - end I/O on a bio
 * @bio:	bio
 *
 * Description:
 *   bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
 *   way to end I/O on a bio. No one should call bi_end_io() directly on a
 *   bio unless they own it and thus know that it has an end_io function.
 *
 *   bio_endio() can be called several times on a bio that has been chained
 *   using bio_chain().  The ->bi_end_io() function will only be called the
 *   last time.  At this point the BLK_TA_COMPLETE tracing event will be
 *   generated if BIO_TRACE_COMPLETION is set.
 **/
void bio_endio(struct bio *bio)
{
again:
	if (!bio_remaining_done(bio))
		return;
	if (!bio_integrity_endio(bio))
		return;

	if (bio->bi_disk)
		rq_qos_done_bio(bio->bi_disk->queue, bio);

	/*
	 * Need to have a real endio function for chained bios, otherwise
	 * various corner cases will break (like stacking block devices that
	 * save/restore bi_end_io) - however, we want to avoid unbounded
	 * recursion and blowing the stack.  Tail call optimization would
	 * handle this, but compiling with frame pointers also disables
	 * gcc's sibling call optimization.
	 */
	if (bio->bi_end_io == bio_chain_endio) {
		bio = __bio_chain_endio(bio);
		goto again;
	}

	if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
		trace_block_bio_complete(bio->bi_disk->queue, bio,
					 blk_status_to_errno(bio->bi_status));
		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
	}

	blk_throtl_bio_endio(bio);
	/* release cgroup info */
	bio_uninit(bio);
	if (bio->bi_end_io)
		bio->bi_end_io(bio);
}
EXPORT_SYMBOL(bio_endio);
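
/*
 * Illustrative sketch (not part of bio.c): completing a chained pair of bios.
 * Only the parent's ->bi_end_io runs, and only after both bios have passed
 * through bio_endio().  my_end_io() is hypothetical; device, sector and
 * payload setup is omitted.
 *
 *	struct bio *parent = bio_alloc(GFP_NOIO, nr_vecs);
 *	struct bio *child  = bio_alloc(GFP_NOIO, nr_vecs);
 *
 *	parent->bi_end_io = my_end_io;
 *	bio_chain(child, parent);	// fold child completion into parent
 *
 *	submit_bio(child);
 *	submit_bio(parent);
 *	// my_end_io(parent) runs once both bios have reached bio_endio().
 */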

/**
 * bio_split - split a bio
 * @bio:	bio to split
 * @sectors:	number of sectors to split from the front of @bio
 * @gfp:	gfp mask
 * @bs:		bio set to allocate from
 *
 * Allocates and returns a new bio which represents @sectors from the start of
 * @bio, and updates @bio to represent the remaining sectors.
 *
 * Unless this is a discard request the newly allocated bio will point
 * to @bio's bi_io_vec; it is the caller's responsibility to ensure that
 * @bio is not freed before the split.
 */
struct bio *bio_split(struct bio *bio, int sectors,
		      gfp_t gfp, struct bio_set *bs)
{
	struct bio *split;

	BUG_ON(sectors <= 0);
	BUG_ON(sectors >= bio_sectors(bio));

	split = bio_clone_fast(bio, gfp, bs);
	if (!split)
		return NULL;

	split->bi_iter.bi_size = sectors << 9;

	if (bio_integrity(split))
		bio_integrity_trim(split);

	bio_advance(bio, split->bi_iter.bi_size);
	bio->bi_iter.bi_done = 0;

	if (bio_flagged(bio, BIO_TRACE_COMPLETION))
		bio_set_flag(split, BIO_TRACE_COMPLETION);

	return split;
}
EXPORT_SYMBOL(bio_split);
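
/*
 * Illustrative sketch (not part of bio.c): the usual split-and-chain pattern
 * used by stacking drivers when a bio crosses a boundary they cannot handle
 * in one go.  max_sectors is a hypothetical per-device limit, and a driver
 * would normally pass its own bio_set rather than fs_bio_set.
 *
 *	if (bio_sectors(bio) > max_sectors) {
 *		struct bio *split;
 *
 *		split = bio_split(bio, max_sectors, GFP_NOIO, &fs_bio_set);
 *		bio_chain(split, bio);		// parent completes after both
 *		generic_make_request(bio);	// re-queue the remainder
 *		bio = split;			// carry on with the front part
 *	}
 */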

/**
 * bio_trim - trim a bio
 * @bio:	bio to trim
 * @offset:	number of sectors to trim from the front of @bio
 * @size:	size we want to trim @bio to, in sectors
 */
void bio_trim(struct bio *bio, int offset, int size)
{
	/* 'bio' is a cloned bio which we need to trim to match
	 * the given offset and size.
	 */

	size <<= 9;
	if (offset == 0 && size == bio->bi_iter.bi_size)
		return;

	bio_clear_flag(bio, BIO_SEG_VALID);

	bio_advance(bio, offset << 9);
	bio->bi_iter.bi_size = size;

	if (bio_integrity(bio))
		bio_integrity_trim(bio);
}
EXPORT_SYMBOL_GPL(bio_trim);
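
/*
 * Illustrative sketch (not part of bio.c): restricting a clone to a window of
 * the original bio, i.e. sectors [start, start + len).  The shifts by 9 above
 * convert 512-byte sectors to bytes.  start and len are hypothetical names.
 *
 *	struct bio *clone = bio_clone_fast(bio, GFP_NOIO, &fs_bio_set);
 *
 *	if (clone)
 *		bio_trim(clone, start, len);	// both arguments in sectors
 */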

/*
 * create memory pools for biovec's in a bio_set.
 * use the global biovec slabs created for general use.
 */
int biovec_init_pool(mempool_t *pool, int pool_entries)
{
	struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX;

	return mempool_init_slab_pool(pool, pool_entries, bp->slab);
}

/*
 * bioset_exit - exit a bioset initialized with bioset_init()
 *
 * May be called on a zeroed but uninitialized bioset (i.e. allocated with
 * kzalloc()).
 */
void bioset_exit(struct bio_set *bs)
{
	if (bs->rescue_workqueue)
		destroy_workqueue(bs->rescue_workqueue);
	bs->rescue_workqueue = NULL;

	mempool_exit(&bs->bio_pool);
	mempool_exit(&bs->bvec_pool);

	bioset_integrity_free(bs);
	if (bs->bio_slab)
		bio_put_slab(bs);
	bs->bio_slab = NULL;
}
EXPORT_SYMBOL(bioset_exit);

/**
 * bioset_init - Initialize a bio_set
 * @bs:		pool to initialize
 * @pool_size:	Number of bio and bio_vecs to cache in the mempool
 * @front_pad:	Number of bytes to allocate in front of the returned bio
 * @flags:	Flags to modify behavior, currently %BIOSET_NEED_BVECS
 *		and %BIOSET_NEED_RESCUER
 *
 * Description:
 *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
 *    to ask for a number of bytes to be allocated in front of the bio.
 *    Front pad allocation is useful for embedding the bio inside
 *    another structure, to avoid allocating extra data to go with the bio.
 *    Note that the bio must be embedded at the END of that structure always,
 *    or things will break badly.
 *    If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
 *    for allocating iovecs.  This pool is not needed e.g. for bio_clone_fast().
 *    If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used
 *    to dispatch queued requests when the mempool runs out of space.
 *
 */
int bioset_init(struct bio_set *bs,
		unsigned int pool_size,
		unsigned int front_pad,
		int flags)
{
	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);

	bs->front_pad = front_pad;

	spin_lock_init(&bs->rescue_lock);
	bio_list_init(&bs->rescue_list);
	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);

	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
	if (!bs->bio_slab)
		return -ENOMEM;

	if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))
		goto bad;

	if ((flags & BIOSET_NEED_BVECS) &&
	    biovec_init_pool(&bs->bvec_pool, pool_size))
		goto bad;

	if (!(flags & BIOSET_NEED_RESCUER))
		return 0;

	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
	if (!bs->rescue_workqueue)
		goto bad;

	return 0;
bad:
	bioset_exit(bs);
	return -ENOMEM;
}
EXPORT_SYMBOL(bioset_init);
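
/*
 * Illustrative sketch (not part of bio.c): a driver embedding the bio at the
 * end of its own per-I/O structure and sizing front_pad accordingly.
 * struct my_io, my_bio_set and my_init() are hypothetical.
 *
 *	struct my_io {
 *		void		*private_data;
 *		struct bio	bio;	// must be the last member
 *	};
 *
 *	static struct bio_set my_bio_set;
 *
 *	static int __init my_init(void)
 *	{
 *		return bioset_init(&my_bio_set, BIO_POOL_SIZE,
 *				   offsetof(struct my_io, bio),
 *				   BIOSET_NEED_BVECS);
 *	}
 *
 *	// Later, bio_alloc_bioset(GFP_NOIO, nvecs, &my_bio_set) returns a bio
 *	// and container_of(bio, struct my_io, bio) recovers the wrapper.
 */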

/*
 * Initialize and setup a new bio_set, based on the settings from
 * another bio_set.
 */
int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
{
	int flags;

	flags = 0;
	if (src->bvec_pool.min_nr)
		flags |= BIOSET_NEED_BVECS;
	if (src->rescue_workqueue)
		flags |= BIOSET_NEED_RESCUER;

	return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
}
EXPORT_SYMBOL(bioset_init_from_src);

#ifdef CONFIG_BLK_CGROUP

#ifdef CONFIG_MEMCG
/**
 * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
 * @bio: target bio
 * @page: the page to lookup the blkcg from
 *
 * Associate @bio with the blkcg from @page's owning memcg.  This works like
 * every other associate function wrt references.
 */
int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
{
	struct cgroup_subsys_state *blkcg_css;

	if (unlikely(bio->bi_css))
		return -EBUSY;
	if (!page->mem_cgroup)
		return 0;
	blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
				     &io_cgrp_subsys);
	bio->bi_css = blkcg_css;
	return 0;
}
#endif /* CONFIG_MEMCG */

/**
 * bio_associate_blkcg - associate a bio with the specified blkcg
 * @bio: target bio
 * @blkcg_css: css of the blkcg to associate
 *
 * Associate @bio with the blkcg specified by @blkcg_css.  Block layer will
 * treat @bio as if it were issued by a task which belongs to the blkcg.
 *
 * This function takes an extra reference of @blkcg_css which will be put
 * when @bio is released.  The caller must own @bio and is responsible for
 * synchronizing calls to this function.
 */
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
{
	if (unlikely(bio->bi_css))
		return -EBUSY;
	css_get(blkcg_css);
	bio->bi_css = blkcg_css;
	return 0;
}
EXPORT_SYMBOL_GPL(bio_associate_blkcg);

/**
 * bio_associate_blkg - associate a bio with the specified blkg
 * @bio: target bio
 * @blkg: the blkg to associate
 *
 * Associate @bio with the blkg specified by @blkg.  This is the queue specific
 * blkcg information associated with the @bio, a reference will be taken on the
 * @blkg and will be freed when the bio is freed.
 */
int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
{
	if (unlikely(bio->bi_blkg))
		return -EBUSY;
	if (!blkg_try_get(blkg))
		return -ENODEV;
	bio->bi_blkg = blkg;
	return 0;
}

/**
 * bio_disassociate_task - undo bio_associate_current()
 * @bio: target bio
 */
void bio_disassociate_task(struct bio *bio)
{
	if (bio->bi_ioc) {
		put_io_context(bio->bi_ioc);
		bio->bi_ioc = NULL;
	}
	if (bio->bi_css) {
		css_put(bio->bi_css);
		bio->bi_css = NULL;
	}
	if (bio->bi_blkg) {
		blkg_put(bio->bi_blkg);
		bio->bi_blkg = NULL;
	}
}

/**
 * bio_clone_blkcg_association - clone blkcg association from src to dst bio
 * @dst: destination bio
 * @src: source bio
 */
void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
{
	if (src->bi_css)
		WARN_ON(bio_associate_blkcg(dst, src->bi_css));
}
EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
#endif /* CONFIG_BLK_CGROUP */

static void __init biovec_init_slabs(void)
{
	int i;

	for (i = 0; i < BVEC_POOL_NR; i++) {
		int size;
		struct biovec_slab *bvs = bvec_slabs + i;

		if (bvs->nr_vecs <= BIO_INLINE_VECS) {
			bvs->slab = NULL;
			continue;
		}

		size = bvs->nr_vecs * sizeof(struct bio_vec);
		bvs->slab = kmem_cache_create(bvs->name, size, 0,
					      SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
	}
}

static int __init init_bio(void)
{
	bio_slab_max = 2;
	bio_slab_nr = 0;
	bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab),
			    GFP_KERNEL);
	if (!bio_slabs)
		panic("bio: can't allocate bios\n");

	bio_integrity_init();
	biovec_init_slabs();

	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
		panic("bio: can't allocate bios\n");

	if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
		panic("bio: can't create integrity pool\n");

	return 0;
}
subsys_initcall(init_bio);