/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;
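
/*
 * Completion callback for read bios. For encrypted bios the work is handed
 * off to f2fs_end_io_crypto_work() (or the crypto context is released on
 * error); otherwise each page is marked uptodate or error and unlocked here.
 */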
static void f2fs_read_end_io(struct bio *bio, int err)
{
	struct bio_vec *bvec;
	int i;

	if (f2fs_bio_encrypted(bio)) {
		if (err) {
			f2fs_release_crypto_ctx(bio->bi_private);
		} else {
			f2fs_end_io_crypto_work(bio->bi_private, bio);
			return;
		}
	}

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;

		if (!err) {
			SetPageUptodate(page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}
		unlock_page(page);
	}
	bio_put(bio);
}
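
/*
 * Completion callback for write bios: on error the page is re-dirtied, the
 * mapping is flagged with AS_EIO and checkpointing is stopped; in all cases
 * writeback is ended and waiters on sbi->cp_wait are woken once no
 * F2FS_WRITEBACK pages remain.
 */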
static void f2fs_write_end_io(struct bio *bio, int err)
{
	struct f2fs_sb_info *sbi = bio->bi_private;
	struct bio_vec *bvec;
	int i;

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;

		f2fs_restore_and_release_control_page(&page);

		if (unlikely(err)) {
			set_page_dirty(page);
			set_bit(AS_EIO, &page->mapping->flags);
			f2fs_stop_checkpoint(sbi);
		}
		end_page_writeback(page);
		dec_page_count(sbi, F2FS_WRITEBACK);
	}

	if (!get_pages(sbi, F2FS_WRITEBACK) &&
			!list_empty(&sbi->cp_wait.task_list))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				int npages, bool is_read)
{
	struct bio *bio;

	/* No failure on bio allocation */
	bio = bio_alloc(GFP_NOIO, npages);

	bio->bi_bdev = sbi->sb->s_bdev;
	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
	bio->bi_private = is_read ? NULL : sbi;

	return bio;
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	if (is_read_io(fio->rw))
		trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
	else
		trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);

	submit_bio(fio->rw, io->bio);
	io->bio = NULL;
}

void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
				enum page_type type, int rw)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io;

	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];

	down_write(&io->io_rwsem);

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		if (test_opt(sbi, NOBARRIER))
			io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
		else
			io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
	}
	__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

/*
 * Fill the locked page with data located in the block address.
 * Return unlocked page.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	/* Allocate a new bio */
	bio = __bio_alloc(fio->sbi, fio->blk_addr, 1, is_read_io(fio->rw));

	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
		bio_put(bio);
		f2fs_put_page(page, 1);
		return -EFAULT;
	}

	submit_bio(fio->rw, bio);
	return 0;
}
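
/*
 * Merged-bio submission path: requests to consecutive block addresses with
 * the same rw flags are packed into io->bio and submitted lazily, either
 * here when further merging is impossible or via f2fs_submit_merged_bio().
 */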
void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io;
	bool is_read = is_read_io(fio->rw);
	struct page *bio_page;

	io = is_read ? &sbi->read_io : &sbi->write_io[btype];

	verify_block_addr(sbi, fio->blk_addr);

	down_write(&io->io_rwsem);

	if (!is_read)
		inc_page_count(sbi, F2FS_WRITEBACK);

	if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 ||
						io->fio.rw != fio->rw))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		int bio_blocks = MAX_BIO_BLOCKS(sbi);

		io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read);
		io->fio = *fio;
	}

	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;

	if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) <
							PAGE_CACHE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	io->last_block_in_bio = fio->blk_addr;
	f2fs_trace_ios(fio, 0);

	up_write(&io->io_rwsem);
	trace_f2fs_submit_page_mbio(fio->page, fio);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
void set_data_blkaddr(struct dnode_of_data *dn)
{
	struct f2fs_node *rn;
	__le32 *addr_array;
	struct page *node_page = dn->node_page;
	unsigned int ofs_in_node = dn->ofs_in_node;

	f2fs_wait_on_page_writeback(node_page, NODE);

	rn = F2FS_NODE(node_page);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
	set_page_dirty(node_page);
}

int reserve_new_block(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);

	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
		return -ENOSPC;

	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);

	dn->data_blkaddr = NEW_ADDR;
	set_data_blkaddr(dn);
	mark_inode_dirty(dn->inode);
	sync_inode_page(dn);
	return 0;
}

int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}
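
/*
 * Legacy per-inode extent cache: a single extent_info embedded in
 * f2fs_inode_info, used when the EXTENT_CACHE mount option is not set.
 */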
static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs,
							struct extent_info *ei)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	pgoff_t start_fofs, end_fofs;
	block_t start_blkaddr;

	read_lock(&fi->ext_lock);
	if (fi->ext.len == 0) {
		read_unlock(&fi->ext_lock);
		return false;
	}

	stat_inc_total_hit(inode->i_sb);

	start_fofs = fi->ext.fofs;
	end_fofs = fi->ext.fofs + fi->ext.len - 1;
	start_blkaddr = fi->ext.blk;

	if (pgofs >= start_fofs && pgofs <= end_fofs) {
		*ei = fi->ext;
		stat_inc_read_hit(inode->i_sb);
		read_unlock(&fi->ext_lock);
		return true;
	}
	read_unlock(&fi->ext_lock);
	return false;
}
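
/*
 * Update the single cached extent for @fofs/@blkaddr: grow it by front or
 * back merge when contiguous, split it otherwise, and drop it entirely once
 * it becomes shorter than F2FS_MIN_EXTENT_LEN. Returns true if the caller
 * should sync the inode page.
 */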
static bool update_extent_info(struct inode *inode, pgoff_t fofs,
								block_t blkaddr)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	pgoff_t start_fofs, end_fofs;
	block_t start_blkaddr, end_blkaddr;
	int need_update = true;

	write_lock(&fi->ext_lock);

	start_fofs = fi->ext.fofs;
	end_fofs = fi->ext.fofs + fi->ext.len - 1;
	start_blkaddr = fi->ext.blk;
	end_blkaddr = fi->ext.blk + fi->ext.len - 1;

	/* Drop and initialize the matched extent */
	if (fi->ext.len == 1 && fofs == start_fofs)
		fi->ext.len = 0;

	/* Initial extent */
	if (fi->ext.len == 0) {
		if (blkaddr != NULL_ADDR) {
			fi->ext.fofs = fofs;
			fi->ext.blk = blkaddr;
			fi->ext.len = 1;
		}
		goto end_update;
	}

	/* Front merge */
	if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) {
		fi->ext.fofs--;
		fi->ext.blk--;
		fi->ext.len++;
		goto end_update;
	}

	/* Back merge */
	if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) {
		fi->ext.len++;
		goto end_update;
	}

	/* Split the existing extent */
	if (fi->ext.len > 1 &&
			fofs >= start_fofs && fofs <= end_fofs) {
		if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
			fi->ext.len = fofs - start_fofs;
		} else {
			fi->ext.fofs = fofs + 1;
			fi->ext.blk = start_blkaddr + fofs - start_fofs + 1;
			fi->ext.len -= fofs - start_fofs + 1;
		}
	} else {
		need_update = false;
	}

	/* Finally, if the extent is very fragmented, let's drop the cache. */
	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
		fi->ext.len = 0;
		set_inode_flag(fi, FI_NO_EXTENT);
		need_update = true;
	}
end_update:
	write_unlock(&fi->ext_lock);
	return need_update;
}
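
/*
 * Rb-tree based extent cache: one tree per inode, indexed by file offset.
 * Nodes are also linked on the global sbi->extent_list so that the shrinker
 * can reclaim cold entries.
 */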
static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p)
{
	struct extent_node *en;

	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color(&en->rb_node, &et->root);
	et->count++;
	atomic_inc(&sbi->total_ext_node);
	return en;
}

static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	rb_erase(&en->rb_node, &et->root);
	et->count--;
	atomic_dec(&sbi->total_ext_node);

	if (et->cached_en == en)
		et->cached_en = NULL;
}

static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi,
							nid_t ino)
{
	struct extent_tree *et;

	down_read(&sbi->extent_tree_lock);
	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
	if (!et) {
		up_read(&sbi->extent_tree_lock);
		return NULL;
	}
	atomic_inc(&et->refcount);
	up_read(&sbi->extent_tree_lock);

	return et;
}

static struct extent_tree *__grab_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	down_write(&sbi->extent_tree_lock);
	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->root = RB_ROOT;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		atomic_set(&et->refcount, 0);
		et->count = 0;
		sbi->total_ext_tree++;
	}
	atomic_inc(&et->refcount);
	up_write(&sbi->extent_tree_lock);

	return et;
}

static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
							unsigned int fofs)
{
	struct rb_node *node = et->root.rb_node;
	struct extent_node *en;

	if (et->cached_en) {
		struct extent_info *cei = &et->cached_en->ei;

		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
			return et->cached_en;
	}

	while (node) {
		en = rb_entry(node, struct extent_node, rb_node);

		if (fofs < en->ei.fofs) {
			node = node->rb_left;
		} else if (fofs >= en->ei.fofs + en->ei.len) {
			node = node->rb_right;
		} else {
			et->cached_en = en;
			return en;
		}
	}
	return NULL;
}

static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_node *prev;
	struct rb_node *node;

	node = rb_prev(&en->rb_node);
	if (!node)
		return NULL;

	prev = rb_entry(node, struct extent_node, rb_node);
	if (__is_back_mergeable(&en->ei, &prev->ei)) {
		en->ei.fofs = prev->ei.fofs;
		en->ei.blk = prev->ei.blk;
		en->ei.len += prev->ei.len;
		__detach_extent_node(sbi, et, prev);
		return prev;
	}
	return NULL;
}

static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_node *next;
	struct rb_node *node;

	node = rb_next(&en->rb_node);
	if (!node)
		return NULL;

	next = rb_entry(node, struct extent_node, rb_node);
	if (__is_front_mergeable(&en->ei, &next->ei)) {
		en->ei.len += next->ei.len;
		__detach_extent_node(sbi, et, next);
		return next;
	}
	return NULL;
}
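
/*
 * Insert @ei into the rb-tree, merging with an existing front or back
 * neighbour when the ranges are contiguous; a node freed by such a merge is
 * returned through @den so the caller can drop it from the global list.
 */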
static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node **den)
{
	struct rb_node **p = &et->root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_node *en;

	while (*p) {
		parent = *p;
		en = rb_entry(parent, struct extent_node, rb_node);

		if (ei->fofs < en->ei.fofs) {
			if (__is_front_mergeable(ei, &en->ei)) {
				f2fs_bug_on(sbi, !den);
				en->ei.fofs = ei->fofs;
				en->ei.blk = ei->blk;
				en->ei.len += ei->len;
				*den = __try_back_merge(sbi, et, en);
				return en;
			}
			p = &(*p)->rb_left;
		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
			if (__is_back_mergeable(ei, &en->ei)) {
				f2fs_bug_on(sbi, !den);
				en->ei.len += ei->len;
				*den = __try_front_merge(sbi, et, en);
				return en;
			}
			p = &(*p)->rb_right;
		} else {
			f2fs_bug_on(sbi, 1);
		}
	}

	return __attach_extent_node(sbi, et, ei, parent, p);
}

static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
					struct extent_tree *et, bool free_all)
{
	struct rb_node *node, *next;
	struct extent_node *en;
	unsigned int count = et->count;

	node = rb_first(&et->root);
	while (node) {
		next = rb_next(node);
		en = rb_entry(node, struct extent_node, rb_node);

		if (free_all) {
			spin_lock(&sbi->extent_lock);
			if (!list_empty(&en->list))
				list_del_init(&en->list);
			spin_unlock(&sbi->extent_lock);
		}

		if (free_all || list_empty(&en->list)) {
			__detach_extent_node(sbi, et, en);
			kmem_cache_free(extent_node_slab, en);
		}
		node = next;
	}

	return count - et->count;
}

static void f2fs_init_extent_tree(struct inode *inode,
						struct f2fs_extent *i_ext)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

	if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
		return;

	et = __grab_extent_tree(inode);

	write_lock(&et->lock);
	if (et->count)
		goto out;

	set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
		le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));

	en = __insert_extent_tree(sbi, et, &ei, NULL);
	if (en) {
		et->cached_en = en;

		spin_lock(&sbi->extent_lock);
		list_add_tail(&en->list, &sbi->extent_list);
		spin_unlock(&sbi->extent_lock);
	}
out:
	write_unlock(&et->lock);
	atomic_dec(&et->refcount);
}

static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
							struct extent_info *ei)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et;
	struct extent_node *en;

	trace_f2fs_lookup_extent_tree_start(inode, pgofs);

	et = __find_extent_tree(sbi, inode->i_ino);
	if (!et)
		return false;

	read_lock(&et->lock);
	en = __lookup_extent_tree(et, pgofs);
	if (en) {
		*ei = en->ei;
		spin_lock(&sbi->extent_lock);
		if (!list_empty(&en->list))
			list_move_tail(&en->list, &sbi->extent_list);
		spin_unlock(&sbi->extent_lock);
		stat_inc_read_hit(sbi->sb);
	}
	stat_inc_total_hit(sbi->sb);
	read_unlock(&et->lock);

	trace_f2fs_lookup_extent_tree_end(inode, pgofs, en);

	atomic_dec(&et->refcount);
	return en ? true : false;
}
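
/*
 * Update the extent tree for a single block at @fofs: the overlapping
 * extent, if any, is removed and split around @fofs, then a new one-block
 * extent is inserted when @blkaddr is valid, and the global extent list is
 * adjusted to match. The steps are numbered in the comments below.
 */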
static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
							block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et;
	struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
	struct extent_node *den = NULL;
	struct extent_info ei, dei;
	unsigned int endofs;

	trace_f2fs_update_extent_tree(inode, fofs, blkaddr);

	et = __grab_extent_tree(inode);

	write_lock(&et->lock);

	/* 1. lookup and remove existing extent info in cache */
	en = __lookup_extent_tree(et, fofs);
	if (!en)
		goto update_extent;

	dei = en->ei;
	__detach_extent_node(sbi, et, en);

	/* 2. if extent can be split more, split and insert the left part */
	if (dei.len > 1) {
		/* insert left part of split extent into cache */
		if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
			set_extent_info(&ei, dei.fofs, dei.blk,
							fofs - dei.fofs);
			en1 = __insert_extent_tree(sbi, et, &ei, NULL);
		}

		/* insert right part of split extent into cache */
		endofs = dei.fofs + dei.len - 1;
		if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) {
			set_extent_info(&ei, fofs + 1,
				fofs - dei.fofs + dei.blk, endofs - fofs);
			en2 = __insert_extent_tree(sbi, et, &ei, NULL);
		}
	}

update_extent:
	/* 3. update extent in extent cache */
	if (blkaddr) {
		set_extent_info(&ei, fofs, blkaddr, 1);
		en3 = __insert_extent_tree(sbi, et, &ei, &den);
	}

	/* 4. update in global extent list */
	spin_lock(&sbi->extent_lock);
	if (en && !list_empty(&en->list))
		list_del(&en->list);
	/*
	 * en1 and en2 were split from en, so they become smaller and smaller
	 * fragments after splitting several times. If the length drops below
	 * F2FS_MIN_EXTENT_LEN, we do not add them into the extent tree.
	 */
	if (en1)
		list_add_tail(&en1->list, &sbi->extent_list);
	if (en2)
		list_add_tail(&en2->list, &sbi->extent_list);
	if (en3) {
		if (list_empty(&en3->list))
			list_add_tail(&en3->list, &sbi->extent_list);
		else
			list_move_tail(&en3->list, &sbi->extent_list);
	}
	if (den && !list_empty(&den->list))
		list_del(&den->list);
	spin_unlock(&sbi->extent_lock);

	/* 5. release extent node */
	if (en)
		kmem_cache_free(extent_node_slab, en);
	if (den)
		kmem_cache_free(extent_node_slab, den);

	write_unlock(&et->lock);
	atomic_dec(&et->refcount);
}

void f2fs_preserve_extent_tree(struct inode *inode)
{
	struct extent_tree *et;
	struct extent_info *ext = &F2FS_I(inode)->ext;
	bool sync = false;

	if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
		return;

	et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino);
	if (!et) {
		if (ext->len) {
			ext->len = 0;
			update_inode_page(inode);
		}
		return;
	}

	read_lock(&et->lock);
	if (et->count) {
		struct extent_node *en;

		if (et->cached_en) {
			en = et->cached_en;
		} else {
			struct rb_node *node = rb_first(&et->root);

			if (!node)
				node = rb_last(&et->root);
			en = rb_entry(node, struct extent_node, rb_node);
		}

		if (__is_extent_same(ext, &en->ei))
			goto out;

		*ext = en->ei;
		sync = true;
	} else if (ext->len) {
		ext->len = 0;
		sync = true;
	}
out:
	read_unlock(&et->lock);
	atomic_dec(&et->refcount);

	if (sync)
		update_inode_page(inode);
}
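
/*
 * Memory shrinker for the extent cache: detach up to @nr_shrink nodes from
 * the global list, free the nodes that are no longer listed, and finally
 * delete extent trees that have become empty and unreferenced.
 */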
void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
	struct extent_node *en, *tmp;
	unsigned long ino = F2FS_ROOT_INO(sbi);
	struct radix_tree_iter iter;
	void **slot;
	unsigned int found;
	unsigned int node_cnt = 0, tree_cnt = 0;

	if (!test_opt(sbi, EXTENT_CACHE))
		return;

	if (available_free_memory(sbi, EXTENT_CACHE))
		return;

	spin_lock(&sbi->extent_lock);
	list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
		if (!nr_shrink--)
			break;
		list_del_init(&en->list);
	}
	spin_unlock(&sbi->extent_lock);

	down_read(&sbi->extent_tree_lock);
	while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
				(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
		unsigned i;

		ino = treevec[found - 1]->ino + 1;
		for (i = 0; i < found; i++) {
			struct extent_tree *et = treevec[i];

			atomic_inc(&et->refcount);
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et, false);
			write_unlock(&et->lock);
			atomic_dec(&et->refcount);
		}
	}
	up_read(&sbi->extent_tree_lock);

	down_write(&sbi->extent_tree_lock);
	radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
							F2FS_ROOT_INO(sbi)) {
		struct extent_tree *et = (struct extent_tree *)*slot;

		if (!atomic_read(&et->refcount) && !et->count) {
			radix_tree_delete(&sbi->extent_tree_root, et->ino);
			kmem_cache_free(extent_tree_slab, et);
			sbi->total_ext_tree--;
			tree_cnt++;
		}
	}
	up_write(&sbi->extent_tree_lock);

	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
}

void f2fs_destroy_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et;
	unsigned int node_cnt = 0;

	if (!test_opt(sbi, EXTENT_CACHE))
		return;

	et = __find_extent_tree(sbi, inode->i_ino);
	if (!et)
		goto out;

	/* free all extent info belonging to this extent tree */
	write_lock(&et->lock);
	node_cnt = __free_extent_tree(sbi, et, true);
	write_unlock(&et->lock);

	atomic_dec(&et->refcount);

	/* try to find and delete extent tree entry in radix tree */
	down_write(&sbi->extent_tree_lock);
	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
	if (!et) {
		up_write(&sbi->extent_tree_lock);
		goto out;
	}
	f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	sbi->total_ext_tree--;
	up_write(&sbi->extent_tree_lock);
out:
	trace_f2fs_destroy_extent_tree(inode, node_cnt);
	return;
}

void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext)
{
	if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
		f2fs_init_extent_tree(inode, i_ext);

	write_lock(&F2FS_I(inode)->ext_lock);
	get_extent_info(&F2FS_I(inode)->ext, *i_ext);
	write_unlock(&F2FS_I(inode)->ext_lock);
}

static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
							struct extent_info *ei)
{
	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
		return false;

	if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
		return f2fs_lookup_extent_tree(inode, pgofs, ei);

	return lookup_extent_info(inode, pgofs, ei);
}

void f2fs_update_extent_cache(struct dnode_of_data *dn)
{
	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
	pgoff_t fofs;

	f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);

	if (is_inode_flag_set(fi, FI_NO_EXTENT))
		return;

	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
							dn->ofs_in_node;

	if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE))
		return f2fs_update_extent_tree(dn->inode, fofs,
							dn->data_blkaddr);
	if (update_extent_info(dn->inode, fofs, dn->data_blkaddr))
		sync_inode_page(dn);
}
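
/*
 * Read one data page. The block address is resolved through the extent
 * cache when possible, otherwise through the dnode; holes return -ENOENT
 * and NEW_ADDR blocks are served as zero-filled pages.
 */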
struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei;
	int err;
	struct f2fs_io_info fio = {
		.sbi = F2FS_I_SB(inode),
		.type = DATA,
		.rw = rw,
		.encrypted_page = NULL,
	};

	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return read_mapping_page(mapping, index, NULL);

	page = grab_cache_page(mapping, index);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-ENOENT);
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such a case, its blkaddr can remain NEW_ADDR.
	 * See f2fs_add_link -> get_new_data_page -> init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	fio.blk_addr = dn.data_blkaddr;
	fio.page = page;
	err = f2fs_submit_page_bio(&fio);
	if (err)
		return ERR_PTR(err);
	return page;
}

struct page *find_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = get_read_data_page(inode, index, READ_SYNC);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * If it tries to access a hole, return an error, because the callers
 * (functions in dir.c and GC) need to know whether this page exists or not.
 */
struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
	page = get_read_data_page(inode, index, READ_SYNC);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that, ipage is set only by make_empty_dir.
 */
struct page *get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;
repeat:
	page = grab_cache_page(mapping, index);
	if (!page)
		return ERR_PTR(-ENOMEM);

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	if (!ipage)
		f2fs_put_dnode(&dn);

	if (PageUptodate(page))
		goto got_it;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
	} else {
		f2fs_put_page(page, 1);

		page = get_read_data_page(inode, index, READ_SYNC);
		if (IS_ERR(page))
			goto repeat;

		/* wait for read completion */
		lock_page(page);
	}
got_it:
	if (new_i_size &&
		i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
		/* Only the directory inode sets new_i_size */
		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
	}
	return page;
}
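
/*
 * Reserve a new on-disk block for the dnode's current offset (used by the
 * preallocation and block mapping paths below) and push i_size forward if
 * the new block extends the file.
 */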
static int __allocate_data_block(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	int seg = CURSEG_WARM_DATA;
	pgoff_t fofs;

	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
		return -EPERM;

	dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
	if (dn->data_blkaddr == NEW_ADDR)
		goto alloc;

	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
		return -ENOSPC;

alloc:
	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

	if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
		seg = CURSEG_DIRECT_IO;

	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
								&sum, seg);

	/* direct IO doesn't use extent cache to maximize the performance */
	set_data_blkaddr(dn);

	/* update i_size */
	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
							dn->ofs_in_node;
	if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
		i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));

	return 0;
}

static void __allocate_data_blocks(struct inode *inode, loff_t offset,
							size_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	u64 start = F2FS_BYTES_TO_BLK(offset);
	u64 len = F2FS_BYTES_TO_BLK(count);
	bool allocated;
	u64 end_offset;

	while (len) {
		f2fs_balance_fs(sbi);
		f2fs_lock_op(sbi);

		/* When reading holes, we need its node page */
		set_new_dnode(&dn, inode, NULL, NULL, 0);
		if (get_dnode_of_data(&dn, start, ALLOC_NODE))
			goto out;

		allocated = false;
		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));

		while (dn.ofs_in_node < end_offset && len) {
			block_t blkaddr;

			blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
			if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
				if (__allocate_data_block(&dn))
					goto sync_out;
				allocated = true;
			}
			len--;
			start++;
			dn.ofs_in_node++;
		}

		if (allocated)
			sync_inode_page(&dn);

		f2fs_put_dnode(&dn);
		f2fs_unlock_op(sbi);
	}
	return;

sync_out:
	if (allocated)
		sync_inode_page(&dn);
	f2fs_put_dnode(&dn);
out:
	f2fs_unlock_op(sbi);
	return;
}

/*
 * f2fs_map_blocks() now supports readahead/bmap/rw direct_IO with the
 * f2fs_map_blocks structure.
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
 */
static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
						int create, bool fiemap)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
	pgoff_t pgofs, end_offset;
	int err = 0, ofs = 1;
	struct extent_info ei;
	bool allocated = false;

	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs = (pgoff_t)map->m_lblk;

	if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
		goto out;
	}

	if (create)
		f2fs_lock_op(F2FS_I_SB(inode));

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (err == -ENOENT)
			err = 0;
		goto unlock_out;
	}
	if (dn.data_blkaddr == NEW_ADDR && !fiemap)
		goto put_out;

	if (dn.data_blkaddr != NULL_ADDR) {
		map->m_flags = F2FS_MAP_MAPPED;
		map->m_pblk = dn.data_blkaddr;
		if (dn.data_blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
	} else if (create) {
		err = __allocate_data_block(&dn);
		if (err)
			goto put_out;
		allocated = true;
		map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED;
		map->m_pblk = dn.data_blkaddr;
	} else {
		goto put_out;
	}

	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
	map->m_len = 1;
	dn.ofs_in_node++;
	pgofs++;

get_next:
	if (dn.ofs_in_node >= end_offset) {
		if (allocated)
			sync_inode_page(&dn);
		allocated = false;
		f2fs_put_dnode(&dn);

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, pgofs, mode);
		if (err) {
			if (err == -ENOENT)
				err = 0;
			goto unlock_out;
		}
		if (dn.data_blkaddr == NEW_ADDR && !fiemap)
			goto put_out;

		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
	}

	if (maxblocks > map->m_len) {
		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);

		if (blkaddr == NULL_ADDR && create) {
			err = __allocate_data_block(&dn);
			if (err)
				goto sync_out;
			allocated = true;
			map->m_flags |= F2FS_MAP_NEW;
			blkaddr = dn.data_blkaddr;
		}
		/* Give more consecutive addresses for the readahead */
		if ((map->m_pblk != NEW_ADDR &&
				blkaddr == (map->m_pblk + ofs)) ||
				(map->m_pblk == NEW_ADDR &&
				blkaddr == NEW_ADDR)) {
			ofs++;
			dn.ofs_in_node++;
			pgofs++;
			map->m_len++;
			goto get_next;
		}
	}
sync_out:
	if (allocated)
		sync_inode_page(&dn);
put_out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (create)
		f2fs_unlock_op(F2FS_I_SB(inode));
out:
	trace_f2fs_map_blocks(inode, map, err);
	return err;
}
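
/*
 * Thin buffer_head wrappers around f2fs_map_blocks() for callers that use
 * the generic get_block_t interface (e.g. f2fs_fiemap() below).
 */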
static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh, int create, bool fiemap)
{
	struct f2fs_map_blocks map;
	int ret;

	map.m_lblk = iblock;
	map.m_len = bh->b_size >> inode->i_blkbits;

	ret = f2fs_map_blocks(inode, &map, create, fiemap);
	if (!ret) {
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
		bh->b_size = map.m_len << inode->i_blkbits;
	}
	return ret;
}

static int get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create, false);
}

static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create, true);
}

static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
{
	return (offset >> inode->i_blkbits);
}

static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
{
	return (blk << inode->i_blkbits);
}

int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct buffer_head map_bh;
	sector_t start_blk, last_blk;
	loff_t isize = i_size_read(inode);
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	bool past_eof = false, whole_file = false;
	int ret = 0;

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;

	mutex_lock(&inode->i_mutex);

	if (len >= isize) {
		whole_file = true;
		len = isize;
	}

	if (logical_to_blk(inode, len) == 0)
		len = blk_to_logical(inode, 1);

	start_blk = logical_to_blk(inode, start);
	last_blk = logical_to_blk(inode, start + len - 1);
next:
	memset(&map_bh, 0, sizeof(struct buffer_head));
	map_bh.b_size = len;

	ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0);
	if (ret)
		goto out;

	/* HOLE */
	if (!buffer_mapped(&map_bh)) {
		start_blk++;

		if (!past_eof && blk_to_logical(inode, start_blk) >= isize)
			past_eof = 1;

		if (past_eof && size) {
			flags |= FIEMAP_EXTENT_LAST;
			ret = fiemap_fill_next_extent(fieinfo, logical,
					phys, size, flags);
		} else if (size) {
			ret = fiemap_fill_next_extent(fieinfo, logical,
					phys, size, flags);
			size = 0;
		}

		/* if we have holes up to/past EOF then we're done */
		if (start_blk > last_blk || past_eof || ret)
			goto out;
	} else {
		if (start_blk > last_blk && !whole_file) {
			ret = fiemap_fill_next_extent(fieinfo, logical,
					phys, size, flags);
			goto out;
		}

		/*
		 * if size != 0 then we know we already have an extent
		 * to add, so add it.
		 */
		if (size) {
			ret = fiemap_fill_next_extent(fieinfo, logical,
					phys, size, flags);
			if (ret)
				goto out;
		}

		logical = blk_to_logical(inode, start_blk);
		phys = blk_to_logical(inode, map_bh.b_blocknr);
		size = map_bh.b_size;
		flags = 0;
		if (buffer_unwritten(&map_bh))
			flags = FIEMAP_EXTENT_UNWRITTEN;

		start_blk += logical_to_blk(inode, size);

		/*
		 * If we are past the EOF, then we need to make sure as
		 * soon as we find a hole that the last extent we found
		 * is marked with FIEMAP_EXTENT_LAST
		 */
		if (!past_eof && logical + size >= isize)
			past_eof = true;
	}
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;

	mutex_unlock(&inode->i_mutex);
	return ret;
}

/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * The major change is that block_size == page_size in f2fs by default.
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
			unsigned nr_pages)
{
	struct bio *bio = NULL;
	unsigned page_idx;
	sector_t last_block_in_bio = 0;
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	struct block_device *bdev = inode->i_sb->s_bdev;
	struct f2fs_map_blocks map;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;

	for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {

		prefetchw(&page->flags);
		if (pages) {
			page = list_entry(pages->prev, struct page, lru);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping,
						page->index, GFP_KERNEL))
				goto next_page;
		}

		block_in_file = (sector_t)page->index;
		last_block = block_in_file + nr_pages;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
								blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & F2FS_MAP_MAPPED) &&
				block_in_file > map.m_lblk &&
				block_in_file < (map.m_lblk + map.m_len))
			goto got_it;

		/*
		 * Then do more f2fs_map_blocks() calls until we are
		 * done with this page.
		 */
		map.m_flags = 0;

		if (block_in_file < last_block) {
			map.m_lblk = block_in_file;
			map.m_len = last_block - block_in_file;

			if (f2fs_map_blocks(inode, &map, 0, false))
				goto set_error_page;
		}
got_it:
		if ((map.m_flags & F2FS_MAP_MAPPED)) {
			block_nr = map.m_pblk + block_in_file - map.m_lblk;
			SetPageMappedToDisk(page);

			if (!PageUptodate(page) && !cleancache_get_page(page)) {
				SetPageUptodate(page);
				goto confused;
			}
		} else {
			zero_user_segment(page, 0, PAGE_CACHE_SIZE);
			SetPageUptodate(page);
			unlock_page(page);
			goto next_page;
		}

		/*
		 * This page will go to BIO. Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != block_nr - 1)) {
submit_and_realloc:
			submit_bio(READ, bio);
			bio = NULL;
		}
		if (bio == NULL) {
			struct f2fs_crypto_ctx *ctx = NULL;

			if (f2fs_encrypted_inode(inode) &&
					S_ISREG(inode->i_mode)) {
				struct page *cpage;

				ctx = f2fs_get_crypto_ctx(inode);
				if (IS_ERR(ctx))
					goto set_error_page;

				/* wait for the page to be moved by cleaning */
				cpage = find_lock_page(
						META_MAPPING(F2FS_I_SB(inode)),
						block_nr);
				if (cpage) {
					f2fs_wait_on_page_writeback(cpage,
									DATA);
					f2fs_put_page(cpage, 1);
				}
			}

			bio = bio_alloc(GFP_KERNEL,
				min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
			if (!bio) {
				if (ctx)
					f2fs_release_crypto_ctx(ctx);
				goto set_error_page;
			}
			bio->bi_bdev = bdev;
			bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
			bio->bi_end_io = f2fs_read_end_io;
			bio->bi_private = ctx;
		}

		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
			goto submit_and_realloc;

		last_block_in_bio = block_nr;
		goto next_page;
set_error_page:
		SetPageError(page);
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		unlock_page(page);
		goto next_page;
confused:
		if (bio) {
			submit_bio(READ, bio);
			bio = NULL;
		}
		unlock_page(page);
next_page:
		if (pages)
			page_cache_release(page);
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		submit_bio(READ, bio);
	return 0;
}

static int f2fs_read_data_page(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int ret = -EAGAIN;

	trace_f2fs_readpage(page, DATA);

	/* If the file has inline data, try to read it directly */
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	if (ret == -EAGAIN)
		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
	return ret;
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = file->f_mapping->host;

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
}
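
/*
 * Write one data page: look up its block address, encrypt the page for
 * encrypted regular inodes, and then either rewrite it in place (IPU) or
 * allocate a new block and update the extent cache (OPU).
 */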
  1364. int do_write_data_page(struct f2fs_io_info *fio)
  1365. {
  1366. struct page *page = fio->page;
  1367. struct inode *inode = page->mapping->host;
  1368. struct dnode_of_data dn;
  1369. int err = 0;
  1370. set_new_dnode(&dn, inode, NULL, NULL, 0);
  1371. err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
  1372. if (err)
  1373. return err;
  1374. fio->blk_addr = dn.data_blkaddr;
  1375. /* This page is already truncated */
  1376. if (fio->blk_addr == NULL_ADDR) {
  1377. ClearPageUptodate(page);
  1378. goto out_writepage;
  1379. }
  1380. if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
  1381. fio->encrypted_page = f2fs_encrypt(inode, fio->page);
  1382. if (IS_ERR(fio->encrypted_page)) {
  1383. err = PTR_ERR(fio->encrypted_page);
  1384. goto out_writepage;
  1385. }
  1386. }
  1387. set_page_writeback(page);
  1388. /*
  1389. * If current allocation needs SSR,
  1390. * it had better in-place writes for updated data.
  1391. */
  1392. if (unlikely(fio->blk_addr != NEW_ADDR &&
  1393. !is_cold_data(page) &&
  1394. need_inplace_update(inode))) {
  1395. rewrite_data_page(fio);
  1396. set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
  1397. trace_f2fs_do_write_data_page(page, IPU);
  1398. } else {
  1399. write_data_page(&dn, fio);
  1400. set_data_blkaddr(&dn);
  1401. f2fs_update_extent_cache(&dn);
  1402. trace_f2fs_do_write_data_page(page, OPU);
  1403. set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
  1404. if (page->index == 0)
  1405. set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
  1406. }
  1407. out_writepage:
  1408. f2fs_put_dnode(&dn);
  1409. return err;
  1410. }
static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_CACHE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = DATA,
		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
		.page = page,
		.encrypted_page = NULL,
	};

	trace_f2fs_writepage(page, DATA);

	if (page->index < end_index)
		goto write;

	/*
	 * If the offset is out of range of the file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_CACHE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;
	if (f2fs_is_drop_cache(inode))
		goto out;
	if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim &&
			available_free_memory(sbi, BASE_CHECK))
		goto redirty_out;

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		if (unlikely(f2fs_cp_error(sbi)))
			goto redirty_out;
		err = do_write_data_page(&fio);
		goto done;
	}

	/* we should bypass data pages to let the kworker jobs proceed */
	if (unlikely(f2fs_cp_error(sbi))) {
		SetPageError(page);
		goto out;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0))
		goto redirty_out;

	err = -EAGAIN;
	f2fs_lock_op(sbi);
	if (f2fs_has_inline_data(inode))
		err = f2fs_write_inline_data(inode, page);
	if (err == -EAGAIN)
		err = do_write_data_page(&fio);
	f2fs_unlock_op(sbi);
done:
	if (err && err != -ENOENT)
		goto redirty_out;

	clear_cold_data(page);
out:
	inode_dec_dirty_pages(inode);
	if (err)
		ClearPageUptodate(page);
	unlock_page(page);
	if (need_balance_fs)
		f2fs_balance_fs(sbi);
	if (wbc->for_reclaim)
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

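/* Per-page callback passed to write_cache_pages() by f2fs_write_data_pages() */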
static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
			void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);

	mapping_set_error(mapping, ret);
	return ret;
}

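/*
 * ->writepages for data: skip writeback when it is cheap to defer (few dirty
 * dentry pages, or power-on recovery in progress), otherwise walk the dirty
 * pages with write_cache_pages() and submit the merged DATA bio.
 */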
static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	bool locked = false;
	int ret;
	long diff;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	diff = nr_pages_to_write(sbi, DATA, wbc);

	if (!S_ISDIR(inode->i_mode)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}
	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
	if (locked)
		mutex_unlock(&sbi->writepages);

	f2fs_submit_merged_bio(sbi, DATA, WRITE);

	remove_dirty_dir_inode(inode);

	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_pages(inode);
	return 0;
}

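/* On a failed or partial write, drop page cache and blocks beyond i_size */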
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to > inode->i_size) {
		truncate_pagecache(inode, inode->i_size);
		truncate_blocks(inode, inode->i_size, true);
	}
}

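/*
 * ->write_begin: grab and prepare the target page, reserve its block (or keep
 * it as inline data while it still fits), and bring the page up to date by
 * zeroing or reading it in before the caller copies user data into it.
 */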
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page, *ipage;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
	struct dnode_of_data dn;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	f2fs_balance_fs(sbi);

	/*
	 * We should check this here to avoid a deadlock on the inode page
	 * and page #0. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}
repeat:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	*pagep = page;

	f2fs_lock_op(sbi);

	/* check inline_data */
	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_fail;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA) {
			read_inline_data(page, ipage);
			set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
			sync_inode_page(&dn);
			goto put_next;
		}
		err = f2fs_convert_inline_page(&dn, page);
		if (err)
			goto put_fail;
	}
	err = f2fs_reserve_block(&dn, index);
	if (err)
		goto put_fail;
put_next:
	f2fs_put_dnode(&dn);
	f2fs_unlock_op(sbi);

	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
		return 0;

	f2fs_wait_on_page_writeback(page, DATA);

	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
		unsigned end = start + len;

		/* Reading beyond i_size is simple: memset to zero */
		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
		goto out;
	}

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
	} else {
		struct f2fs_io_info fio = {
			.sbi = sbi,
			.type = DATA,
			.rw = READ_SYNC,
			.blk_addr = dn.data_blkaddr,
			.page = page,
			.encrypted_page = NULL,
		};
		err = f2fs_submit_page_bio(&fio);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(!PageUptodate(page))) {
			f2fs_put_page(page, 1);
			err = -EIO;
			goto fail;
		}
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}

		/* avoid symlink page */
		if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
			err = f2fs_decrypt_one(inode, page);
			if (err) {
				f2fs_put_page(page, 1);
				goto fail;
			}
		}
	}
out:
	SetPageUptodate(page);
	clear_cold_data(page);
	return 0;

put_fail:
	f2fs_put_dnode(&dn);
unlock_fail:
	f2fs_unlock_op(sbi);
	f2fs_put_page(page, 1);
fail:
	f2fs_write_failed(mapping, pos + len);
	return err;
}

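/*
 * ->write_end: mark the copied page dirty and, if the write extended the
 * file, push i_size forward and update the inode page.
 */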
static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	set_page_dirty(page);

	if (pos + copied > i_size_read(inode)) {
		i_size_write(inode, pos + copied);
		mark_inode_dirty(inode);
		update_inode_page(inode);
	}

	f2fs_put_page(page, 1);
	return copied;
}

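/*
 * Direct writes must have block-aligned offsets and iovecs; reads are not
 * restricted. A non-zero return makes f2fs_direct_IO() bail out.
 */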
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			loff_t offset)
{
	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;

	if (iov_iter_rw(iter) == READ)
		return 0;

	if (offset & blocksize_mask)
		return -EINVAL;

	if (iov_iter_alignment(iter) & blocksize_mask)
		return -EINVAL;

	return 0;
}

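/*
 * ->direct_IO: convert inline data first, return 0 (so the VFS falls back to
 * buffered I/O) for encrypted regular files or misaligned requests,
 * preallocate blocks for writes, and hand the request to blockdev_direct_IO().
 */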
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	size_t count = iov_iter_count(iter);
	int err;

	/* we don't need to use inline_data strictly */
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return 0;

	if (check_direct_IO(inode, iter, offset))
		return 0;

	trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));

	if (iov_iter_rw(iter) == WRITE)
		__allocate_data_blocks(inode, offset, count);

	err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block);
	if (err < 0 && iov_iter_rw(iter) == WRITE)
		f2fs_write_failed(mapping, offset + count);

	trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err);

	return err;
}

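/*
 * ->invalidatepage: when a whole page is invalidated, drop its dirty
 * accounting (meta, node, or data) and detach the private flag.
 */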
void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
		(offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE))
		return;

	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi))
			dec_page_count(sbi, F2FS_DIRTY_META);
		else if (inode->i_ino == F2FS_NODE_INO(sbi))
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		else
			inode_dec_dirty_pages(inode);
	}
	ClearPagePrivate(page);
}

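/* ->releasepage: only clean pages may drop PagePrivate and be released */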
int f2fs_release_page(struct page *page, gfp_t wait)
{
	/* If this is a dirty page, keep PagePrivate */
	if (PageDirty(page))
		return 0;

	ClearPagePrivate(page);
	return 1;
}

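/*
 * ->set_page_dirty: pages of atomic-write files are registered as in-memory
 * pages instead of being dirtied; everything else goes through the normal
 * dirty accounting.
 */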
static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(page, DATA);

	SetPageUptodate(page);

	if (f2fs_is_atomic_file(inode)) {
		register_inmem_page(inode, page);
		return 1;
	}

	mark_inode_dirty(inode);

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		update_dirty_page(inode, page);
		return 1;
	}
	return 0;
}

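/* ->bmap: convert inline data first, since it has no separately mapped block */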
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;

	/* we don't need to use inline_data strictly */
	if (f2fs_has_inline_data(inode)) {
		int err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

	return generic_block_bmap(mapping, block, get_data_block);
}

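/* Initialize the per-superblock extent cache structures */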
void init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
	init_rwsem(&sbi->extent_tree_lock);
	INIT_LIST_HEAD(&sbi->extent_list);
	spin_lock_init(&sbi->extent_lock);
	sbi->total_ext_tree = 0;
	atomic_set(&sbi->total_ext_node, 0);
}

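/* Create the slab caches for extent tree and extent node objects */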
int __init create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		return -ENOMEM;
	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab) {
		kmem_cache_destroy(extent_tree_slab);
		return -ENOMEM;
	}
	return 0;
}

void destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
};