btt.c

/*
 * Block Translation Table
 * Copyright (c) 2014-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
#include <linux/highmem.h>
#include <linux/debugfs.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/hdreg.h>
#include <linux/genhd.h>
#include <linux/sizes.h>
#include <linux/ndctl.h>
#include <linux/fs.h>
#include <linux/nd.h>
#include "btt.h"
#include "nd.h"

enum log_ent_request {
	LOG_NEW_ENT = 0,
	LOG_OLD_ENT
};

static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
		void *buf, size_t n)
{
	struct nd_btt *nd_btt = arena->nd_btt;
	struct nd_namespace_common *ndns = nd_btt->ndns;

	/* arena offsets are 4K from the base of the device */
	offset += SZ_4K;
	return nvdimm_read_bytes(ndns, offset, buf, n);
}

static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
		void *buf, size_t n)
{
	struct nd_btt *nd_btt = arena->nd_btt;
	struct nd_namespace_common *ndns = nd_btt->ndns;

	/* arena offsets are 4K from the base of the device */
	offset += SZ_4K;
	return nvdimm_write_bytes(ndns, offset, buf, n);
}

static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
{
	int ret;

	ret = arena_write_bytes(arena, arena->info2off, super,
			sizeof(struct btt_sb));
	if (ret)
		return ret;

	return arena_write_bytes(arena, arena->infooff, super,
			sizeof(struct btt_sb));
}

static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
{
	WARN_ON(!super);
	return arena_read_bytes(arena, arena->infooff, super,
			sizeof(struct btt_sb));
}

/*
 * 'raw' version of btt_map write
 * Assumptions:
 *   mapping is in little-endian
 *   mapping contains 'E' and 'Z' flags as desired
 */
static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping)
{
	u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);

	WARN_ON(lba >= arena->external_nlba);
	return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE);
}

static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
			u32 z_flag, u32 e_flag)
{
	u32 ze;
	__le32 mapping_le;

	/*
	 * This 'mapping' is supposed to be just the LBA mapping, without
	 * any flags set, so strip the flag bits.
	 */
	mapping &= MAP_LBA_MASK;

	ze = (z_flag << 1) + e_flag;
	switch (ze) {
	case 0:
		/*
		 * We want to set neither of the Z or E flags, and
		 * in the actual layout, this means setting the bit
		 * positions of both to '1' to indicate a 'normal'
		 * map entry
		 */
		mapping |= MAP_ENT_NORMAL;
		break;
	case 1:
		mapping |= (1 << MAP_ERR_SHIFT);
		break;
	case 2:
		mapping |= (1 << MAP_TRIM_SHIFT);
		break;
	default:
		/*
		 * The case where Z and E are both sent in as '1' could be
		 * construed as a valid 'normal' case, but we decide not to,
		 * to avoid confusion
		 */
		WARN_ONCE(1, "Invalid use of Z and E flags\n");
		return -EIO;
	}

	mapping_le = cpu_to_le32(mapping);
	return __btt_map_write(arena, lba, mapping_le);
}
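
/*
 * For reference, the resulting on-media encoding of the top two bits of a
 * map entry (Z = trim/zero, E = error; exact bit positions are given by the
 * MAP_TRIM/MAP_ERR masks in btt.h) works out to:
 *
 *   Z E  meaning
 *   0 0  entry never written; btt_map_read returns postmap == premap
 *   1 1  'normal' entry carrying a valid postmap LBA
 *   0 1  postmap LBA present but the block is in an error state
 *   1 0  block has been zeroed/trimmed
 */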

static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
			int *trim, int *error)
{
	int ret;
	__le32 in;
	u32 raw_mapping, postmap, ze, z_flag, e_flag;
	u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);

	WARN_ON(lba >= arena->external_nlba);

	ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE);
	if (ret)
		return ret;

	raw_mapping = le32_to_cpu(in);

	z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT;
	e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT;
	ze = (z_flag << 1) + e_flag;
	postmap = raw_mapping & MAP_LBA_MASK;

	/* Reuse the {z,e}_flag variables for *trim and *error */
	z_flag = 0;
	e_flag = 0;

	switch (ze) {
	case 0:
		/* Initial state. Return postmap = premap */
		*mapping = lba;
		break;
	case 1:
		*mapping = postmap;
		e_flag = 1;
		break;
	case 2:
		*mapping = postmap;
		z_flag = 1;
		break;
	case 3:
		*mapping = postmap;
		break;
	default:
		return -EIO;
	}

	if (trim)
		*trim = z_flag;
	if (error)
		*error = e_flag;

	return ret;
}

static int btt_log_group_read(struct arena_info *arena, u32 lane,
			struct log_group *log)
{
	WARN_ON(!log);
	return arena_read_bytes(arena,
			arena->logoff + (lane * LOG_GRP_SIZE), log,
			LOG_GRP_SIZE);
}

static struct dentry *debugfs_root;

static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
				int idx)
{
	char dirname[32];
	struct dentry *d;

	/* If for some reason, parent bttN was not created, exit */
	if (!parent)
		return;

	snprintf(dirname, 32, "arena%d", idx);
	d = debugfs_create_dir(dirname, parent);
	if (IS_ERR_OR_NULL(d))
		return;
	a->debugfs_dir = d;

	debugfs_create_x64("size", S_IRUGO, d, &a->size);
	debugfs_create_x64("external_lba_start", S_IRUGO, d,
				&a->external_lba_start);
	debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba);
	debugfs_create_u32("internal_lbasize", S_IRUGO, d,
				&a->internal_lbasize);
	debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba);
	debugfs_create_u32("external_lbasize", S_IRUGO, d,
				&a->external_lbasize);
	debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree);
	debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major);
	debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor);
	debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff);
	debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff);
	debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff);
	debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff);
	debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
	debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
	debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
	debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
	debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
}

static void btt_debugfs_init(struct btt *btt)
{
	int i = 0;
	struct arena_info *arena;

	btt->debugfs_dir = debugfs_create_dir(dev_name(&btt->nd_btt->dev),
						debugfs_root);
	if (IS_ERR_OR_NULL(btt->debugfs_dir))
		return;

	list_for_each_entry(arena, &btt->arena_list, list) {
		arena_debugfs_init(arena, btt->debugfs_dir, i);
		i++;
	}
}

static u32 log_seq(struct log_group *log, int log_idx)
{
	return le32_to_cpu(log->ent[log_idx].seq);
}

/*
 * This function accepts two log entries, and uses the
 * sequence number to find the 'older' entry.
 * It also updates the sequence number in this old entry to
 * make it the 'new' one if the mark_flag is set.
 * Finally, it returns which of the entries was the older one.
 *
 * TODO The logic feels a bit kludge-y. make it better..
 */
static int btt_log_get_old(struct arena_info *a, struct log_group *log)
{
	int idx0 = a->log_index[0];
	int idx1 = a->log_index[1];
	int old;

	/*
	 * the first ever time this is seen, the entry goes into [0]
	 * the next time, the following logic works out to put this
	 * (next) entry into [1]
	 */
	if (log_seq(log, idx0) == 0) {
		log->ent[idx0].seq = cpu_to_le32(1);
		return 0;
	}

	if (log_seq(log, idx0) == log_seq(log, idx1))
		return -EINVAL;
	if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
		return -EINVAL;

	if (log_seq(log, idx0) < log_seq(log, idx1)) {
		if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
			old = 0;
		else
			old = 1;
	} else {
		if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
			old = 1;
		else
			old = 0;
	}

	return old;
}
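
/*
 * Worked example for the comparison above: sequence numbers cycle through
 * 1 -> 2 -> 3 -> 1 (0 marks an unused slot, and btt_flog_write wraps 4 back
 * to 1). Given the pair (2, 1), entry 1 is older since 2 is one step ahead
 * of 1. Given (1, 3), the difference is not 1, so the cycle wrapped: 1
 * follows 3, making entry 1 (seq 3) the older one. Two equal values, or a
 * pair summing to more than 5, cannot arise from this cycle and is rejected
 * as corruption.
 */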

static struct device *to_dev(struct arena_info *arena)
{
	return &arena->nd_btt->dev;
}

/*
 * This function copies the desired (old/new) log entry into ent if
 * it is not NULL. It returns the sub-slot number (0 or 1)
 * where the desired log entry was found. Negative return values
 * indicate errors.
 */
static int btt_log_read(struct arena_info *arena, u32 lane,
			struct log_entry *ent, int old_flag)
{
	int ret;
	int old_ent, ret_ent;
	struct log_group log;

	ret = btt_log_group_read(arena, lane, &log);
	if (ret)
		return -EIO;

	old_ent = btt_log_get_old(arena, &log);
	if (old_ent < 0 || old_ent > 1) {
		dev_info(to_dev(arena),
				"log corruption (%d): lane %d seq [%d, %d]\n",
				old_ent, lane, log.ent[arena->log_index[0]].seq,
				log.ent[arena->log_index[1]].seq);
		/* TODO set error state? */
		return -EIO;
	}

	ret_ent = (old_flag ? old_ent : (1 - old_ent));

	if (ent != NULL)
		memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);

	return ret_ent;
}

/*
 * This function commits a log entry to media
 * It does _not_ prepare the freelist entry for the next write
 * btt_flog_write is the wrapper for updating the freelist elements
 */
static int __btt_log_write(struct arena_info *arena, u32 lane,
			u32 sub, struct log_entry *ent)
{
	int ret;
	u32 group_slot = arena->log_index[sub];
	unsigned int log_half = LOG_ENT_SIZE / 2;
	void *src = ent;
	u64 ns_off;

	ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
		(group_slot * LOG_ENT_SIZE);

	/* split the 16B write into atomic, durable halves */
	ret = arena_write_bytes(arena, ns_off, src, log_half);
	if (ret)
		return ret;

	ns_off += log_half;
	src += log_half;
	return arena_write_bytes(arena, ns_off, src, log_half);
}

static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
			struct log_entry *ent)
{
	int ret;

	ret = __btt_log_write(arena, lane, sub, ent);
	if (ret)
		return ret;

	/* prepare the next free entry */
	arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
	if (++(arena->freelist[lane].seq) == 4)
		arena->freelist[lane].seq = 1;
	arena->freelist[lane].block = le32_to_cpu(ent->old_map);

	return ret;
}
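
/*
 * After a successful flog write, the in-memory freelist for this lane is
 * advanced in one step: 'sub' flips so the other log slot is overwritten
 * next time, 'seq' moves one step along the 1 -> 2 -> 3 -> 1 cycle, and the
 * block that was just unmapped (old_map) becomes the lane's next free block.
 */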

/*
 * This function initializes the BTT map to the initial state, which is
 * all-zeroes, and indicates an identity mapping
 */
static int btt_map_init(struct arena_info *arena)
{
	int ret = -EINVAL;
	void *zerobuf;
	size_t offset = 0;
	size_t chunk_size = SZ_2M;
	size_t mapsize = arena->logoff - arena->mapoff;

	zerobuf = kzalloc(chunk_size, GFP_KERNEL);
	if (!zerobuf)
		return -ENOMEM;

	while (mapsize) {
		size_t size = min(mapsize, chunk_size);

		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
				size);
		if (ret)
			goto free;

		offset += size;
		mapsize -= size;
		cond_resched();
	}

 free:
	kfree(zerobuf);
	return ret;
}

/*
 * This function initializes the BTT log with 'fake' entries pointing
 * to the initial reserved set of blocks as being free
 */
static int btt_log_init(struct arena_info *arena)
{
	int ret;
	u32 i;
	struct log_entry ent, zerolog;

	memset(&zerolog, 0, sizeof(zerolog));

	for (i = 0; i < arena->nfree; i++) {
		ent.lba = cpu_to_le32(i);
		ent.old_map = cpu_to_le32(arena->external_nlba + i);
		ent.new_map = cpu_to_le32(arena->external_nlba + i);
		ent.seq = cpu_to_le32(LOG_SEQ_INIT);
		ret = __btt_log_write(arena, i, 0, &ent);
		if (ret)
			return ret;
		ret = __btt_log_write(arena, i, 1, &zerolog);
		if (ret)
			return ret;
	}

	return 0;
}
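
/*
 * The entries written above make each lane i appear to have just freed
 * internal block (external_nlba + i), i.e. one of the nfree blocks that sit
 * beyond the externally visible LBA range. btt_freelist_init below then
 * picks these up as the initial set of free blocks available for writes.
 */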

static int btt_freelist_init(struct arena_info *arena)
{
	int old, new, ret;
	u32 i, map_entry;
	struct log_entry log_new, log_old;

	arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry),
					GFP_KERNEL);
	if (!arena->freelist)
		return -ENOMEM;

	for (i = 0; i < arena->nfree; i++) {
		old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT);
		if (old < 0)
			return old;

		new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT);
		if (new < 0)
			return new;

		/* sub points to the next one to be overwritten */
		arena->freelist[i].sub = 1 - new;
		arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
		arena->freelist[i].block = le32_to_cpu(log_new.old_map);

		/* This implies a newly created or untouched flog entry */
		if (log_new.old_map == log_new.new_map)
			continue;

		/* Check if map recovery is needed */
		ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry,
				NULL, NULL);
		if (ret)
			return ret;

		if ((le32_to_cpu(log_new.new_map) != map_entry) &&
				(le32_to_cpu(log_new.old_map) == map_entry)) {
			/*
			 * Last transaction wrote the flog, but wasn't able
			 * to complete the map write. So fix up the map.
			 */
			ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
					le32_to_cpu(log_new.new_map), 0, 0);
			if (ret)
				return ret;
		}
	}

	return 0;
}

static bool ent_is_padding(struct log_entry *ent)
{
	return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
		&& (ent->seq == 0);
}

/*
 * Detecting valid log indices: We read a log group (see the comments in btt.h
 * for a description of a 'log_group' and its 'slots'), and iterate over its
 * four slots. We expect that a padding slot will be all-zeroes, and use this
 * to detect a padding slot vs. an actual entry.
 *
 * If a log_group is in the initial state, i.e. hasn't been used since the
 * creation of this BTT layout, it will have three of the four slots with
 * zeroes. We skip over these log_groups for the detection of log_index. If
 * all log_groups are in the initial state (i.e. the BTT has never been
 * written to), it is safe to assume the 'new format' of log entries in slots
 * (0, 1).
 */
static int log_set_indices(struct arena_info *arena)
{
	bool idx_set = false, initial_state = true;
	int ret, log_index[2] = {-1, -1};
	u32 i, j, next_idx = 0;
	struct log_group log;
	u32 pad_count = 0;

	for (i = 0; i < arena->nfree; i++) {
		ret = btt_log_group_read(arena, i, &log);
		if (ret < 0)
			return ret;

		for (j = 0; j < 4; j++) {
			if (!idx_set) {
				if (ent_is_padding(&log.ent[j])) {
					pad_count++;
					continue;
				} else {
					/* Skip if index has been recorded */
					if ((next_idx == 1) &&
							(j == log_index[0]))
						continue;
					/* valid entry, record index */
					log_index[next_idx] = j;
					next_idx++;
				}
				if (next_idx == 2) {
					/* two valid entries found */
					idx_set = true;
				} else if (next_idx > 2) {
					/* too many valid indices */
					return -ENXIO;
				}
			} else {
				/*
				 * once the indices have been set, just verify
				 * that all subsequent log groups are either in
				 * their initial state or follow the same
				 * indices.
				 */
				if (j == log_index[0]) {
					/* entry must be 'valid' */
					if (ent_is_padding(&log.ent[j]))
						return -ENXIO;
				} else if (j == log_index[1]) {
					;
					/*
					 * log_index[1] can be padding if the
					 * lane never got used and it is still
					 * in the initial state (three 'padding'
					 * entries)
					 */
				} else {
					/* entry must be invalid (padding) */
					if (!ent_is_padding(&log.ent[j]))
						return -ENXIO;
				}
			}
		}
		/*
		 * If any of the log_groups have more than one valid,
		 * non-padding entry, then we are no longer in the
		 * initial_state
		 */
		if (pad_count < 3)
			initial_state = false;
		pad_count = 0;
	}

	if (!initial_state && !idx_set)
		return -ENXIO;

	/*
	 * If all the entries in the log were in the initial state,
	 * assume new padding scheme
	 */
	if (initial_state)
		log_index[1] = 1;

	/*
	 * Only allow the known permutations of log/padding indices,
	 * i.e. (0, 1), and (0, 2)
	 */
	if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
		; /* known index possibilities */
	else {
		dev_err(to_dev(arena), "Found an unknown padding scheme\n");
		return -ENXIO;
	}

	arena->log_index[0] = log_index[0];
	arena->log_index[1] = log_index[1];
	dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
	dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);

	return 0;
}
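
/*
 * Note on the two accepted index pairs: with 16B log entries packed back to
 * back inside a 64B log group, the live entries occupy slots (0, 1) and
 * slots 2 and 3 are padding. Layouts created by older implementations that
 * padded each log entry out to 32B end up with live entries in slots (0, 2)
 * instead, which is why that permutation is also accepted here.
 */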

static int btt_rtt_init(struct arena_info *arena)
{
	arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
	if (arena->rtt == NULL)
		return -ENOMEM;

	return 0;
}

static int btt_maplocks_init(struct arena_info *arena)
{
	u32 i;

	arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock),
				GFP_KERNEL);
	if (!arena->map_locks)
		return -ENOMEM;

	for (i = 0; i < arena->nfree; i++)
		spin_lock_init(&arena->map_locks[i].lock);

	return 0;
}

static struct arena_info *alloc_arena(struct btt *btt, size_t size,
				size_t start, size_t arena_off)
{
	struct arena_info *arena;
	u64 logsize, mapsize, datasize;
	u64 available = size;

	arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL);
	if (!arena)
		return NULL;
	arena->nd_btt = btt->nd_btt;

	if (!size)
		return arena;

	arena->size = size;
	arena->external_lba_start = start;
	arena->external_lbasize = btt->lbasize;
	arena->internal_lbasize = roundup(arena->external_lbasize,
					INT_LBASIZE_ALIGNMENT);
	arena->nfree = BTT_DEFAULT_NFREE;
	arena->version_major = 1;
	arena->version_minor = 1;

	if (available % BTT_PG_SIZE)
		available -= (available % BTT_PG_SIZE);

	/* Two pages are reserved for the super block and its copy */
	available -= 2 * BTT_PG_SIZE;

	/* The log takes a fixed amount of space based on nfree */
	logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
	available -= logsize;

	/* Calculate optimal split between map and data area */
	arena->internal_nlba = div_u64(available - BTT_PG_SIZE,
			arena->internal_lbasize + MAP_ENT_SIZE);
	arena->external_nlba = arena->internal_nlba - arena->nfree;

	mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE);
	datasize = available - mapsize;

	/* 'Absolute' values, relative to start of storage space */
	arena->infooff = arena_off;
	arena->dataoff = arena->infooff + BTT_PG_SIZE;
	arena->mapoff = arena->dataoff + datasize;
	arena->logoff = arena->mapoff + mapsize;
	arena->info2off = arena->logoff + logsize;

	/* Default log indices are (0,1) */
	arena->log_index[0] = 0;
	arena->log_index[1] = 1;

	return arena;
}
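
/*
 * The resulting on-media layout of one arena, in increasing offset order
 * (each region starts on a BTT_PG_SIZE boundary):
 *
 *   infooff   - info block (btt_sb)
 *   dataoff   - data blocks, internal_lbasize bytes each
 *   mapoff    - map, one MAP_ENT_SIZE entry per external LBA
 *   logoff    - log, one LOG_GRP_SIZE group per free-list lane
 *   info2off  - backup copy of the info block
 */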

static void free_arenas(struct btt *btt)
{
	struct arena_info *arena, *next;

	list_for_each_entry_safe(arena, next, &btt->arena_list, list) {
		list_del(&arena->list);
		kfree(arena->rtt);
		kfree(arena->map_locks);
		kfree(arena->freelist);
		debugfs_remove_recursive(arena->debugfs_dir);
		kfree(arena);
	}
}

/*
 * This function reads an existing valid btt superblock and
 * populates the corresponding arena_info struct
 */
static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super,
				u64 arena_off)
{
	arena->internal_nlba = le32_to_cpu(super->internal_nlba);
	arena->internal_lbasize = le32_to_cpu(super->internal_lbasize);
	arena->external_nlba = le32_to_cpu(super->external_nlba);
	arena->external_lbasize = le32_to_cpu(super->external_lbasize);
	arena->nfree = le32_to_cpu(super->nfree);
	arena->version_major = le16_to_cpu(super->version_major);
	arena->version_minor = le16_to_cpu(super->version_minor);

	arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off +
			le64_to_cpu(super->nextoff));
	arena->infooff = arena_off;
	arena->dataoff = arena_off + le64_to_cpu(super->dataoff);
	arena->mapoff = arena_off + le64_to_cpu(super->mapoff);
	arena->logoff = arena_off + le64_to_cpu(super->logoff);
	arena->info2off = arena_off + le64_to_cpu(super->info2off);

	arena->size = (le64_to_cpu(super->nextoff) > 0)
		? (le64_to_cpu(super->nextoff))
		: (arena->info2off - arena->infooff + BTT_PG_SIZE);

	arena->flags = le32_to_cpu(super->flags);
}

static int discover_arenas(struct btt *btt)
{
	int ret = 0;
	struct arena_info *arena;
	struct btt_sb *super;
	size_t remaining = btt->rawsize;
	u64 cur_nlba = 0;
	size_t cur_off = 0;
	int num_arenas = 0;

	super = kzalloc(sizeof(*super), GFP_KERNEL);
	if (!super)
		return -ENOMEM;

	while (remaining) {
		/* Alloc memory for arena */
		arena = alloc_arena(btt, 0, 0, 0);
		if (!arena) {
			ret = -ENOMEM;
			goto out_super;
		}
		arena->infooff = cur_off;
		ret = btt_info_read(arena, super);
		if (ret)
			goto out;

		if (!nd_btt_arena_is_valid(btt->nd_btt, super)) {
			if (remaining == btt->rawsize) {
				btt->init_state = INIT_NOTFOUND;
				dev_info(to_dev(arena), "No existing arenas\n");
				goto out;
			} else {
				dev_info(to_dev(arena),
						"Found corrupted metadata!\n");
				ret = -ENODEV;
				goto out;
			}
		}

		arena->external_lba_start = cur_nlba;
		parse_arena_meta(arena, super, cur_off);

		ret = log_set_indices(arena);
		if (ret) {
			dev_err(to_dev(arena),
				"Unable to deduce log/padding indices\n");
			goto out;
		}

		ret = btt_freelist_init(arena);
		if (ret)
			goto out;

		ret = btt_rtt_init(arena);
		if (ret)
			goto out;

		ret = btt_maplocks_init(arena);
		if (ret)
			goto out;

		list_add_tail(&arena->list, &btt->arena_list);

		remaining -= arena->size;
		cur_off += arena->size;
		cur_nlba += arena->external_nlba;
		num_arenas++;

		if (arena->nextoff == 0)
			break;
	}

	btt->num_arenas = num_arenas;
	btt->nlba = cur_nlba;
	btt->init_state = INIT_READY;

	kfree(super);
	return ret;

 out:
	kfree(arena);
	free_arenas(btt);
 out_super:
	kfree(super);
	return ret;
}

static int create_arenas(struct btt *btt)
{
	size_t remaining = btt->rawsize;
	size_t cur_off = 0;

	while (remaining) {
		struct arena_info *arena;
		size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining);

		remaining -= arena_size;
		if (arena_size < ARENA_MIN_SIZE)
			break;

		arena = alloc_arena(btt, arena_size, btt->nlba, cur_off);
		if (!arena) {
			free_arenas(btt);
			return -ENOMEM;
		}
		btt->nlba += arena->external_nlba;
		if (remaining >= ARENA_MIN_SIZE)
			arena->nextoff = arena->size;
		else
			arena->nextoff = 0;
		cur_off += arena_size;
		list_add_tail(&arena->list, &btt->arena_list);
	}

	return 0;
}

/*
 * This function completes arena initialization by writing
 * all the metadata.
 * It is only called for an uninitialized arena when a write
 * to that arena occurs for the first time.
 */
static int btt_arena_write_layout(struct arena_info *arena)
{
	int ret;
	u64 sum;
	struct btt_sb *super;
	struct nd_btt *nd_btt = arena->nd_btt;
	const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);

	ret = btt_map_init(arena);
	if (ret)
		return ret;

	ret = btt_log_init(arena);
	if (ret)
		return ret;

	super = kzalloc(sizeof(struct btt_sb), GFP_NOIO);
	if (!super)
		return -ENOMEM;

	strncpy(super->signature, BTT_SIG, BTT_SIG_LEN);
	memcpy(super->uuid, nd_btt->uuid, 16);
	memcpy(super->parent_uuid, parent_uuid, 16);
	super->flags = cpu_to_le32(arena->flags);
	super->version_major = cpu_to_le16(arena->version_major);
	super->version_minor = cpu_to_le16(arena->version_minor);
	super->external_lbasize = cpu_to_le32(arena->external_lbasize);
	super->external_nlba = cpu_to_le32(arena->external_nlba);
	super->internal_lbasize = cpu_to_le32(arena->internal_lbasize);
	super->internal_nlba = cpu_to_le32(arena->internal_nlba);
	super->nfree = cpu_to_le32(arena->nfree);
	super->infosize = cpu_to_le32(sizeof(struct btt_sb));
	super->nextoff = cpu_to_le64(arena->nextoff);
	/*
	 * Subtract arena->infooff (arena start) so numbers are relative
	 * to 'this' arena
	 */
	super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff);
	super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff);
	super->logoff = cpu_to_le64(arena->logoff - arena->infooff);
	super->info2off = cpu_to_le64(arena->info2off - arena->infooff);

	super->flags = 0;
	sum = nd_sb_checksum((struct nd_gen_sb *) super);
	super->checksum = cpu_to_le64(sum);

	ret = btt_info_write(arena, super);

	kfree(super);
	return ret;
}

/*
 * This function completes the initialization for the BTT namespace
 * such that it is ready to accept IOs
 */
static int btt_meta_init(struct btt *btt)
{
	int ret = 0;
	struct arena_info *arena;

	mutex_lock(&btt->init_lock);
	list_for_each_entry(arena, &btt->arena_list, list) {
		ret = btt_arena_write_layout(arena);
		if (ret)
			goto unlock;

		ret = btt_freelist_init(arena);
		if (ret)
			goto unlock;

		ret = btt_rtt_init(arena);
		if (ret)
			goto unlock;

		ret = btt_maplocks_init(arena);
		if (ret)
			goto unlock;
	}

	btt->init_state = INIT_READY;

 unlock:
	mutex_unlock(&btt->init_lock);
	return ret;
}

static u32 btt_meta_size(struct btt *btt)
{
	return btt->lbasize - btt->sector_size;
}
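
/*
 * Example: a namespace advertising 520-byte blocks is exposed as 512-byte
 * sectors, leaving btt_meta_size() == 8 bytes of per-sector protection
 * information to be shuttled via btt_rw_integrity() below.
 */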

/*
 * This function calculates the arena in which the given LBA lies
 * by doing a linear walk. This is acceptable since we expect only
 * a few arenas. If we have backing devices that get much larger,
 * we can construct a balanced binary tree of arenas at init time
 * so that this range search becomes faster.
 */
static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap,
			struct arena_info **arena)
{
	struct arena_info *arena_list;
	__u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size);

	list_for_each_entry(arena_list, &btt->arena_list, list) {
		if (lba < arena_list->external_nlba) {
			*arena = arena_list;
			*premap = lba;
			return 0;
		}
		lba -= arena_list->external_nlba;
	}

	return -EIO;
}

/*
 * The following (lock_map, unlock_map) are mostly just to improve
 * readability, since they index into an array of locks
 */
static void lock_map(struct arena_info *arena, u32 premap)
		__acquires(&arena->map_locks[idx].lock)
{
	u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;

	spin_lock(&arena->map_locks[idx].lock);
}

static void unlock_map(struct arena_info *arena, u32 premap)
		__releases(&arena->map_locks[idx].lock)
{
	u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;

	spin_unlock(&arena->map_locks[idx].lock);
}
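
/*
 * The lock index above groups map entries by the L1 cache line they occupy
 * and stripes those groups across the nfree locks: entries that share a
 * cache line always hash to the same lock, while entries far apart can be
 * updated concurrently under different locks.
 */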

static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
{
	return arena->dataoff + ((u64)lba * arena->internal_lbasize);
}

static int btt_data_read(struct arena_info *arena, struct page *page,
			unsigned int off, u32 lba, u32 len)
{
	int ret;
	u64 nsoff = to_namespace_offset(arena, lba);
	void *mem = kmap_atomic(page);

	ret = arena_read_bytes(arena, nsoff, mem + off, len);
	kunmap_atomic(mem);

	return ret;
}

static int btt_data_write(struct arena_info *arena, u32 lba,
			struct page *page, unsigned int off, u32 len)
{
	int ret;
	u64 nsoff = to_namespace_offset(arena, lba);
	void *mem = kmap_atomic(page);

	ret = arena_write_bytes(arena, nsoff, mem + off, len);
	kunmap_atomic(mem);

	return ret;
}

static void zero_fill_data(struct page *page, unsigned int off, u32 len)
{
	void *mem = kmap_atomic(page);

	memset(mem + off, 0, len);
	kunmap_atomic(mem);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
			struct arena_info *arena, u32 postmap, int rw)
{
	unsigned int len = btt_meta_size(btt);
	u64 meta_nsoff;
	int ret = 0;

	if (bip == NULL)
		return 0;

	meta_nsoff = to_namespace_offset(arena, postmap) + btt->sector_size;

	while (len) {
		unsigned int cur_len;
		struct bio_vec bv;
		void *mem;

		bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
		/*
		 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
		 * .bv_offset already adjusted for iter->bi_bvec_done, and we
		 * can use those directly
		 */
		cur_len = min(len, bv.bv_len);
		mem = kmap_atomic(bv.bv_page);
		if (rw)
			ret = arena_write_bytes(arena, meta_nsoff,
					mem + bv.bv_offset, cur_len);
		else
			ret = arena_read_bytes(arena, meta_nsoff,
					mem + bv.bv_offset, cur_len);

		kunmap_atomic(mem);
		if (ret)
			return ret;

		len -= cur_len;
		meta_nsoff += cur_len;
		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
	}

	return ret;
}

#else /* CONFIG_BLK_DEV_INTEGRITY */
static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
			struct arena_info *arena, u32 postmap, int rw)
{
	return 0;
}
#endif

static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
			struct page *page, unsigned int off, sector_t sector,
			unsigned int len)
{
	int ret = 0;
	int t_flag, e_flag;
	struct arena_info *arena = NULL;
	u32 lane = 0, premap, postmap;

	while (len) {
		u32 cur_len;

		lane = nd_region_acquire_lane(btt->nd_region);

		ret = lba_to_arena(btt, sector, &premap, &arena);
		if (ret)
			goto out_lane;

		cur_len = min(btt->sector_size, len);

		ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag);
		if (ret)
			goto out_lane;

		/*
		 * We loop to make sure that the post map LBA didn't change
		 * from under us between writing the RTT and doing the actual
		 * read.
		 */
		while (1) {
			u32 new_map;

			if (t_flag) {
				zero_fill_data(page, off, cur_len);
				goto out_lane;
			}

			if (e_flag) {
				ret = -EIO;
				goto out_lane;
			}

			arena->rtt[lane] = RTT_VALID | postmap;
			/*
			 * Barrier to make sure this write is not reordered
			 * to do the verification map_read before the RTT store
			 */
			barrier();

			ret = btt_map_read(arena, premap, &new_map, &t_flag,
						&e_flag);
			if (ret)
				goto out_rtt;

			if (postmap == new_map)
				break;

			postmap = new_map;
		}

		ret = btt_data_read(arena, page, off, postmap, cur_len);
		if (ret)
			goto out_rtt;

		if (bip) {
			ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
			if (ret)
				goto out_rtt;
		}

		arena->rtt[lane] = RTT_INVALID;
		nd_region_release_lane(btt->nd_region, lane);

		len -= cur_len;
		off += cur_len;
		sector += btt->sector_size >> SECTOR_SHIFT;
	}

	return 0;

 out_rtt:
	arena->rtt[lane] = RTT_INVALID;
 out_lane:
	nd_region_release_lane(btt->nd_region, lane);
	return ret;
}
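
/*
 * To summarize the read-side protocol above: the postmap block is published
 * in this lane's RTT slot before the data read, and the map entry is then
 * re-read to confirm it did not change in the meantime. A concurrent writer
 * (btt_write_pg below) refuses to reuse any free block that appears in the
 * RTT, so the data being read cannot be overwritten mid-read.
 */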

static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
			sector_t sector, struct page *page, unsigned int off,
			unsigned int len)
{
	int ret = 0;
	struct arena_info *arena = NULL;
	u32 premap = 0, old_postmap, new_postmap, lane = 0, i;
	struct log_entry log;
	int sub;

	while (len) {
		u32 cur_len;

		lane = nd_region_acquire_lane(btt->nd_region);

		ret = lba_to_arena(btt, sector, &premap, &arena);
		if (ret)
			goto out_lane;
		cur_len = min(btt->sector_size, len);

		if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) {
			ret = -EIO;
			goto out_lane;
		}

		new_postmap = arena->freelist[lane].block;

		/* Wait if the new block is being read from */
		for (i = 0; i < arena->nfree; i++)
			while (arena->rtt[i] == (RTT_VALID | new_postmap))
				cpu_relax();

		if (new_postmap >= arena->internal_nlba) {
			ret = -EIO;
			goto out_lane;
		}

		ret = btt_data_write(arena, new_postmap, page, off, cur_len);
		if (ret)
			goto out_lane;

		if (bip) {
			ret = btt_rw_integrity(btt, bip, arena, new_postmap,
						WRITE);
			if (ret)
				goto out_lane;
		}

		lock_map(arena, premap);
		ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL);
		if (ret)
			goto out_map;
		if (old_postmap >= arena->internal_nlba) {
			ret = -EIO;
			goto out_map;
		}

		log.lba = cpu_to_le32(premap);
		log.old_map = cpu_to_le32(old_postmap);
		log.new_map = cpu_to_le32(new_postmap);
		log.seq = cpu_to_le32(arena->freelist[lane].seq);
		sub = arena->freelist[lane].sub;
		ret = btt_flog_write(arena, lane, sub, &log);
		if (ret)
			goto out_map;

		ret = btt_map_write(arena, premap, new_postmap, 0, 0);
		if (ret)
			goto out_map;

		unlock_map(arena, premap);
		nd_region_release_lane(btt->nd_region, lane);

		len -= cur_len;
		off += cur_len;
		sector += btt->sector_size >> SECTOR_SHIFT;
	}

	return 0;

 out_map:
	unlock_map(arena, premap);
 out_lane:
	nd_region_release_lane(btt->nd_region, lane);
	return ret;
}
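
/*
 * The ordering above is what provides single-sector power-fail atomicity:
 * the new data lands in a free block first, then the flog records the
 * old -> new transition, and only then is the map entry switched. A crash
 * before the flog write leaves the old mapping untouched; a crash between
 * the flog and map writes is repaired by btt_freelist_init(), which replays
 * the flog against the map at the next startup.
 */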

static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
			struct page *page, unsigned int len, unsigned int off,
			bool is_write, sector_t sector)
{
	int ret;

	if (!is_write) {
		ret = btt_read_pg(btt, bip, page, off, sector, len);
		flush_dcache_page(page);
	} else {
		flush_dcache_page(page);
		ret = btt_write_pg(btt, bip, sector, page, off, len);
	}

	return ret;
}

static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct btt *btt = q->queuedata;
	struct bvec_iter iter;
	unsigned long start;
	struct bio_vec bvec;
	int err = 0;
	bool do_acct;

	/*
	 * bio_integrity_enabled also checks if the bio already has an
	 * integrity payload attached. If it does, we *don't* do a
	 * bio_integrity_prep here - the payload has been generated by
	 * another kernel subsystem, and we just pass it through.
	 */
	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
		bio->bi_error = -EIO;
		goto out;
	}

	do_acct = nd_iostat_start(bio, &start);
	bio_for_each_segment(bvec, bio, iter) {
		unsigned int len = bvec.bv_len;

		BUG_ON(len > PAGE_SIZE);
		/* Make sure len is in multiples of sector size. */
		/* XXX is this right? */
		BUG_ON(len < btt->sector_size);
		BUG_ON(len % btt->sector_size);

		err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
				op_is_write(bio_op(bio)), iter.bi_sector);
		if (err) {
			dev_info(&btt->nd_btt->dev,
					"io error in %s sector %lld, len %d,\n",
					(op_is_write(bio_op(bio))) ? "WRITE" :
					"READ",
					(unsigned long long) iter.bi_sector, len);
			bio->bi_error = err;
			break;
		}
	}
	if (do_acct)
		nd_iostat_end(bio, start);

 out:
	bio_endio(bio);
	return BLK_QC_T_NONE;
}

static int btt_rw_page(struct block_device *bdev, sector_t sector,
		struct page *page, bool is_write)
{
	struct btt *btt = bdev->bd_disk->private_data;
	int rc;

	rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector);
	if (rc == 0)
		page_endio(page, is_write, 0);

	return rc;
}

static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	/* some standard values */
	geo->heads = 1 << 6;
	geo->sectors = 1 << 5;
	geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	return 0;
}

static const struct block_device_operations btt_fops = {
	.owner = THIS_MODULE,
	.rw_page = btt_rw_page,
	.getgeo = btt_getgeo,
	.revalidate_disk = nvdimm_revalidate_disk,
};

static int btt_blk_init(struct btt *btt)
{
	struct nd_btt *nd_btt = btt->nd_btt;
	struct nd_namespace_common *ndns = nd_btt->ndns;

	/* create a new disk and request queue for btt */
	btt->btt_queue = blk_alloc_queue(GFP_KERNEL);
	if (!btt->btt_queue)
		return -ENOMEM;

	btt->btt_disk = alloc_disk(0);
	if (!btt->btt_disk) {
		blk_cleanup_queue(btt->btt_queue);
		return -ENOMEM;
	}

	nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
	btt->btt_disk->first_minor = 0;
	btt->btt_disk->fops = &btt_fops;
	btt->btt_disk->private_data = btt;
	btt->btt_disk->queue = btt->btt_queue;
	btt->btt_disk->flags = GENHD_FL_EXT_DEVT;

	blk_queue_make_request(btt->btt_queue, btt_make_request);
	blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
	blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
	blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
	btt->btt_queue->queuedata = btt;

	if (btt_meta_size(btt)) {
		int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));

		if (rc) {
			del_gendisk(btt->btt_disk);
			put_disk(btt->btt_disk);
			blk_cleanup_queue(btt->btt_queue);
			return rc;
		}
	}
	set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
	device_add_disk(&btt->nd_btt->dev, btt->btt_disk);
	btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
	revalidate_disk(btt->btt_disk);

	return 0;
}

static void btt_blk_cleanup(struct btt *btt)
{
	del_gendisk(btt->btt_disk);
	put_disk(btt->btt_disk);
	blk_cleanup_queue(btt->btt_queue);
}

/**
 * btt_init - initialize a block translation table for the given device
 * @nd_btt: device with BTT geometry and backing device info
 * @rawsize: raw size in bytes of the backing device
 * @lbasize: lba size of the backing device
 * @uuid: A uuid for the backing device - this is stored on media
 * @nd_region: parent region of the backing device, used for lane allocation
 *
 * Initialize a Block Translation Table on a backing device to provide
 * single sector power fail atomicity.
 *
 * Context:
 * Might sleep.
 *
 * Returns:
 * Pointer to a new struct btt on success, NULL on failure.
 */
static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
		u32 lbasize, u8 *uuid, struct nd_region *nd_region)
{
	int ret;
	struct btt *btt;
	struct device *dev = &nd_btt->dev;

	btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL);
	if (!btt)
		return NULL;

	btt->nd_btt = nd_btt;
	btt->rawsize = rawsize;
	btt->lbasize = lbasize;
	btt->sector_size = ((lbasize >= 4096) ? 4096 : 512);
	INIT_LIST_HEAD(&btt->arena_list);
	mutex_init(&btt->init_lock);
	btt->nd_region = nd_region;

	ret = discover_arenas(btt);
	if (ret) {
		dev_err(dev, "init: error in arena_discover: %d\n", ret);
		return NULL;
	}

	if (btt->init_state != INIT_READY && nd_region->ro) {
		dev_info(dev, "%s is read-only, unable to init btt metadata\n",
				dev_name(&nd_region->dev));
		return NULL;
	} else if (btt->init_state != INIT_READY) {
		btt->num_arenas = (rawsize / ARENA_MAX_SIZE) +
			((rawsize % ARENA_MAX_SIZE) ? 1 : 0);
		dev_dbg(dev, "init: %d arenas for %llu rawsize\n",
				btt->num_arenas, rawsize);

		ret = create_arenas(btt);
		if (ret) {
			dev_info(dev, "init: create_arenas: %d\n", ret);
			return NULL;
		}

		ret = btt_meta_init(btt);
		if (ret) {
			dev_err(dev, "init: error in meta_init: %d\n", ret);
			return NULL;
		}
	}

	ret = btt_blk_init(btt);
	if (ret) {
		dev_err(dev, "init: error in blk_init: %d\n", ret);
		return NULL;
	}

	btt_debugfs_init(btt);

	return btt;
}

/**
 * btt_fini - de-initialize a BTT
 * @btt: the BTT handle that was generated by btt_init
 *
 * De-initialize a Block Translation Table on device removal
 *
 * Context:
 * Might sleep.
 */
static void btt_fini(struct btt *btt)
{
	if (btt) {
		btt_blk_cleanup(btt);
		free_arenas(btt);
		debugfs_remove_recursive(btt->debugfs_dir);
	}
}

int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
{
	struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
	struct nd_region *nd_region;
	struct btt *btt;
	size_t rawsize;

	if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize) {
		dev_dbg(&nd_btt->dev, "incomplete btt configuration\n");
		return -ENODEV;
	}

	rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K;
	if (rawsize < ARENA_MIN_SIZE) {
		dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n",
				dev_name(&ndns->dev), ARENA_MIN_SIZE + SZ_4K);
		return -ENXIO;
	}
	nd_region = to_nd_region(nd_btt->dev.parent);
	btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid,
			nd_region);
	if (!btt)
		return -ENOMEM;
	nd_btt->btt = btt;

	return 0;
}
EXPORT_SYMBOL(nvdimm_namespace_attach_btt);

int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt)
{
	struct btt *btt = nd_btt->btt;

	btt_fini(btt);
	nd_btt->btt = NULL;

	return 0;
}
EXPORT_SYMBOL(nvdimm_namespace_detach_btt);

static int __init nd_btt_init(void)
{
	int rc = 0;

	debugfs_root = debugfs_create_dir("btt", NULL);
	if (IS_ERR_OR_NULL(debugfs_root))
		rc = -ENXIO;

	return rc;
}

static void __exit nd_btt_exit(void)
{
	debugfs_remove_recursive(debugfs_root);
}

MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT);
MODULE_AUTHOR("Vishal Verma <vishal.l.verma@linux.intel.com>");
MODULE_LICENSE("GPL v2");
module_init(nd_btt_init);
module_exit(nd_btt_exit);