blockcheck.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648
  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * blockcheck.c
  5. *
  6. * Checksum and ECC codes for the OCFS2 userspace library.
  7. *
  8. * Copyright (C) 2006, 2008 Oracle. All rights reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public
  12. * License, version 2, as published by the Free Software Foundation.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * General Public License for more details.
  18. */
  19. #include <linux/kernel.h>
  20. #include <linux/types.h>
  21. #include <linux/crc32.h>
  22. #include <linux/buffer_head.h>
  23. #include <linux/bitops.h>
  24. #include <linux/debugfs.h>
  25. #include <linux/module.h>
  26. #include <linux/fs.h>
  27. #include <asm/byteorder.h>
  28. #include <cluster/masklog.h>
  29. #include "ocfs2.h"
  30. #include "blockcheck.h"
  31. /*
  32. * We use the following conventions:
  33. *
  34. * d = # data bits
  35. * p = # parity bits
  36. * c = # total code bits (d + p)
  37. */
  /*
   * Map a 0-based data bit offset to its 1-based position in the hamming
   * code buffer.
   *
   * The hamming code reserves every power-of-two position (1, 2, 4, ...)
   * for parity, so a data bit's code position is its 1-based data position
   * plus the number of parity positions that precede it.
   *
   * Example: data bit 0 is the first data bit.  Code positions 1 (2^0) and
   * 2 (2^1) are parity, 3 is not a power of two, so data bit 0 becomes code
   * bit 3.  Data bits 1, 2, 3 and 4 become code bits 5, 6, 7 and 9.
   *
   * i:       0-based bit offset in the data buffer.
   * p_cache: optional.  When non-NULL, *p_cache is taken as the number of
   *          parity bits already counted by an earlier call so the search
   *          can resume from there, and it is updated before returning.
   *          Start it at 0; it is only useful when successive calls pass
   *          non-decreasing values of i.
   *
   * Returns the 1-based code bit number.
   */
  static unsigned int calc_code_bit(unsigned int i, unsigned int *p_cache)
  {
  	unsigned int b, p = 0;

  	/* Data bits are 0-based, code bits are 1-based. */
  	b = i + 1;

  	/* Resume from the caller's parity count if one was supplied. */
  	if (p_cache)
  		p = *p_cache;
  	b += p;

  	/*
  	 * For every power of two at or below our bit number, bump our bit.
  	 *
  	 * We compare with (b + 1) because we have to compare with what b
  	 * would be _if_ it were bumped up by the parity bit.
  	 *
  	 * The shift is done on an unsigned constant and p is kept below the
  	 * width of the type: shifting a signed 1 into the sign bit, or
  	 * shifting by the full width, is undefined behaviour.
  	 */
  	for (; p < 8 * sizeof(b) && (1U << p) < (b + 1); p++)
  		b++;

  	if (p_cache)
  		*p_cache = p;

  	return b;
  }
  82. /*
  83. * This is the low level encoder function. It can be called across
  84. * multiple hunks just like the crc32 code. 'd' is the number of bits
  85. * _in_this_hunk_. nr is the bit offset of this hunk. So, if you had
  86. * two 512B buffers, you would do it like so:
  87. *
  88. * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
  89. * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
  90. *
  91. * If you just have one buffer, use ocfs2_hamming_encode_block().
  92. */
  93. u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr)
  94. {
  95. unsigned int i, b, p = 0;
  96. BUG_ON(!d);
  97. /*
  98. * b is the hamming code bit number. Hamming code specifies a
  99. * 1-based array, but C uses 0-based. So 'i' is for C, and 'b' is
  100. * for the algorithm.
  101. *
  102. * The i++ in the for loop is so that the start offset passed
  103. * to ocfs2_find_next_bit_set() is one greater than the previously
  104. * found bit.
  105. */
  106. for (i = 0; (i = ocfs2_find_next_bit(data, d, i)) < d; i++)
  107. {
  108. /*
  109. * i is the offset in this hunk, nr + i is the total bit
  110. * offset.
  111. */
  112. b = calc_code_bit(nr + i, &p);
  113. /*
  114. * Data bits in the resultant code are checked by
  115. * parity bits that are part of the bit number
  116. * representation. Huh?
  117. *
  118. * <wikipedia href="http://en.wikipedia.org/wiki/Hamming_code">
  119. * In other words, the parity bit at position 2^k
  120. * checks bits in positions having bit k set in
  121. * their binary representation. Conversely, for
  122. * instance, bit 13, i.e. 1101(2), is checked by
  123. * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
  124. * </wikipedia>
  125. *
  126. * Note that 'k' is the _code_ bit number. 'b' in
  127. * our loop.
  128. */
  129. parity ^= b;
  130. }
  131. /* While the data buffer was treated as little endian, the
  132. * return value is in host endian. */
  133. return parity;
  134. }
  135. u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize)
  136. {
  137. return ocfs2_hamming_encode(0, data, blocksize * 8, 0);
  138. }
  139. /*
  140. * Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit
  141. * offset of the current hunk. If bit to be fixed is not part of the
  142. * current hunk, this does nothing.
  143. *
  144. * If you only have one hunk, use ocfs2_hamming_fix_block().
  145. */
  146. void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
  147. unsigned int fix)
  148. {
  149. unsigned int i, b;
  150. BUG_ON(!d);
  151. /*
  152. * If the bit to fix has an hweight of 1, it's a parity bit. One
  153. * busted parity bit is its own error. Nothing to do here.
  154. */
  155. if (hweight32(fix) == 1)
  156. return;
  157. /*
  158. * nr + d is the bit right past the data hunk we're looking at.
  159. * If fix after that, nothing to do
  160. */
  161. if (fix >= calc_code_bit(nr + d, NULL))
  162. return;
  163. /*
  164. * nr is the offset in the data hunk we're starting at. Let's
  165. * start b at the offset in the code buffer. See hamming_encode()
  166. * for a more detailed description of 'b'.
  167. */
  168. b = calc_code_bit(nr, NULL);
  169. /* If the fix is before this hunk, nothing to do */
  170. if (fix < b)
  171. return;
  172. for (i = 0; i < d; i++, b++)
  173. {
  174. /* Skip past parity bits */
  175. while (hweight32(b) == 1)
  176. b++;
  177. /*
  178. * i is the offset in this data hunk.
  179. * nr + i is the offset in the total data buffer.
  180. * b is the offset in the total code buffer.
  181. *
  182. * Thus, when b == fix, bit i in the current hunk needs
  183. * fixing.
  184. */
  185. if (b == fix)
  186. {
  187. if (ocfs2_test_bit(i, data))
  188. ocfs2_clear_bit(i, data);
  189. else
  190. ocfs2_set_bit(i, data);
  191. break;
  192. }
  193. }
  194. }
  /*
   * Apply a hamming fix to one complete buffer of 'blocksize' bytes, i.e.
   * a single hunk starting at bit offset 0.
   */
  void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
  			     unsigned int fix)
  {
  	unsigned int nbits = blocksize * 8;

  	ocfs2_hamming_fix(data, nbits, 0, fix);
  }
  200. /*
  201. * Debugfs handling.
  202. */
  203. #ifdef CONFIG_DEBUG_FS
  204. static int blockcheck_u64_get(void *data, u64 *val)
  205. {
  206. *val = *(u64 *)data;
  207. return 0;
  208. }
  209. DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n");
  210. static struct dentry *blockcheck_debugfs_create(const char *name,
  211. struct dentry *parent,
  212. u64 *value)
  213. {
  214. return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value,
  215. &blockcheck_fops);
  216. }
  217. static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
  218. {
  219. if (stats) {
  220. debugfs_remove(stats->b_debug_check);
  221. stats->b_debug_check = NULL;
  222. debugfs_remove(stats->b_debug_failure);
  223. stats->b_debug_failure = NULL;
  224. debugfs_remove(stats->b_debug_recover);
  225. stats->b_debug_recover = NULL;
  226. debugfs_remove(stats->b_debug_dir);
  227. stats->b_debug_dir = NULL;
  228. }
  229. }
  230. static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
  231. struct dentry *parent)
  232. {
  233. int rc = -EINVAL;
  234. if (!stats)
  235. goto out;
  236. stats->b_debug_dir = debugfs_create_dir("blockcheck", parent);
  237. if (!stats->b_debug_dir)
  238. goto out;
  239. stats->b_debug_check =
  240. blockcheck_debugfs_create("blocks_checked",
  241. stats->b_debug_dir,
  242. &stats->b_check_count);
  243. stats->b_debug_failure =
  244. blockcheck_debugfs_create("checksums_failed",
  245. stats->b_debug_dir,
  246. &stats->b_failure_count);
  247. stats->b_debug_recover =
  248. blockcheck_debugfs_create("ecc_recoveries",
  249. stats->b_debug_dir,
  250. &stats->b_recover_count);
  251. if (stats->b_debug_check && stats->b_debug_failure &&
  252. stats->b_debug_recover)
  253. rc = 0;
  254. out:
  255. if (rc)
  256. ocfs2_blockcheck_debug_remove(stats);
  257. return rc;
  258. }
  259. #else
  /*
   * Stub used when CONFIG_DEBUG_FS is not set: there is nothing to create,
   * so installation always reports success.
   */
  static inline int
  ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
  			       struct dentry *parent)
  {
  	return 0;
  }
  /* Stub used when CONFIG_DEBUG_FS is not set: nothing was created. */
  static inline void
  ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
  {
  }
  268. #endif /* CONFIG_DEBUG_FS */
  269. /* Always-called wrappers for starting and stopping the debugfs files */
  /*
   * Public entry point for creating the blockcheck debugfs files.  Forwards
   * to ocfs2_blockcheck_debug_install() and returns its result.
   */
  int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
  					   struct dentry *parent)
  {
  	int rc;

  	rc = ocfs2_blockcheck_debug_install(stats, parent);

  	return rc;
  }
  /*
   * Public entry point for removing the blockcheck debugfs files.  Forwards
   * to ocfs2_blockcheck_debug_remove().
   */
  void
  ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats)
  {
  	ocfs2_blockcheck_debug_remove(stats);
  }
  279. static void ocfs2_blockcheck_inc_check(struct ocfs2_blockcheck_stats *stats)
  280. {
  281. u64 new_count;
  282. if (!stats)
  283. return;
  284. spin_lock(&stats->b_lock);
  285. stats->b_check_count++;
  286. new_count = stats->b_check_count;
  287. spin_unlock(&stats->b_lock);
  288. if (!new_count)
  289. mlog(ML_NOTICE, "Block check count has wrapped\n");
  290. }
  291. static void ocfs2_blockcheck_inc_failure(struct ocfs2_blockcheck_stats *stats)
  292. {
  293. u64 new_count;
  294. if (!stats)
  295. return;
  296. spin_lock(&stats->b_lock);
  297. stats->b_failure_count++;
  298. new_count = stats->b_failure_count;
  299. spin_unlock(&stats->b_lock);
  300. if (!new_count)
  301. mlog(ML_NOTICE, "Checksum failure count has wrapped\n");
  302. }
  303. static void ocfs2_blockcheck_inc_recover(struct ocfs2_blockcheck_stats *stats)
  304. {
  305. u64 new_count;
  306. if (!stats)
  307. return;
  308. spin_lock(&stats->b_lock);
  309. stats->b_recover_count++;
  310. new_count = stats->b_recover_count;
  311. spin_unlock(&stats->b_lock);
  312. if (!new_count)
  313. mlog(ML_NOTICE, "ECC recovery count has wrapped\n");
  314. }
  315. /*
  316. * These are the low-level APIs for using the ocfs2_block_check structure.
  317. */
  318. /*
  319. * This function generates check information for a block.
  320. * data is the block to be checked. bc is a pointer to the
  321. * ocfs2_block_check structure describing the crc32 and the ecc.
  322. *
  323. * bc should be a pointer inside data, as the function will
  324. * take care of zeroing it before calculating the check information. If
  325. * bc does not point inside data, the caller must make sure any inline
  326. * ocfs2_block_check structures are zeroed.
  327. *
  328. * The data buffer must be in on-disk endian (little endian for ocfs2).
  329. * bc will be filled with little-endian values and will be ready to go to
  330. * disk.
  331. */
  332. void ocfs2_block_check_compute(void *data, size_t blocksize,
  333. struct ocfs2_block_check *bc)
  334. {
  335. u32 crc;
  336. u32 ecc;
  337. memset(bc, 0, sizeof(struct ocfs2_block_check));
  338. crc = crc32_le(~0, data, blocksize);
  339. ecc = ocfs2_hamming_encode_block(data, blocksize);
  340. /*
  341. * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
  342. * larger than 16 bits.
  343. */
  344. BUG_ON(ecc > USHRT_MAX);
  345. bc->bc_crc32e = cpu_to_le32(crc);
  346. bc->bc_ecc = cpu_to_le16((u16)ecc);
  347. }
  348. /*
  349. * This function validates existing check information. Like _compute,
  350. * the function will take care of zeroing bc before calculating check codes.
  351. * If bc is not a pointer inside data, the caller must have zeroed any
  352. * inline ocfs2_block_check structures.
  353. *
  354. * Again, the data passed in should be the on-disk endian.
  355. */
  356. int ocfs2_block_check_validate(void *data, size_t blocksize,
  357. struct ocfs2_block_check *bc,
  358. struct ocfs2_blockcheck_stats *stats)
  359. {
  360. int rc = 0;
  361. u32 bc_crc32e;
  362. u16 bc_ecc;
  363. u32 crc, ecc;
  364. ocfs2_blockcheck_inc_check(stats);
  365. bc_crc32e = le32_to_cpu(bc->bc_crc32e);
  366. bc_ecc = le16_to_cpu(bc->bc_ecc);
  367. memset(bc, 0, sizeof(struct ocfs2_block_check));
  368. /* Fast path - if the crc32 validates, we're good to go */
  369. crc = crc32_le(~0, data, blocksize);
  370. if (crc == bc_crc32e)
  371. goto out;
  372. ocfs2_blockcheck_inc_failure(stats);
  373. mlog(ML_ERROR,
  374. "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n",
  375. (unsigned int)bc_crc32e, (unsigned int)crc);
  376. /* Ok, try ECC fixups */
  377. ecc = ocfs2_hamming_encode_block(data, blocksize);
  378. ocfs2_hamming_fix_block(data, blocksize, ecc ^ bc_ecc);
  379. /* And check the crc32 again */
  380. crc = crc32_le(~0, data, blocksize);
  381. if (crc == bc_crc32e) {
  382. ocfs2_blockcheck_inc_recover(stats);
  383. goto out;
  384. }
  385. mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n",
  386. (unsigned int)bc_crc32e, (unsigned int)crc);
  387. rc = -EIO;
  388. out:
  389. bc->bc_crc32e = cpu_to_le32(bc_crc32e);
  390. bc->bc_ecc = cpu_to_le16(bc_ecc);
  391. return rc;
  392. }
  393. /*
  394. * This function generates check information for a list of buffer_heads.
  395. * bhs is the blocks to be checked. bc is a pointer to the
  396. * ocfs2_block_check structure describing the crc32 and the ecc.
  397. *
  398. * bc should be a pointer inside data, as the function will
  399. * take care of zeroing it before calculating the check information. If
  400. * bc does not point inside data, the caller must make sure any inline
  401. * ocfs2_block_check structures are zeroed.
  402. *
  403. * The data buffer must be in on-disk endian (little endian for ocfs2).
  404. * bc will be filled with little-endian values and will be ready to go to
  405. * disk.
  406. */
  407. void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
  408. struct ocfs2_block_check *bc)
  409. {
  410. int i;
  411. u32 crc, ecc;
  412. BUG_ON(nr < 0);
  413. if (!nr)
  414. return;
  415. memset(bc, 0, sizeof(struct ocfs2_block_check));
  416. for (i = 0, crc = ~0, ecc = 0; i < nr; i++) {
  417. crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
  418. /*
  419. * The number of bits in a buffer is obviously b_size*8.
  420. * The offset of this buffer is b_size*i, so the bit offset
  421. * of this buffer is b_size*8*i.
  422. */
  423. ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
  424. bhs[i]->b_size * 8,
  425. bhs[i]->b_size * 8 * i);
  426. }
  427. /*
  428. * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
  429. * larger than 16 bits.
  430. */
  431. BUG_ON(ecc > USHRT_MAX);
  432. bc->bc_crc32e = cpu_to_le32(crc);
  433. bc->bc_ecc = cpu_to_le16((u16)ecc);
  434. }
  435. /*
  436. * This function validates existing check information on a list of
  437. * buffer_heads. Like _compute_bhs, the function will take care of
  438. * zeroing bc before calculating check codes. If bc is not a pointer
  439. * inside data, the caller must have zeroed any inline
  440. * ocfs2_block_check structures.
  441. *
  442. * Again, the data passed in should be the on-disk endian.
  443. */
  444. int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
  445. struct ocfs2_block_check *bc,
  446. struct ocfs2_blockcheck_stats *stats)
  447. {
  448. int i, rc = 0;
  449. u32 bc_crc32e;
  450. u16 bc_ecc;
  451. u32 crc, ecc, fix;
  452. BUG_ON(nr < 0);
  453. if (!nr)
  454. return 0;
  455. ocfs2_blockcheck_inc_check(stats);
  456. bc_crc32e = le32_to_cpu(bc->bc_crc32e);
  457. bc_ecc = le16_to_cpu(bc->bc_ecc);
  458. memset(bc, 0, sizeof(struct ocfs2_block_check));
  459. /* Fast path - if the crc32 validates, we're good to go */
  460. for (i = 0, crc = ~0; i < nr; i++)
  461. crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
  462. if (crc == bc_crc32e)
  463. goto out;
  464. ocfs2_blockcheck_inc_failure(stats);
  465. mlog(ML_ERROR,
  466. "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
  467. (unsigned int)bc_crc32e, (unsigned int)crc);
  468. /* Ok, try ECC fixups */
  469. for (i = 0, ecc = 0; i < nr; i++) {
  470. /*
  471. * The number of bits in a buffer is obviously b_size*8.
  472. * The offset of this buffer is b_size*i, so the bit offset
  473. * of this buffer is b_size*8*i.
  474. */
  475. ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
  476. bhs[i]->b_size * 8,
  477. bhs[i]->b_size * 8 * i);
  478. }
  479. fix = ecc ^ bc_ecc;
  480. for (i = 0; i < nr; i++) {
  481. /*
  482. * Try the fix against each buffer. It will only affect
  483. * one of them.
  484. */
  485. ocfs2_hamming_fix(bhs[i]->b_data, bhs[i]->b_size * 8,
  486. bhs[i]->b_size * 8 * i, fix);
  487. }
  488. /* And check the crc32 again */
  489. for (i = 0, crc = ~0; i < nr; i++)
  490. crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
  491. if (crc == bc_crc32e) {
  492. ocfs2_blockcheck_inc_recover(stats);
  493. goto out;
  494. }
  495. mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
  496. (unsigned int)bc_crc32e, (unsigned int)crc);
  497. rc = -EIO;
  498. out:
  499. bc->bc_crc32e = cpu_to_le32(bc_crc32e);
  500. bc->bc_ecc = cpu_to_le16(bc_ecc);
  501. return rc;
  502. }
  503. /*
  504. * These are the main API. They check the superblock flag before
  505. * calling the underlying operations.
  506. *
  507. * They expect the buffer(s) to be in disk format.
  508. */
  509. void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
  510. struct ocfs2_block_check *bc)
  511. {
  512. if (ocfs2_meta_ecc(OCFS2_SB(sb)))
  513. ocfs2_block_check_compute(data, sb->s_blocksize, bc);
  514. }
  515. int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
  516. struct ocfs2_block_check *bc)
  517. {
  518. int rc = 0;
  519. struct ocfs2_super *osb = OCFS2_SB(sb);
  520. if (ocfs2_meta_ecc(osb))
  521. rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc,
  522. &osb->osb_ecc_stats);
  523. return rc;
  524. }
  /*
   * Compute check information over a list of buffer_heads, but only when
   * ocfs2_meta_ecc() reports true for this superblock.  Otherwise bc is
   * left untouched.
   */
  void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
  				struct buffer_head **bhs, int nr,
  				struct ocfs2_block_check *bc)
  {
  	if (!ocfs2_meta_ecc(OCFS2_SB(sb)))
  		return;

  	ocfs2_block_check_compute_bhs(bhs, nr, bc);
  }
  532. int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
  533. struct buffer_head **bhs, int nr,
  534. struct ocfs2_block_check *bc)
  535. {
  536. int rc = 0;
  537. struct ocfs2_super *osb = OCFS2_SB(sb);
  538. if (ocfs2_meta_ecc(osb))
  539. rc = ocfs2_block_check_validate_bhs(bhs, nr, bc,
  540. &osb->osb_ecc_stats);
  541. return rc;
  542. }