dir.c 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright 2017 Omnibond Systems, L.L.C.
  4. */
  5. #include "protocol.h"
  6. #include "orangefs-kernel.h"
  7. #include "orangefs-bufmap.h"
  8. struct orangefs_dir_part {
  9. struct orangefs_dir_part *next;
  10. size_t len;
  11. };
  12. struct orangefs_dir {
  13. __u64 token;
  14. struct orangefs_dir_part *part;
  15. loff_t end;
  16. int error;
  17. };
  18. #define PART_SHIFT (24)
  19. #define PART_SIZE (1<<24)
  20. #define PART_MASK (~(PART_SIZE - 1))
  21. /*
  22. * There can be up to 512 directory entries. Each entry is encoded as
  23. * follows:
  24. * 4 bytes: string size (n)
  25. * n bytes: string
  26. * 1 byte: trailing zero
  27. * padding to 8 bytes
  28. * 16 bytes: khandle
  29. * padding to 8 bytes
  30. *
  31. * The trailer_buf starts with a struct orangefs_readdir_response_s
  32. * which must be skipped to get to the directory data.
  33. *
  34. * The data which is received from the userspace daemon is termed a
  35. * part and is stored in a linked list in case more than one part is
  36. * needed for a large directory.
  37. *
  38. * The position pointer (ctx->pos) encodes the part and offset on which
  39. * to begin reading at. Bits above PART_SHIFT encode the part and bits
  40. * below PART_SHIFT encode the offset. Parts are stored in a linked
  41. * list which grows as data is received from the server. The overhead
  42. * associated with managing the list is presumed to be small compared to
  43. * the overhead of communicating with the server.
  44. *
  45. * As data is received from the server, it is placed at the end of the
  46. * part list. Data is parsed from the current position as it is needed.
  47. * When data is determined to be corrupt, it is either because the
  48. * userspace component has sent back corrupt data or because the file
  49. * pointer has been moved to an invalid location. Since the two cannot
  50. * be differentiated, return EIO.
  51. *
  52. * Part zero is synthesized to contain `.' and `..'. Part one is the
  53. * first part of the part list.
  54. */
  55. static int do_readdir(struct orangefs_inode_s *oi,
  56. struct orangefs_dir *od, struct dentry *dentry,
  57. struct orangefs_kernel_op_s *op)
  58. {
  59. struct orangefs_readdir_response_s *resp;
  60. int bufi, r;
  61. /*
  62. * Despite the badly named field, readdir does not use shared
  63. * memory. However, there are a limited number of readdir
  64. * slots, which must be allocated here. This flag simply tells
  65. * the op scheduler to return the op here for retry.
  66. */
  67. op->uses_shared_memory = 1;
  68. op->upcall.req.readdir.refn = oi->refn;
  69. op->upcall.req.readdir.token = od->token;
  70. op->upcall.req.readdir.max_dirent_count =
  71. ORANGEFS_MAX_DIRENT_COUNT_READDIR;
  72. again:
  73. bufi = orangefs_readdir_index_get();
  74. if (bufi < 0) {
  75. od->error = bufi;
  76. return bufi;
  77. }
  78. op->upcall.req.readdir.buf_index = bufi;
  79. r = service_operation(op, "orangefs_readdir",
  80. get_interruptible_flag(dentry->d_inode));
  81. orangefs_readdir_index_put(bufi);
  82. if (op_state_purged(op)) {
  83. if (r == -EAGAIN) {
  84. vfree(op->downcall.trailer_buf);
  85. goto again;
  86. } else if (r == -EIO) {
  87. vfree(op->downcall.trailer_buf);
  88. od->error = r;
  89. return r;
  90. }
  91. }
  92. if (r < 0) {
  93. vfree(op->downcall.trailer_buf);
  94. od->error = r;
  95. return r;
  96. } else if (op->downcall.status) {
  97. vfree(op->downcall.trailer_buf);
  98. od->error = op->downcall.status;
  99. return op->downcall.status;
  100. }
  101. /*
  102. * The maximum size is size per entry times the 512 entries plus
  103. * the header. This is well under the limit.
  104. */
  105. if (op->downcall.trailer_size > PART_SIZE) {
  106. vfree(op->downcall.trailer_buf);
  107. od->error = -EIO;
  108. return -EIO;
  109. }
  110. resp = (struct orangefs_readdir_response_s *)
  111. op->downcall.trailer_buf;
  112. od->token = resp->token;
  113. return 0;
  114. }
  115. static int parse_readdir(struct orangefs_dir *od,
  116. struct orangefs_kernel_op_s *op)
  117. {
  118. struct orangefs_dir_part *part, *new;
  119. size_t count;
  120. count = 1;
  121. part = od->part;
  122. while (part) {
  123. count++;
  124. if (part->next)
  125. part = part->next;
  126. else
  127. break;
  128. }
  129. new = (void *)op->downcall.trailer_buf;
  130. new->next = NULL;
  131. new->len = op->downcall.trailer_size -
  132. sizeof(struct orangefs_readdir_response_s);
  133. if (!od->part)
  134. od->part = new;
  135. else
  136. part->next = new;
  137. count++;
  138. od->end = count << PART_SHIFT;
  139. return 0;
  140. }
  141. static int orangefs_dir_more(struct orangefs_inode_s *oi,
  142. struct orangefs_dir *od, struct dentry *dentry)
  143. {
  144. struct orangefs_kernel_op_s *op;
  145. int r;
  146. op = op_alloc(ORANGEFS_VFS_OP_READDIR);
  147. if (!op) {
  148. od->error = -ENOMEM;
  149. return -ENOMEM;
  150. }
  151. r = do_readdir(oi, od, dentry, op);
  152. if (r) {
  153. od->error = r;
  154. goto out;
  155. }
  156. r = parse_readdir(od, op);
  157. if (r) {
  158. od->error = r;
  159. goto out;
  160. }
  161. od->error = 0;
  162. out:
  163. op_release(op);
  164. return od->error;
  165. }
  166. static int fill_from_part(struct orangefs_dir_part *part,
  167. struct dir_context *ctx)
  168. {
  169. const int offset = sizeof(struct orangefs_readdir_response_s);
  170. struct orangefs_khandle *khandle;
  171. __u32 *len, padlen;
  172. loff_t i;
  173. char *s;
  174. i = ctx->pos & ~PART_MASK;
  175. /* The file offset from userspace is too large. */
  176. if (i > part->len)
  177. return 1;
  178. /*
  179. * If the seek pointer is positioned just before an entry it
  180. * should find the next entry.
  181. */
  182. if (i % 8)
  183. i = i + (8 - i%8)%8;
  184. while (i < part->len) {
  185. if (part->len < i + sizeof *len)
  186. break;
  187. len = (void *)part + offset + i;
  188. /*
  189. * len is the size of the string itself. padlen is the
  190. * total size of the encoded string.
  191. */
  192. padlen = (sizeof *len + *len + 1) +
  193. (8 - (sizeof *len + *len + 1)%8)%8;
  194. if (part->len < i + padlen + sizeof *khandle)
  195. goto next;
  196. s = (void *)part + offset + i + sizeof *len;
  197. if (s[*len] != 0)
  198. goto next;
  199. khandle = (void *)part + offset + i + padlen;
  200. if (!dir_emit(ctx, s, *len,
  201. orangefs_khandle_to_ino(khandle),
  202. DT_UNKNOWN))
  203. return 0;
  204. i += padlen + sizeof *khandle;
  205. i = i + (8 - i%8)%8;
  206. BUG_ON(i > part->len);
  207. ctx->pos = (ctx->pos & PART_MASK) | i;
  208. continue;
  209. next:
  210. i += 8;
  211. }
  212. return 1;
  213. }
  214. static int orangefs_dir_fill(struct orangefs_inode_s *oi,
  215. struct orangefs_dir *od, struct dentry *dentry,
  216. struct dir_context *ctx)
  217. {
  218. struct orangefs_dir_part *part;
  219. size_t count;
  220. count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
  221. part = od->part;
  222. while (part->next && count) {
  223. count--;
  224. part = part->next;
  225. }
  226. /* This means the userspace file offset is invalid. */
  227. if (count) {
  228. od->error = -EIO;
  229. return -EIO;
  230. }
  231. while (part && part->len) {
  232. int r;
  233. r = fill_from_part(part, ctx);
  234. if (r < 0) {
  235. od->error = r;
  236. return r;
  237. } else if (r == 0) {
  238. /* Userspace buffer is full. */
  239. break;
  240. } else {
  241. /*
  242. * The part ran out of data. Move to the next
  243. * part. */
  244. ctx->pos = (ctx->pos & PART_MASK) +
  245. (1 << PART_SHIFT);
  246. part = part->next;
  247. }
  248. }
  249. return 0;
  250. }
  251. static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
  252. int whence)
  253. {
  254. struct orangefs_dir *od = file->private_data;
  255. /*
  256. * Delete the stored data so userspace sees new directory
  257. * entries.
  258. */
  259. if (!whence && offset < od->end) {
  260. struct orangefs_dir_part *part = od->part;
  261. while (part) {
  262. struct orangefs_dir_part *next = part->next;
  263. vfree(part);
  264. part = next;
  265. }
  266. od->token = ORANGEFS_ITERATE_START;
  267. od->part = NULL;
  268. od->end = 1 << PART_SHIFT;
  269. }
  270. return default_llseek(file, offset, whence);
  271. }
  272. static int orangefs_dir_iterate(struct file *file,
  273. struct dir_context *ctx)
  274. {
  275. struct orangefs_inode_s *oi;
  276. struct orangefs_dir *od;
  277. struct dentry *dentry;
  278. int r;
  279. dentry = file->f_path.dentry;
  280. oi = ORANGEFS_I(dentry->d_inode);
  281. od = file->private_data;
  282. if (od->error)
  283. return od->error;
  284. if (ctx->pos == 0) {
  285. if (!dir_emit_dot(file, ctx))
  286. return 0;
  287. ctx->pos++;
  288. }
  289. if (ctx->pos == 1) {
  290. if (!dir_emit_dotdot(file, ctx))
  291. return 0;
  292. ctx->pos = 1 << PART_SHIFT;
  293. }
  294. /*
  295. * The seek position is in the first synthesized part but is not
  296. * valid.
  297. */
  298. if ((ctx->pos & PART_MASK) == 0)
  299. return -EIO;
  300. r = 0;
  301. /*
  302. * Must read more if the user has sought past what has been read
  303. * so far. Stop a user who has sought past the end.
  304. */
  305. while (od->token != ORANGEFS_ITERATE_END &&
  306. ctx->pos > od->end) {
  307. r = orangefs_dir_more(oi, od, dentry);
  308. if (r)
  309. return r;
  310. }
  311. if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
  312. return -EIO;
  313. /* Then try to fill if there's any left in the buffer. */
  314. if (ctx->pos < od->end) {
  315. r = orangefs_dir_fill(oi, od, dentry, ctx);
  316. if (r)
  317. return r;
  318. }
  319. /* Finally get some more and try to fill. */
  320. if (od->token != ORANGEFS_ITERATE_END) {
  321. r = orangefs_dir_more(oi, od, dentry);
  322. if (r)
  323. return r;
  324. r = orangefs_dir_fill(oi, od, dentry, ctx);
  325. }
  326. return r;
  327. }
  328. static int orangefs_dir_open(struct inode *inode, struct file *file)
  329. {
  330. struct orangefs_dir *od;
  331. file->private_data = kmalloc(sizeof(struct orangefs_dir),
  332. GFP_KERNEL);
  333. if (!file->private_data)
  334. return -ENOMEM;
  335. od = file->private_data;
  336. od->token = ORANGEFS_ITERATE_START;
  337. od->part = NULL;
  338. od->end = 1 << PART_SHIFT;
  339. od->error = 0;
  340. return 0;
  341. }
  342. static int orangefs_dir_release(struct inode *inode, struct file *file)
  343. {
  344. struct orangefs_dir *od = file->private_data;
  345. struct orangefs_dir_part *part = od->part;
  346. while (part) {
  347. struct orangefs_dir_part *next = part->next;
  348. vfree(part);
  349. part = next;
  350. }
  351. kfree(od);
  352. return 0;
  353. }
  354. const struct file_operations orangefs_dir_operations = {
  355. .llseek = orangefs_dir_llseek,
  356. .read = generic_read_dir,
  357. .iterate = orangefs_dir_iterate,
  358. .open = orangefs_dir_open,
  359. .release = orangefs_dir_release
  360. };