/* read.c - MFS file data transfer (fs_readwrite, rw_chunk), file position to
 * block mapping (read_map, get_block_map, rd_indir), read-ahead (rahead) and
 * directory listing (fs_getdents).
 */
  1. #include "fs.h"
  2. #include <stddef.h>
  3. #include <string.h>
  4. #include <stdlib.h>
  5. #include "buf.h"
  6. #include "inode.h"
  7. #include "super.h"
  8. #include <sys/param.h>
  9. #include <sys/dirent.h>
  10. #include <assert.h>
/* Forward declarations of the file-local helpers defined later in this file. */

/* rahead: return the buffer for 'baseblock' (the block holding 'position' of
 * inode 'rip'), prefetching following blocks when the block is not cached.
 */
static struct buf *rahead(struct inode *rip, block_t baseblock, u64_t
position, unsigned bytes_ahead);
/* rw_chunk: transfer one chunk that lies within a single block of 'rip'. */
static int rw_chunk(struct inode *rip, u64_t position, unsigned off,
size_t chunk, unsigned left, int call, struct fsdriver_data *data,
unsigned buf_off, unsigned int block_size, int *completed);
  16. /*===========================================================================*
  17. * fs_readwrite *
  18. *===========================================================================*/
  19. ssize_t fs_readwrite(ino_t ino_nr, struct fsdriver_data *data, size_t nrbytes,
  20. off_t position, int call)
  21. {
  22. int r;
  23. int regular;
  24. off_t f_size, bytes_left;
  25. size_t off, cum_io, block_size, chunk;
  26. mode_t mode_word;
  27. int completed;
  28. struct inode *rip;
  29. r = OK;
  30. /* Find the inode referred */
  31. if ((rip = find_inode(fs_dev, ino_nr)) == NULL)
  32. return(EINVAL);
  33. mode_word = rip->i_mode & I_TYPE;
  34. regular = (mode_word == I_REGULAR);
  35. /* Determine blocksize */
  36. block_size = rip->i_sp->s_block_size;
  37. f_size = rip->i_size;
  38. /* If this is file i/o, check we can write */
  39. if (call == FSC_WRITE) {
  40. if(rip->i_sp->s_rd_only)
  41. return EROFS;
  42. /* Check in advance to see if file will grow too big. */
  43. if (position > (off_t) (rip->i_sp->s_max_size - nrbytes))
  44. return(EFBIG);
  45. /* Clear the zone containing present EOF if hole about
  46. * to be created. This is necessary because all unwritten
  47. * blocks prior to the EOF must read as zeros.
  48. */
  49. if(position > f_size) clear_zone(rip, f_size, 0);
  50. }
  51. cum_io = 0;
  52. /* Split the transfer into chunks that don't span two blocks. */
  53. while (nrbytes > 0) {
  54. off = ((unsigned int) position) % block_size; /* offset in blk*/
  55. chunk = block_size - off;
  56. if (chunk > nrbytes)
  57. chunk = nrbytes;
  58. if (call != FSC_WRITE) {
  59. bytes_left = f_size - position;
  60. if (position >= f_size) break; /* we are beyond EOF */
  61. if (chunk > (unsigned int) bytes_left) chunk = bytes_left;
  62. }
  63. /* Read or write 'chunk' bytes. */
  64. r = rw_chunk(rip, ((u64_t)((unsigned long)position)), off, chunk,
  65. nrbytes, call, data, cum_io, block_size, &completed);
  66. if (r != OK) break;
  67. /* Update counters and pointers. */
  68. nrbytes -= chunk; /* bytes yet to be read */
  69. cum_io += chunk; /* bytes read so far */
  70. position += (off_t) chunk; /* position within the file */
  71. }
  72. /* On write, update file size and access time. */
  73. if (call == FSC_WRITE) {
  74. if (regular || mode_word == I_DIRECTORY) {
  75. if (position > f_size) rip->i_size = position;
  76. }
  77. }
  78. rip->i_seek = NO_SEEK;
  79. if (r != OK)
  80. return r;
  81. /* even on a ROFS, writing to a device node on it is fine,
  82. * just don't update the inode stats for it. And dito for reading.
  83. */
  84. if (!rip->i_sp->s_rd_only) {
  85. if (call == FSC_READ) rip->i_update |= ATIME;
  86. if (call == FSC_WRITE) rip->i_update |= CTIME | MTIME;
  87. IN_MARKDIRTY(rip); /* inode is thus now dirty */
  88. }
  89. return cum_io;
  90. }
  91. /*===========================================================================*
  92. * rw_chunk *
  93. *===========================================================================*/
  94. static int rw_chunk(rip, position, off, chunk, left, call, data, buf_off,
  95. block_size, completed)
  96. register struct inode *rip; /* pointer to inode for file to be rd/wr */
  97. u64_t position; /* position within file to read or write */
  98. unsigned off; /* off within the current block */
  99. size_t chunk; /* number of bytes to read or write */
  100. unsigned left; /* max number of bytes wanted after position */
  101. int call; /* FSC_READ, FSC_WRITE, or FSC_PEEK */
  102. struct fsdriver_data *data; /* structure for (remote) user buffer */
  103. unsigned buf_off; /* offset in user buffer */
  104. unsigned int block_size; /* block size of FS operating on */
  105. int *completed; /* number of bytes copied */
  106. {
  107. /* Read or write (part of) a block. */
  108. struct buf *bp = NULL;
  109. register int r = OK;
  110. int n;
  111. block_t b;
  112. dev_t dev;
  113. ino_t ino = VMC_NO_INODE;
  114. u64_t ino_off = rounddown(position, block_size);
  115. *completed = 0;
  116. if (ex64hi(position) != 0)
  117. panic("rw_chunk: position too high");
  118. b = read_map(rip, (off_t) ex64lo(position), 0);
  119. dev = rip->i_dev;
  120. ino = rip->i_num;
  121. assert(ino != VMC_NO_INODE);
  122. if (b == NO_BLOCK) {
  123. if (call == FSC_READ) {
  124. /* Reading from a nonexistent block. Must read as all zeros.*/
  125. r = fsdriver_zero(data, buf_off, chunk);
  126. if(r != OK) {
  127. printf("MFS: fsdriver_zero failed\n");
  128. }
  129. return r;
  130. } else if (call == FSC_PEEK) {
  131. /* Peeking a nonexistent block. Report to VM. */
  132. lmfs_zero_block_ino(dev, ino, ino_off);
  133. return OK;
  134. } else {
  135. /* Writing to a nonexistent block.
  136. * Create and enter in inode.
  137. */
  138. if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL)
  139. return(err_code);
  140. }
  141. } else if (call != FSC_WRITE) {
  142. /* Read and read ahead if convenient. */
  143. bp = rahead(rip, b, position, left);
  144. } else {
  145. /* Normally an existing block to be partially overwritten is first read
  146. * in. However, a full block need not be read in. If it is already in
  147. * the cache, acquire it, otherwise just acquire a free buffer.
  148. */
  149. n = (chunk == block_size ? NO_READ : NORMAL);
  150. if (off == 0 && (off_t) ex64lo(position) >= rip->i_size)
  151. n = NO_READ;
  152. assert(ino != VMC_NO_INODE);
  153. assert(!(ino_off % block_size));
  154. if ((r = lmfs_get_block_ino(&bp, dev, b, n, ino, ino_off)) != OK)
  155. panic("MFS: error getting block (%llu,%u): %d", dev, b, r);
  156. }
  157. /* In all cases, bp now points to a valid buffer. */
  158. assert(bp != NULL);
  159. if (call == FSC_WRITE && chunk != block_size &&
  160. (off_t) ex64lo(position) >= rip->i_size && off == 0) {
  161. zero_block(bp);
  162. }
  163. if (call == FSC_READ) {
  164. /* Copy a chunk from the block buffer to user space. */
  165. r = fsdriver_copyout(data, buf_off, b_data(bp)+off, chunk);
  166. } else if (call == FSC_WRITE) {
  167. /* Copy a chunk from user space to the block buffer. */
  168. r = fsdriver_copyin(data, buf_off, b_data(bp)+off, chunk);
  169. MARKDIRTY(bp);
  170. }
  171. put_block(bp);
  172. return(r);
  173. }
  174. /*===========================================================================*
  175. * read_map *
  176. *===========================================================================*/
  177. block_t read_map(rip, position, opportunistic)
  178. register struct inode *rip; /* ptr to inode to map from */
  179. off_t position; /* position in file whose blk wanted */
  180. int opportunistic; /* if nonzero, only use cache for metadata */
  181. {
  182. /* Given an inode and a position within the corresponding file, locate the
  183. * block (not zone) number in which that position is to be found and return it.
  184. */
  185. struct buf *bp;
  186. zone_t z;
  187. int scale, boff, index, zind;
  188. unsigned int dzones, nr_indirects;
  189. block_t b;
  190. unsigned long excess, zone, block_pos;
  191. int iomode;
  192. iomode = opportunistic ? PEEK : NORMAL;
  193. scale = rip->i_sp->s_log_zone_size; /* for block-zone conversion */
  194. block_pos = position/rip->i_sp->s_block_size; /* relative blk # in file */
  195. zone = block_pos >> scale; /* position's zone */
  196. boff = (int) (block_pos - (zone << scale) ); /* relative blk # within zone */
  197. dzones = rip->i_ndzones;
  198. nr_indirects = rip->i_nindirs;
  199. /* Is 'position' to be found in the inode itself? */
  200. if (zone < dzones) {
  201. zind = (int) zone; /* index should be an int */
  202. z = rip->i_zone[zind];
  203. if (z == NO_ZONE) return(NO_BLOCK);
  204. b = (block_t) ((z << scale) + boff);
  205. return(b);
  206. }
  207. /* It is not in the inode, so it must be single or double indirect. */
  208. excess = zone - dzones; /* first Vx_NR_DZONES don't count */
  209. if (excess < nr_indirects) {
  210. /* 'position' can be located via the single indirect block. */
  211. z = rip->i_zone[dzones];
  212. } else {
  213. /* 'position' can be located via the double indirect block. */
  214. if ( (z = rip->i_zone[dzones+1]) == NO_ZONE) return(NO_BLOCK);
  215. excess -= nr_indirects; /* single indir doesn't count*/
  216. b = (block_t) z << scale;
  217. ASSERT(rip->i_dev != NO_DEV);
  218. index = (int) (excess/nr_indirects);
  219. if ((unsigned int) index > rip->i_nindirs)
  220. return(NO_BLOCK); /* Can't go beyond double indirects */
  221. bp = get_block(rip->i_dev, b, iomode); /* get double indirect block */
  222. if (bp == NULL)
  223. return NO_BLOCK; /* peeking failed */
  224. z = rd_indir(bp, index); /* z= zone for single*/
  225. put_block(bp); /* release double ind block */
  226. excess = excess % nr_indirects; /* index into single ind blk */
  227. }
  228. /* 'z' is zone num for single indirect block; 'excess' is index into it. */
  229. if (z == NO_ZONE) return(NO_BLOCK);
  230. b = (block_t) z << scale; /* b is blk # for single ind */
  231. bp = get_block(rip->i_dev, b, iomode); /* get single indirect block */
  232. if (bp == NULL)
  233. return NO_BLOCK; /* peeking failed */
  234. z = rd_indir(bp, (int) excess); /* get block pointed to */
  235. put_block(bp); /* release single indir blk */
  236. if (z == NO_ZONE) return(NO_BLOCK);
  237. b = (block_t) ((z << scale) + boff);
  238. return(b);
  239. }
  240. struct buf *get_block_map(register struct inode *rip, u64_t position)
  241. {
  242. struct buf *bp;
  243. int r, block_size;
  244. block_t b = read_map(rip, position, 0); /* get block number */
  245. if(b == NO_BLOCK)
  246. return NULL;
  247. block_size = get_block_size(rip->i_dev);
  248. position = rounddown(position, block_size);
  249. assert(rip->i_num != VMC_NO_INODE);
  250. if ((r = lmfs_get_block_ino(&bp, rip->i_dev, b, NORMAL, rip->i_num,
  251. position)) != OK)
  252. panic("MFS: error getting block (%llu,%u): %d",
  253. rip->i_dev, b, r);
  254. return bp;
  255. }
  256. /*===========================================================================*
  257. * rd_indir *
  258. *===========================================================================*/
  259. zone_t rd_indir(bp, index)
  260. struct buf *bp; /* pointer to indirect block */
  261. int index; /* index into *bp */
  262. {
  263. struct super_block *sp;
  264. zone_t zone;
  265. if(bp == NULL)
  266. panic("rd_indir() on NULL");
  267. sp = &superblock;
  268. /* read a zone from an indirect block */
  269. assert(sp->s_version == V3);
  270. zone = (zone_t) conv4(sp->s_native, (long) b_v2_ind(bp)[index]);
  271. if (zone != NO_ZONE &&
  272. (zone < (zone_t) sp->s_firstdatazone || zone >= sp->s_zones)) {
  273. printf("Illegal zone number %ld in indirect block, index %d\n",
  274. (long) zone, index);
  275. panic("check file system");
  276. }
  277. return(zone);
  278. }
  279. /*===========================================================================*
  280. * rahead *
  281. *===========================================================================*/
  282. static struct buf *rahead(rip, baseblock, position, bytes_ahead)
  283. register struct inode *rip; /* pointer to inode for file to be read */
  284. block_t baseblock; /* block at current position */
  285. u64_t position; /* position within file */
  286. unsigned bytes_ahead; /* bytes beyond position for immediate use */
  287. {
  288. /* Fetch a block from the cache or the device. If a physical read is
  289. * required, prefetch as many more blocks as convenient into the cache.
  290. * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
  291. * The device driver may decide it knows better and stop reading at a
  292. * cylinder boundary (or after an error). Rw_scattered() puts an optional
  293. * flag on all reads to allow this.
  294. */
  295. /* Minimum number of blocks to prefetch. */
  296. # define BLOCKS_MINIMUM 32
  297. int r, scale, read_q_size;
  298. unsigned int blocks_ahead, fragment, block_size;
  299. block_t block, blocks_left;
  300. off_t ind1_pos;
  301. dev_t dev;
  302. struct buf *bp;
  303. static block64_t read_q[LMFS_MAX_PREFETCH];
  304. u64_t position_running;
  305. dev = rip->i_dev;
  306. assert(dev != NO_DEV);
  307. block_size = get_block_size(dev);
  308. block = baseblock;
  309. fragment = position % block_size;
  310. position -= fragment;
  311. position_running = position;
  312. bytes_ahead += fragment;
  313. blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
  314. r = lmfs_get_block_ino(&bp, dev, block, PEEK, rip->i_num, position);
  315. if (r == OK)
  316. return(bp);
  317. if (r != ENOENT)
  318. panic("MFS: error getting block (%llu,%u): %d", dev, block, r);
  319. /* The best guess for the number of blocks to prefetch: A lot.
  320. * It is impossible to tell what the device looks like, so we don't even
  321. * try to guess the geometry, but leave it to the driver.
  322. *
  323. * The floppy driver can read a full track with no rotational delay, and it
  324. * avoids reading partial tracks if it can, so handing it enough buffers to
  325. * read two tracks is perfect. (Two, because some diskette types have
  326. * an odd number of sectors per track, so a block may span tracks.)
  327. *
  328. * The disk drivers don't try to be smart. With todays disks it is
  329. * impossible to tell what the real geometry looks like, so it is best to
  330. * read as much as you can. With luck the caching on the drive allows
  331. * for a little time to start the next read.
  332. *
  333. * The current solution below is a bit of a hack, it just reads blocks from
  334. * the current file position hoping that more of the file can be found. A
  335. * better solution must look at the already available zone pointers and
  336. * indirect blocks (but don't call read_map!).
  337. */
  338. blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) /
  339. block_size;
  340. /* Go for the first indirect block if we are in its neighborhood. */
  341. scale = rip->i_sp->s_log_zone_size;
  342. ind1_pos = (off_t) rip->i_ndzones * (block_size << scale);
  343. if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) {
  344. blocks_ahead++;
  345. blocks_left++;
  346. }
  347. /* Read at least the minimum number of blocks, but not after a seek. */
  348. if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
  349. blocks_ahead = BLOCKS_MINIMUM;
  350. /* Can't go past end of file. */
  351. if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
  352. /* No more than the maximum request. */
  353. if (blocks_ahead > LMFS_MAX_PREFETCH) blocks_ahead = LMFS_MAX_PREFETCH;
  354. read_q_size = 0;
  355. /* Acquire block buffers. */
  356. for (;;) {
  357. block_t thisblock;
  358. read_q[read_q_size++] = block;
  359. if (--blocks_ahead == 0) break;
  360. block++;
  361. position_running += block_size;
  362. thisblock = read_map(rip, (off_t) ex64lo(position_running), 1);
  363. if (thisblock != NO_BLOCK) {
  364. r = lmfs_get_block_ino(&bp, dev, thisblock, PEEK, rip->i_num,
  365. position_running);
  366. block = thisblock;
  367. } else
  368. r = lmfs_get_block(&bp, dev, block, PEEK);
  369. if (r == OK) {
  370. /* Oops, block already in the cache, get out. */
  371. put_block(bp);
  372. break;
  373. }
  374. if (r != ENOENT)
  375. panic("MFS: error getting block (%llu,%u): %d", dev, block, r);
  376. }
  377. lmfs_prefetch(dev, read_q, read_q_size);
  378. r = lmfs_get_block_ino(&bp, dev, baseblock, NORMAL, rip->i_num, position);
  379. if (r != OK)
  380. panic("MFS: error getting block (%llu,%u): %d", dev, baseblock, r);
  381. return bp;
  382. }
  383. /*===========================================================================*
  384. * fs_getdents *
  385. *===========================================================================*/
  386. ssize_t fs_getdents(ino_t ino_nr, struct fsdriver_data *data, size_t bytes,
  387. off_t *posp)
  388. {
  389. #define GETDENTS_BUFSIZE (sizeof(struct dirent) + MFS_NAME_MAX + 1)
  390. #define GETDENTS_ENTRIES 8
  391. static char getdents_buf[GETDENTS_BUFSIZE * GETDENTS_ENTRIES];
  392. struct fsdriver_dentry fsdentry;
  393. struct inode *rip, *entrip;
  394. int r, done;
  395. unsigned int block_size, len, type;
  396. off_t pos, off, block_pos, new_pos, ent_pos;
  397. struct buf *bp;
  398. struct direct *dp;
  399. char *cp;
  400. /* Check whether the position is properly aligned */
  401. pos = *posp;
  402. if( (unsigned int) pos % DIR_ENTRY_SIZE)
  403. return(ENOENT);
  404. if( (rip = get_inode(fs_dev, ino_nr)) == NULL)
  405. return(EINVAL);
  406. block_size = rip->i_sp->s_block_size;
  407. off = (pos % block_size); /* Offset in block */
  408. block_pos = pos - off;
  409. done = FALSE; /* Stop processing directory blocks when done is set */
  410. fsdriver_dentry_init(&fsdentry, data, bytes, getdents_buf,
  411. sizeof(getdents_buf));
  412. /* The default position for the next request is EOF. If the user's buffer
  413. * fills up before EOF, new_pos will be modified. */
  414. new_pos = rip->i_size;
  415. r = 0;
  416. for(; block_pos < rip->i_size; block_pos += block_size) {
  417. /* Since directories don't have holes, 'bp' cannot be NULL. */
  418. bp = get_block_map(rip, block_pos); /* get a dir block */
  419. assert(bp != NULL);
  420. /* Search a directory block. */
  421. if (block_pos < pos)
  422. dp = &b_dir(bp)[off / DIR_ENTRY_SIZE];
  423. else
  424. dp = &b_dir(bp)[0];
  425. for (; dp < &b_dir(bp)[NR_DIR_ENTRIES(block_size)]; dp++) {
  426. if (dp->mfs_d_ino == 0)
  427. continue; /* Entry is not in use */
  428. /* Compute the length of the name */
  429. cp = memchr(dp->mfs_d_name, '\0', sizeof(dp->mfs_d_name));
  430. if (cp == NULL)
  431. len = sizeof(dp->mfs_d_name);
  432. else
  433. len = cp - (dp->mfs_d_name);
  434. /* Need the position of this entry in the directory */
  435. ent_pos = block_pos + ((char *) dp - (char *) bp->data);
  436. /* We also need(?) the file type of the target inode. */
  437. if (!(entrip = get_inode(fs_dev, (ino_t) dp->mfs_d_ino)))
  438. panic("unexpected get_inode failure");
  439. type = IFTODT(entrip->i_mode);
  440. put_inode(entrip);
  441. /* MFS does not store file types in its directory entries, and
  442. * fetching the mode from the inode is seriously expensive.
  443. * Userland should always be prepared to receive DT_UNKNOWN.
  444. */
  445. r = fsdriver_dentry_add(&fsdentry, (ino_t) dp->mfs_d_ino,
  446. dp->mfs_d_name, len, type);
  447. /* If the user buffer is full, or an error occurred, stop. */
  448. if (r <= 0) {
  449. done = TRUE;
  450. /* Record the position of this entry, it is the
  451. * starting point of the next request (unless the
  452. * postion is modified with lseek).
  453. */
  454. new_pos = ent_pos;
  455. break;
  456. }
  457. }
  458. put_block(bp);
  459. if (done)
  460. break;
  461. }
  462. if (r >= 0 && (r = fsdriver_dentry_finish(&fsdentry)) >= 0) {
  463. *posp = new_pos;
  464. if(!rip->i_sp->s_rd_only) {
  465. rip->i_update |= ATIME;
  466. IN_MARKDIRTY(rip);
  467. }
  468. }
  469. put_inode(rip); /* release the inode */
  470. return(r);
  471. }