memfiles.nim 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2015 Nim Contributors
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## :Authors: Zahary Karadjov, Andreas Rumpf
  10. ##
  11. ## This module provides support for `memory mapped files`:idx:
  12. ## (Posix's `mmap`:idx:) on the different operating systems.
  13. ##
  14. ## It also provides some fast iterators over lines in text files (or
  15. ## other "line-like", variable length, delimited records).
  16. when defined(windows):
  17. import winlean
  18. when useWinUnicode and defined(nimPreviewSlimSystem):
  19. import std/widestrs
  20. elif defined(posix):
  21. import posix
  22. else:
  23. {.error: "the memfiles module is not supported on your operating system!".}
  24. import streams
  25. import std/oserrors
  26. when defined(nimPreviewSlimSystem):
  27. import std/[syncio, assertions]
  28. proc newEIO(msg: string): ref IOError =
  29. new(result)
  30. result.msg = msg
  31. type
  32. MemFile* = object ## represents a memory mapped file
  33. mem*: pointer ## a pointer to the memory mapped file. The pointer
  34. ## can be used directly to change the contents of the
  35. ## file, if it was opened with write access.
  36. size*: int ## size of the memory mapped file
  37. when defined(windows):
  38. fHandle*: Handle ## **Caution**: Windows specific public field to allow
  39. ## even more low level trickery.
  40. mapHandle*: Handle ## **Caution**: Windows specific public field.
  41. wasOpened*: bool ## **Caution**: Windows specific public field.
  42. else:
  43. handle*: cint ## **Caution**: Posix specific public field.
  44. flags: cint ## **Caution**: Platform specific private field.
  45. proc mapMem*(m: var MemFile, mode: FileMode = fmRead,
  46. mappedSize = -1, offset = 0, mapFlags = cint(-1)): pointer =
  47. ## returns a pointer to a mapped portion of MemFile `m`
  48. ##
  49. ## `mappedSize` of `-1` maps to the whole file, and
  50. ## `offset` must be multiples of the PAGE SIZE of your OS
  51. if mode == fmAppend:
  52. raise newEIO("The append mode is not supported.")
  53. var readonly = mode == fmRead
  54. when defined(windows):
  55. result = mapViewOfFileEx(
  56. m.mapHandle,
  57. if readonly: FILE_MAP_READ else: FILE_MAP_READ or FILE_MAP_WRITE,
  58. int32(offset shr 32),
  59. int32(offset and 0xffffffff),
  60. WinSizeT(if mappedSize == -1: 0 else: mappedSize),
  61. nil)
  62. if result == nil:
  63. raiseOSError(osLastError())
  64. else:
  65. assert mappedSize > 0
  66. m.flags = if mapFlags == cint(-1): MAP_SHARED else: mapFlags
  67. #Ensure exactly one of MAP_PRIVATE cr MAP_SHARED is set
  68. if int(m.flags and MAP_PRIVATE) == 0:
  69. m.flags = m.flags or MAP_SHARED
  70. result = mmap(
  71. nil,
  72. mappedSize,
  73. if readonly: PROT_READ else: PROT_READ or PROT_WRITE,
  74. m.flags,
  75. m.handle, offset)
  76. if result == cast[pointer](MAP_FAILED):
  77. raiseOSError(osLastError())
  78. proc unmapMem*(f: var MemFile, p: pointer, size: int) =
  79. ## unmaps the memory region `(p, <p+size)` of the mapped file `f`.
  80. ## All changes are written back to the file system, if `f` was opened
  81. ## with write access.
  82. ##
  83. ## `size` must be of exactly the size that was requested
  84. ## via `mapMem`.
  85. when defined(windows):
  86. if unmapViewOfFile(p) == 0: raiseOSError(osLastError())
  87. else:
  88. if munmap(p, size) != 0: raiseOSError(osLastError())
  89. proc open*(filename: string, mode: FileMode = fmRead,
  90. mappedSize = -1, offset = 0, newFileSize = -1,
  91. allowRemap = false, mapFlags = cint(-1)): MemFile =
  92. ## opens a memory mapped file. If this fails, `OSError` is raised.
  93. ##
  94. ## `newFileSize` can only be set if the file does not exist and is opened
  95. ## with write access (e.g., with fmReadWrite).
  96. ##
  97. ##`mappedSize` and `offset`
  98. ## can be used to map only a slice of the file.
  99. ##
  100. ## `offset` must be multiples of the PAGE SIZE of your OS
  101. ## (usually 4K or 8K but is unique to your OS)
  102. ##
  103. ## `allowRemap` only needs to be true if you want to call `mapMem` on
  104. ## the resulting MemFile; else file handles are not kept open.
  105. ##
  106. ## `mapFlags` allows callers to override default choices for memory mapping
  107. ## flags with a bitwise mask of a variety of likely platform-specific flags
  108. ## which may be ignored or even cause `open` to fail if misspecified.
  109. ##
  110. ## Example:
  111. ##
  112. ## .. code-block:: nim
  113. ## var
  114. ## mm, mm_full, mm_half: MemFile
  115. ##
  116. ## mm = memfiles.open("/tmp/test.mmap", mode = fmWrite, newFileSize = 1024) # Create a new file
  117. ## mm.close()
  118. ##
  119. ## # Read the whole file, would fail if newFileSize was set
  120. ## mm_full = memfiles.open("/tmp/test.mmap", mode = fmReadWrite, mappedSize = -1)
  121. ##
  122. ## # Read the first 512 bytes
  123. ## mm_half = memfiles.open("/tmp/test.mmap", mode = fmReadWrite, mappedSize = 512)
  124. # The file can be resized only when write mode is used:
  125. if mode == fmAppend:
  126. raise newEIO("The append mode is not supported.")
  127. assert newFileSize == -1 or mode != fmRead
  128. var readonly = mode == fmRead
  129. template rollback =
  130. result.mem = nil
  131. result.size = 0
  132. when defined(windows):
  133. let desiredAccess = GENERIC_READ
  134. let shareMode = FILE_SHARE_READ
  135. let flags = FILE_FLAG_RANDOM_ACCESS
  136. template fail(errCode: OSErrorCode, msg: untyped) =
  137. rollback()
  138. if result.fHandle != 0: discard closeHandle(result.fHandle)
  139. if result.mapHandle != 0: discard closeHandle(result.mapHandle)
  140. raiseOSError(errCode)
  141. # return false
  142. #raise newException(IOError, msg)
  143. template callCreateFile(winApiProc, filename): untyped =
  144. winApiProc(
  145. filename,
  146. # GENERIC_ALL != (GENERIC_READ or GENERIC_WRITE)
  147. if readonly: desiredAccess else: desiredAccess or GENERIC_WRITE,
  148. if readonly: shareMode else: shareMode or FILE_SHARE_WRITE,
  149. nil,
  150. if newFileSize != -1: CREATE_ALWAYS else: OPEN_EXISTING,
  151. if readonly: FILE_ATTRIBUTE_READONLY or flags
  152. else: FILE_ATTRIBUTE_NORMAL or flags,
  153. 0)
  154. when useWinUnicode:
  155. result.fHandle = callCreateFile(createFileW, newWideCString(filename))
  156. else:
  157. result.fHandle = callCreateFile(createFileA, filename)
  158. if result.fHandle == INVALID_HANDLE_VALUE:
  159. fail(osLastError(), "error opening file")
  160. if newFileSize != -1:
  161. var
  162. sizeHigh = int32(newFileSize shr 32)
  163. sizeLow = int32(newFileSize and 0xffffffff)
  164. var status = setFilePointer(result.fHandle, sizeLow, addr(sizeHigh),
  165. FILE_BEGIN)
  166. let lastErr = osLastError()
  167. if (status == INVALID_SET_FILE_POINTER and lastErr.int32 != NO_ERROR) or
  168. (setEndOfFile(result.fHandle) == 0):
  169. fail(lastErr, "error setting file size")
  170. # since the strings are always 'nil', we simply always call
  171. # CreateFileMappingW which should be slightly faster anyway:
  172. result.mapHandle = createFileMappingW(
  173. result.fHandle, nil,
  174. if readonly: PAGE_READONLY else: PAGE_READWRITE,
  175. 0, 0, nil)
  176. if result.mapHandle == 0:
  177. fail(osLastError(), "error creating mapping")
  178. result.mem = mapViewOfFileEx(
  179. result.mapHandle,
  180. if readonly: FILE_MAP_READ else: FILE_MAP_READ or FILE_MAP_WRITE,
  181. int32(offset shr 32),
  182. int32(offset and 0xffffffff),
  183. if mappedSize == -1: 0 else: mappedSize,
  184. nil)
  185. if result.mem == nil:
  186. fail(osLastError(), "error mapping view")
  187. var hi, low: int32
  188. low = getFileSize(result.fHandle, addr(hi))
  189. if low == INVALID_FILE_SIZE:
  190. fail(osLastError(), "error getting file size")
  191. else:
  192. var fileSize = (int64(hi) shl 32) or int64(uint32(low))
  193. if mappedSize != -1: result.size = min(fileSize, mappedSize).int
  194. else: result.size = fileSize.int
  195. result.wasOpened = true
  196. if not allowRemap and result.fHandle != INVALID_HANDLE_VALUE:
  197. if closeHandle(result.fHandle) == 0:
  198. result.fHandle = INVALID_HANDLE_VALUE
  199. else:
  200. template fail(errCode: OSErrorCode, msg: string) =
  201. rollback()
  202. if result.handle != -1: discard close(result.handle)
  203. raiseOSError(errCode)
  204. var flags = (if readonly: O_RDONLY else: O_RDWR) or O_CLOEXEC
  205. if newFileSize != -1:
  206. flags = flags or O_CREAT or O_TRUNC
  207. var permissionsMode = S_IRUSR or S_IWUSR
  208. result.handle = open(filename, flags, permissionsMode)
  209. else:
  210. result.handle = open(filename, flags)
  211. if result.handle == -1:
  212. # XXX: errno is supposed to be set here
  213. # Is there an exception that wraps it?
  214. fail(osLastError(), "error opening file")
  215. if newFileSize != -1:
  216. if ftruncate(result.handle, newFileSize) == -1:
  217. fail(osLastError(), "error setting file size")
  218. if mappedSize != -1:
  219. result.size = mappedSize
  220. else:
  221. var stat: Stat
  222. if fstat(result.handle, stat) != -1:
  223. # XXX: Hmm, this could be unsafe
  224. # Why is mmap taking int anyway?
  225. result.size = int(stat.st_size)
  226. else:
  227. fail(osLastError(), "error getting file size")
  228. result.flags = if mapFlags == cint(-1): MAP_SHARED else: mapFlags
  229. #Ensure exactly one of MAP_PRIVATE cr MAP_SHARED is set
  230. if int(result.flags and MAP_PRIVATE) == 0:
  231. result.flags = result.flags or MAP_SHARED
  232. result.mem = mmap(
  233. nil,
  234. result.size,
  235. if readonly: PROT_READ else: PROT_READ or PROT_WRITE,
  236. result.flags,
  237. result.handle,
  238. offset)
  239. if result.mem == cast[pointer](MAP_FAILED):
  240. fail(osLastError(), "file mapping failed")
  241. if not allowRemap and result.handle != -1:
  242. if close(result.handle) == 0:
  243. result.handle = -1
  244. proc flush*(f: var MemFile; attempts: Natural = 3) =
  245. ## Flushes `f`'s buffer for the number of attempts equal to `attempts`.
  246. ## If were errors an exception `OSError` will be raised.
  247. var res = false
  248. var lastErr: OSErrorCode
  249. when defined(windows):
  250. for i in 1..attempts:
  251. res = flushViewOfFile(f.mem, 0) != 0
  252. if res:
  253. break
  254. lastErr = osLastError()
  255. if lastErr != ERROR_LOCK_VIOLATION.OSErrorCode:
  256. raiseOSError(lastErr)
  257. else:
  258. for i in 1..attempts:
  259. res = msync(f.mem, f.size, MS_SYNC or MS_INVALIDATE) == 0
  260. if res:
  261. break
  262. lastErr = osLastError()
  263. if lastErr != EBUSY.OSErrorCode:
  264. raiseOSError(lastErr, "error flushing mapping")
  265. when defined(posix) or defined(nimdoc):
  266. proc resize*(f: var MemFile, newFileSize: int) {.raises: [IOError, OSError].} =
  267. ## resize and re-map the file underlying an `allowRemap MemFile`.
  268. ## **Note**: this assumes the entire file is mapped read-write at offset zero.
  269. ## Also, the value of `.mem` will probably change.
  270. ## **Note**: This is not (yet) available on Windows.
  271. when defined(posix):
  272. if f.handle == -1:
  273. raise newException(IOError,
  274. "Cannot resize MemFile opened with allowRemap=false")
  275. if ftruncate(f.handle, newFileSize) == -1:
  276. raiseOSError(osLastError())
  277. when defined(linux): #Maybe NetBSD, too?
  278. #On Linux this can be over 100 times faster than a munmap,mmap cycle.
  279. proc mremap(old: pointer; oldSize, newSize: csize_t; flags: cint):
  280. pointer {.importc: "mremap", header: "<sys/mman.h>".}
  281. let newAddr = mremap(f.mem, csize_t(f.size), csize_t(newFileSize), cint(1))
  282. if newAddr == cast[pointer](MAP_FAILED):
  283. raiseOSError(osLastError())
  284. else:
  285. if munmap(f.mem, f.size) != 0:
  286. raiseOSError(osLastError())
  287. let newAddr = mmap(nil, newFileSize, PROT_READ or PROT_WRITE,
  288. f.flags, f.handle, 0)
  289. if newAddr == cast[pointer](MAP_FAILED):
  290. raiseOSError(osLastError())
  291. f.mem = newAddr
  292. f.size = newFileSize
  293. proc close*(f: var MemFile) =
  294. ## closes the memory mapped file `f`. All changes are written back to the
  295. ## file system, if `f` was opened with write access.
  296. var error = false
  297. var lastErr: OSErrorCode
  298. when defined(windows):
  299. if f.wasOpened:
  300. error = unmapViewOfFile(f.mem) == 0
  301. if not error:
  302. error = closeHandle(f.mapHandle) == 0
  303. if not error and f.fHandle != INVALID_HANDLE_VALUE:
  304. discard closeHandle(f.fHandle)
  305. f.fHandle = INVALID_HANDLE_VALUE
  306. if error:
  307. lastErr = osLastError()
  308. else:
  309. error = munmap(f.mem, f.size) != 0
  310. lastErr = osLastError()
  311. if f.handle != -1:
  312. error = (close(f.handle) != 0) or error
  313. f.size = 0
  314. f.mem = nil
  315. when defined(windows):
  316. f.fHandle = 0
  317. f.mapHandle = 0
  318. f.wasOpened = false
  319. else:
  320. f.handle = -1
  321. if error: raiseOSError(lastErr)
  322. type MemSlice* = object ## represent slice of a MemFile for iteration over delimited lines/records
  323. data*: pointer
  324. size*: int
  325. proc `==`*(x, y: MemSlice): bool =
  326. ## Compare a pair of MemSlice for strict equality.
  327. result = (x.size == y.size and equalMem(x.data, y.data, x.size))
  328. proc `$`*(ms: MemSlice): string {.inline.} =
  329. ## Return a Nim string built from a MemSlice.
  330. result.setLen(ms.size)
  331. copyMem(addr(result[0]), ms.data, ms.size)
  332. iterator memSlices*(mfile: MemFile, delim = '\l', eat = '\r'): MemSlice {.inline.} =
  333. ## Iterates over \[optional `eat`] `delim`-delimited slices in MemFile `mfile`.
  334. ##
  335. ## Default parameters parse lines ending in either Unix(\\l) or Windows(\\r\\l)
  336. ## style on on a line-by-line basis. I.e., not every line needs the same ending.
  337. ## Unlike readLine(File) & lines(File), archaic MacOS9 \\r-delimited lines
  338. ## are not supported as a third option for each line. Such archaic MacOS9
  339. ## files can be handled by passing delim='\\r', eat='\\0', though.
  340. ##
  341. ## Delimiters are not part of the returned slice. A final, unterminated line
  342. ## or record is returned just like any other.
  343. ##
  344. ## Non-default delimiters can be passed to allow iteration over other sorts
  345. ## of "line-like" variable length records. Pass eat='\\0' to be strictly
  346. ## `delim`-delimited. (Eating an optional prefix equal to '\\0' is not
  347. ## supported.)
  348. ##
  349. ## This zero copy, memchr-limited interface is probably the fastest way to
  350. ## iterate over line-like records in a file. However, returned (data,size)
  351. ## objects are not Nim strings, bounds checked Nim arrays, or even terminated
  352. ## C strings. So, care is required to access the data (e.g., think C mem*
  353. ## functions, not str* functions).
  354. ##
  355. ## Example:
  356. ##
  357. ## .. code-block:: nim
  358. ## var count = 0
  359. ## for slice in memSlices(memfiles.open("foo")):
  360. ## if slice.size > 0 and cast[cstring](slice.data)[0] != '#':
  361. ## inc(count)
  362. ## echo count
  363. proc c_memchr(cstr: pointer, c: char, n: csize_t): pointer {.
  364. importc: "memchr", header: "<string.h>".}
  365. proc `-!`(p, q: pointer): int {.inline.} = return cast[int](p) -% cast[int](q)
  366. var ms: MemSlice
  367. var ending: pointer
  368. ms.data = mfile.mem
  369. var remaining = mfile.size
  370. while remaining > 0:
  371. ending = c_memchr(ms.data, delim, csize_t(remaining))
  372. if ending == nil: # unterminated final slice
  373. ms.size = remaining # Weird case..check eat?
  374. yield ms
  375. break
  376. ms.size = ending -! ms.data # delim is NOT included
  377. if eat != '\0' and ms.size > 0 and cast[cstring](ms.data)[ms.size - 1] == eat:
  378. dec(ms.size) # trim pre-delim char
  379. yield ms
  380. ms.data = cast[pointer](cast[int](ending) +% 1) # skip delim
  381. remaining = mfile.size - (ms.data -! mfile.mem)
  382. iterator lines*(mfile: MemFile, buf: var string, delim = '\l',
  383. eat = '\r'): string {.inline.} =
  384. ## Replace contents of passed buffer with each new line, like
  385. ## `readLine(File) <syncio.html#readLine,File,string>`_.
  386. ## `delim`, `eat`, and delimiting logic is exactly as for `memSlices
  387. ## <#memSlices.i,MemFile,char,char>`_, but Nim strings are returned.
  388. ##
  389. ## Example:
  390. ##
  391. ## .. code-block:: nim
  392. ## var buffer: string = ""
  393. ## for line in lines(memfiles.open("foo"), buffer):
  394. ## echo line
  395. for ms in memSlices(mfile, delim, eat):
  396. setLen(buf, ms.size)
  397. if ms.size > 0:
  398. copyMem(addr buf[0], ms.data, ms.size)
  399. yield buf
  400. iterator lines*(mfile: MemFile, delim = '\l', eat = '\r'): string {.inline.} =
  401. ## Return each line in a file as a Nim string, like
  402. ## `lines(File) <syncio.html#lines.i,File>`_.
  403. ## `delim`, `eat`, and delimiting logic is exactly as for `memSlices
  404. ## <#memSlices.i,MemFile,char,char>`_, but Nim strings are returned.
  405. ##
  406. ## Example:
  407. ##
  408. ## .. code-block:: nim
  409. ## for line in lines(memfiles.open("foo")):
  410. ## echo line
  411. var buf = newStringOfCap(80)
  412. for line in lines(mfile, buf, delim, eat):
  413. yield buf
  414. type
  415. MemMapFileStream* = ref MemMapFileStreamObj ## a stream that encapsulates a `MemFile`
  416. MemMapFileStreamObj* = object of Stream
  417. mf: MemFile
  418. mode: FileMode
  419. pos: ByteAddress
  420. proc mmsClose(s: Stream) =
  421. MemMapFileStream(s).pos = -1
  422. close(MemMapFileStream(s).mf)
  423. proc mmsFlush(s: Stream) = flush(MemMapFileStream(s).mf)
  424. proc mmsAtEnd(s: Stream): bool = (MemMapFileStream(s).pos >= MemMapFileStream(s).mf.size) or
  425. (MemMapFileStream(s).pos < 0)
  426. proc mmsSetPosition(s: Stream, pos: int) =
  427. if pos > MemMapFileStream(s).mf.size or pos < 0:
  428. raise newEIO("cannot set pos in stream")
  429. MemMapFileStream(s).pos = pos
  430. proc mmsGetPosition(s: Stream): int = MemMapFileStream(s).pos
  431. proc mmsPeekData(s: Stream, buffer: pointer, bufLen: int): int =
  432. let startAddress = cast[ByteAddress](MemMapFileStream(s).mf.mem)
  433. let p = cast[ByteAddress](MemMapFileStream(s).pos)
  434. let l = min(bufLen, MemMapFileStream(s).mf.size - p)
  435. moveMem(buffer, cast[pointer](startAddress + p), l)
  436. result = l
  437. proc mmsReadData(s: Stream, buffer: pointer, bufLen: int): int =
  438. result = mmsPeekData(s, buffer, bufLen)
  439. inc(MemMapFileStream(s).pos, result)
  440. proc mmsWriteData(s: Stream, buffer: pointer, bufLen: int) =
  441. if MemMapFileStream(s).mode == fmRead:
  442. raise newEIO("cannot write to read-only stream")
  443. let size = MemMapFileStream(s).mf.size
  444. if MemMapFileStream(s).pos + bufLen > size:
  445. raise newEIO("cannot write to stream")
  446. let p = cast[ByteAddress](MemMapFileStream(s).mf.mem) +
  447. cast[ByteAddress](MemMapFileStream(s).pos)
  448. moveMem(cast[pointer](p), buffer, bufLen)
  449. inc(MemMapFileStream(s).pos, bufLen)
  450. proc newMemMapFileStream*(filename: string, mode: FileMode = fmRead,
  451. fileSize: int = -1): MemMapFileStream =
  452. ## creates a new stream from the file named `filename` with the mode `mode`.
  453. ## Raises ## `OSError` if the file cannot be opened. See the `system
  454. ## <system.html>`_ module for a list of available FileMode enums.
  455. ## `fileSize` can only be set if the file does not exist and is opened
  456. ## with write access (e.g., with fmReadWrite).
  457. var mf: MemFile = open(filename, mode, newFileSize = fileSize)
  458. new(result)
  459. result.mode = mode
  460. result.mf = mf
  461. result.closeImpl = mmsClose
  462. result.atEndImpl = mmsAtEnd
  463. result.setPositionImpl = mmsSetPosition
  464. result.getPositionImpl = mmsGetPosition
  465. result.readDataImpl = mmsReadData
  466. result.peekDataImpl = mmsPeekData
  467. result.writeDataImpl = mmsWriteData
  468. result.flushImpl = mmsFlush