buffered-input-file.lisp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. ;; This software is Copyright (c) cage, 2012.
  2. ;; cage grants you the rights to distribute
  3. ;; and use this software as governed by the terms
  4. ;; of the Lisp Lesser GNU Public License
  5. ;; (http://opensource.franz.com/preamble.html),
  6. ;; known as the LLGPL
  7. (in-package :cl-i18n)
  8. (alexandria:define-constant +default-buffer-size+ 100000 :test #'=)
  9. (alexandria:define-constant +default-element-type+ '(unsigned-byte 8) :test #'equal)
  10. (defun make-buffer (&optional (length +default-buffer-size+))
  11. (make-array length :element-type +default-element-type+
  12. :fill-pointer length :initial-element 0 :adjustable t))
  13. (defun buffer-uchar-length (buffer)
  14. (babel:vector-size-in-chars buffer))
  15. ; file |b b b b b b b b b b b b b b b b b b b |
  16. ; buffer |c c c c c c c|
  17. ; buffer-position |c c c c c ^
  18. ; logical-file-position |c c c c c c c c c ^
  19. ; file-position |b b b b b b b b b b b ^
  20. ; uchars-count |c c c c c c c c c c c ^
  21. (defclass buffered-input-file ()
  22. ((filename
  23. :initarg :filename
  24. :initform nil
  25. :accessor filename)
  26. (buffer
  27. :initarg :buffer
  28. :initform (make-buffer)
  29. :accessor buffer)
  30. (logical-file-position
  31. :initarg :logical-file-position
  32. :initform 0
  33. :accessor logical-file-position)
  34. (uchars-count
  35. :initarg :uchars-count
  36. :initform 0
  37. :accessor uchars-count)
  38. (buffer-position
  39. :initarg :buffer-position
  40. :initform 0
  41. :accessor buffer-position)
  42. (inner-stream
  43. :initarg :inner-stream
  44. :initform nil
  45. :accessor inner-stream)
  46. (cached-string
  47. :initarg :cached-string
  48. :initform nil
  49. :accessor cached-string)
  50. (line-mode
  51. :initform nil
  52. :initarg :line-mode
  53. :accessor line-mode)
  54. (statistics
  55. :initarg :statistics
  56. :initform nil
  57. :accessor statistics)))
  58. (defmethod print-object ((object buffered-input-file) stream)
  59. (format stream "buffer ~s bin ~a string ~s (length ~a) physical file position ~a buffer position ~a logical file-position ~a char-count ~a"
  60. (map 'vector #'(lambda (v) (format nil "~x" v)) (buffer object))
  61. (map 'vector #'(lambda (v) (format nil "~b" v)) (buffer object))
  62. (concatenate 'string
  63. (subseq (babel:octets-to-string (buffer object)) 0
  64. (buffer-position object))
  65. "*"
  66. (subseq (babel:octets-to-string (buffer object))
  67. (buffer-position object)))
  68. (length (babel:octets-to-string (buffer object)))
  69. (actual-file-position object)
  70. (buffer-position object)
  71. (logical-file-position object)
  72. (uchars-count object)))
  73. (defmethod initialize-instance :after ((object buffered-input-file) &key &allow-other-keys)
  74. (with-accessors ((stream inner-stream) (buffer buffer) (filename filename)
  75. (cached-string cached-string)
  76. (inner-file-position inner-file-position)
  77. (uchars-count uchars-count)) object
  78. (if filename
  79. (progn
  80. (setf filename filename)
  81. (setf stream (open filename :direction :input :element-type +default-element-type+
  82. :if-does-not-exist :error))
  83. (when (<= (stream-length object) (length buffer))
  84. (setf buffer (make-buffer (stream-length object))))
  85. (read-adjust-buffer object))
  86. (progn
  87. (setf buffer (babel:string-to-octets buffer)
  88. cached-string (babel:octets-to-string buffer))))
  89. (setf uchars-count (length cached-string))))
  90. (defgeneric stream-length (object))
  91. (defgeneric actual-file-position (object &optional pos))
  92. (defgeneric close-file (object))
  93. (defgeneric valid-stream-p (object))
  94. (defgeneric inside-buffer-p (object pos &key as-char))
  95. (defgeneric outside-buffer-p (object pos))
  96. (defgeneric replace-buffer (object &key direction))
  97. (defgeneric replace-buffer-forward (object))
  98. (defgeneric replace-buffer-backward (object))
  99. (defgeneric truncate-buffer (object pos))
  100. (defgeneric enlarge-buffer (object))
  101. (defgeneric adjust-buffer (object))
  102. (defgeneric adjust-buffer-backward (object))
  103. (defgeneric read-adjust-buffer (object))
  104. (defgeneric regex-scan (object regex &optional sticky last-start last-end))
  105. (defgeneric regex-scan-line-simple (object regex))
  106. (defgeneric regex-scan-line-mode (object regex &optional sticky last-start last-end))
  107. (defgeneric increment-pointer-then-get-char (object))
  108. (defgeneric get-char-then-increment-pointer (object))
  109. (defgeneric get-char (object))
  110. (defgeneric get-line (object &key line-separator))
  111. (defgeneric unget-char (object &optional position))
  112. (defgeneric increment-pointer (object))
  113. (defgeneric decrement-pointer (object))
  114. (defgeneric seek (object pos))
  115. (defmethod adjust-buffer ((object buffered-input-file))
  116. (with-accessors ((stream inner-stream) (buffer buffer)
  117. (buffer-position buffer-position)
  118. (cached-string cached-string)
  119. (uchars-count uchars-count)
  120. (logical-file-position logical-file-position)) object
  121. (let* ((last-leading (do ((ct (1- (length buffer)) (1- ct)))
  122. ((> (uchar-length (elt buffer ct)) 0)
  123. ct)))
  124. (uchar-size (uchar-length (elt buffer last-leading))))
  125. (loop for i from 0 below (- uchar-size (length (subseq buffer last-leading))) do
  126. (vector-push-extend (read-byte stream) buffer))
  127. (setf cached-string (babel:octets-to-string buffer)))))
  128. (defmethod adjust-buffer-backward ((object buffered-input-file))
  129. (with-accessors ((stream inner-stream) (buffer buffer)
  130. (buffer-position buffer-position)
  131. (uchars-count uchars-count)
  132. (cached-string cached-string)
  133. (logical-file-position logical-file-position)) object
  134. (let ((old-file-pos (actual-file-position object)))
  135. (actual-file-position object (- (actual-file-position object) (length buffer)))
  136. (do ((ct 0 (1+ ct)))
  137. ((or
  138. (> (uchar-length (elt buffer 0)) 0)
  139. (> ct 10)))
  140. (if (> (file-position stream) 0)
  141. (progn
  142. (actual-file-position object (1- (actual-file-position object)))
  143. (let* ((new-byte (read-byte stream))
  144. (new-vector (make-buffer 1)))
  145. (setf (aref new-vector 0) new-byte)
  146. (setf buffer (concatenate '(vector (unsigned-byte 8)) new-vector buffer))
  147. (actual-file-position object (1- (actual-file-position object)))))
  148. (error 'i18n-conditions:out-of-bounds :seq buffer :idx (actual-file-position object))))
  149. (setf cached-string (babel:octets-to-string buffer))
  150. (actual-file-position object old-file-pos))))
  151. (defmethod read-adjust-buffer ((object buffered-input-file))
  152. (with-accessors ((stream inner-stream) (buffer buffer)
  153. (buffer-position buffer-position)
  154. (uchars-count uchars-count)
  155. (logical-file-position logical-file-position)) object
  156. (let ((count (read-sequence buffer stream)))
  157. (when (> count 0)
  158. (adjust-buffer object)))))
  159. (defmethod close-file ((object buffered-input-file))
  160. (with-accessors ((stream inner-stream)) object
  161. (when stream
  162. (close stream))))
  163. (defmacro with-ustring ((var object) &body body)
  164. `(let ((,var (if (null (cached-string ,object))
  165. (babel:octets-to-string (buffer ,object))
  166. (cached-string ,object))))
  167. ,@body))
  168. (defmethod stream-length ((object buffered-input-file))
  169. (with-accessors ((stream inner-stream)
  170. (buffer buffer)) object
  171. (if stream
  172. (file-length stream)
  173. (with-ustring (ustring object)
  174. (length ustring)))))
  175. (defmethod actual-file-position ((object buffered-input-file) &optional (pos nil))
  176. (labels ((cat (&rest s)
  177. (apply #'concatenate 'string s))
  178. (pos-error (message)
  179. (error (concatenate 'string
  180. (format nil "The value for argument pos (~a) " pos)
  181. message)))
  182. (check-pos-valid ()
  183. (when pos
  184. (etypecase pos
  185. (integer
  186. (when (< pos 0)
  187. (pos-error "is not a positive integer")))
  188. (keyword
  189. (when (not (or (eq pos :start)
  190. (eq pos :end)))
  191. (pos-error (cat "is not valid: accepted values are: :start or :end "
  192. "(or a positive integer)"))))))))
  193. (with-accessors ((stream inner-stream)
  194. (buffer buffer)) object
  195. (if stream
  196. (progn
  197. (check-pos-valid)
  198. (if pos
  199. (file-position stream pos)
  200. (file-position stream)))
  201. (length buffer)))))
  202. (defmethod valid-stream-p ((object buffered-input-file))
  203. (with-accessors ((stream inner-stream)) object
  204. (if stream
  205. (< (logical-file-position object) (uchars-count object))
  206. (< (logical-file-position object) (stream-length object)))))
  207. (defmethod inside-buffer-p ((object buffered-input-file) pos &key (as-char nil))
  208. (with-accessors ((buffer buffer)) object
  209. (and (>= pos 0)
  210. (< pos (length (if as-char
  211. (cached-string object)
  212. buffer))))))
  213. (defmethod outside-buffer-p ((object buffered-input-file) pos)
  214. (not (inside-buffer-p object pos)))
  215. (defmethod replace-buffer ((object buffered-input-file) &key (direction :forward))
  216. (ecase direction
  217. (:forward
  218. (replace-buffer-forward object))
  219. (:backward
  220. (replace-buffer-backward object))))
  221. (defmacro with-file-position ((var object) &body body)
  222. `(let ((,var (actual-file-position ,object)))
  223. ,@body))
  224. (defmethod replace-buffer-forward ((object buffered-input-file))
  225. (with-accessors ((stream inner-stream) (buffer buffer)
  226. (buffer-position buffer-position)
  227. (cached-string cached-string)
  228. (uchars-count uchars-count)
  229. (logical-file-position logical-file-position)) object
  230. (with-file-position (inner-file-position object)
  231. (if (< inner-file-position (stream-length object))
  232. (progn
  233. (if (< (+ inner-file-position +default-buffer-size+)
  234. (stream-length object))
  235. (setf buffer (make-buffer))
  236. (setf buffer (make-buffer (- (stream-length object) inner-file-position))))
  237. (setf buffer-position 0)
  238. (read-adjust-buffer object)
  239. (incf uchars-count (length cached-string)))
  240. nil))))
  241. (defmethod truncate-buffer ((object buffered-input-file) pos)
  242. (with-accessors ((stream inner-stream) (buffer buffer)
  243. (buffer-position buffer-position)
  244. (cached-string cached-string)
  245. (uchars-count uchars-count)
  246. (logical-file-position logical-file-position)) object
  247. (with-file-position (inner-file-position object)
  248. (if (< (+ inner-file-position +default-buffer-size+)
  249. (stream-length object))
  250. (setf buffer (make-buffer))
  251. (setf buffer (make-buffer (- (stream-length object) inner-file-position))))
  252. (setf buffer-position 0)
  253. (read-adjust-buffer object)
  254. (setf uchars-count (+ pos (length cached-string))))))
  255. (defmethod replace-buffer-backward ((object buffered-input-file))
  256. (with-accessors ((stream inner-stream) (buffer buffer)
  257. (buffer-position buffer-position)
  258. (uchars-count uchars-count)
  259. (logical-file-position logical-file-position)) object
  260. (with-file-position (inner-file-position object)
  261. (if (and stream
  262. (> logical-file-position 0)
  263. (> inner-file-position 0))
  264. (let* ((old-buffer-length (length buffer))
  265. (old-buffer-length-char (buffer-uchar-length buffer))
  266. (new-buffer-length
  267. (if (> (- inner-file-position old-buffer-length +default-buffer-size+) 0)
  268. +default-buffer-size+
  269. (- inner-file-position old-buffer-length))))
  270. (actual-file-position object (- inner-file-position old-buffer-length new-buffer-length))
  271. (setf buffer (make-buffer new-buffer-length))
  272. (read-sequence buffer stream)
  273. (adjust-buffer-backward object)
  274. (decf uchars-count old-buffer-length-char)
  275. (setf new-buffer-length (length (cached-string object)))
  276. (setf buffer-position (1- new-buffer-length))
  277. logical-file-position)
  278. (progn
  279. (setf buffer-position 0)
  280. nil)))))
  281. (defmethod enlarge-buffer ((object buffered-input-file))
  282. (with-accessors ((stream inner-stream) (buffer buffer)
  283. (cached-string cached-string)
  284. (buffer-position buffer-position)
  285. (uchars-count uchars-count)) object
  286. (with-file-position (inner-file-position object)
  287. (if (< inner-file-position (stream-length object))
  288. (with-ustring (old-string object)
  289. (let* ((old-buffer (alexandria:copy-array buffer))
  290. (file-pos-inc
  291. (if (< (+ inner-file-position +default-buffer-size+)
  292. (stream-length object))
  293. +default-buffer-size+
  294. (- (stream-length object) inner-file-position)))
  295. (actual-length
  296. (+ (length old-buffer) file-pos-inc)))
  297. (file-position stream (- (file-position stream)
  298. (length old-buffer)))
  299. (decf uchars-count (length old-string))
  300. (setf buffer (make-buffer actual-length))
  301. (read-adjust-buffer object) ;; also set cached-string
  302. (incf uchars-count (length cached-string))
  303. buffer))
  304. nil))))
  305. (defmethod regex-scan ((object buffered-input-file)
  306. regex &optional (sticky t)
  307. (last-start nil) (last-end nil))
  308. (if (line-mode object)
  309. (regex-scan-line-mode object regex sticky last-start last-end)
  310. (with-accessors ((stream stream) (buffer buffer)
  311. (logical-file-position logical-file-position)
  312. (buffer-position buffer-position)
  313. (uchars-count uchars-count)) object
  314. (with-ustring (ustring object)
  315. (multiple-value-bind (start end)
  316. (cl-ppcre:scan regex ustring
  317. :start buffer-position)
  318. (let ((all-buffer-length (- uchars-count (length ustring))))
  319. (if (not start) ; match not found
  320. (if (enlarge-buffer object)
  321. (regex-scan object regex sticky last-start last-end)
  322. (progn
  323. (replace-buffer object)
  324. (values nil nil nil)))
  325. (if (or (not sticky)
  326. (equal start buffer-position))
  327. (if (and last-start
  328. last-end
  329. (= start last-start)
  330. (= end last-end))
  331. (values (subseq ustring start end)
  332. (+ start all-buffer-length)
  333. (+ end all-buffer-length))
  334. (if (enlarge-buffer object)
  335. (regex-scan object regex sticky start end)
  336. (values (subseq ustring start end)
  337. (+ start all-buffer-length)
  338. (+ end all-buffer-length))))
  339. (values nil nil nil)))))))))
  340. (defmethod regex-scan-line-simple ((object buffered-input-file) (regex function))
  341. (multiple-value-bind (line line-length line-start)
  342. (get-line object)
  343. (unwind-protect
  344. (multiple-value-bind (start end register-starts register-ends)
  345. (cl-ppcre:scan regex line)
  346. (declare (ignore end))
  347. (if (not start) ; match not found
  348. (values -1 line-start (+ line-start line-length) nil)
  349. (let ((match-pos (position-if #'(lambda (a) (not (null a))) register-starts)))
  350. (values match-pos
  351. line-start
  352. (+ line-start line-length)
  353. (subseq line (elt register-starts match-pos)
  354. (elt register-ends match-pos))
  355. (+ (elt register-starts match-pos) line-start)
  356. (+ (elt register-ends match-pos) line-start)))))
  357. (seek object line-start))))
  358. (defmethod regex-scan-line-mode ((object buffered-input-file)
  359. regex &optional (sticky t)
  360. (last-start nil) (last-end nil))
  361. (declare (ignore last-start last-end))
  362. (multiple-value-bind (line line-length line-start)
  363. (cl-i18n:get-line object)
  364. (declare (ignore line-length))
  365. (unwind-protect
  366. (multiple-value-bind (start end)
  367. (cl-ppcre:scan regex line)
  368. (if (not start) ; match not found
  369. (values nil nil nil)
  370. (if sticky
  371. (if (= start 0)
  372. (progn
  373. (values (subseq line start end)
  374. (+ line-start start)
  375. (+ line-start start end)))
  376. (values nil nil nil))
  377. (progn
  378. (values (subseq line start end)
  379. (+ line-start start)
  380. (+ line-start start end))))))
  381. (cl-i18n:seek object line-start))))
  382. (defmethod get-char-then-increment-pointer ((object buffered-input-file))
  383. (let ((char (get-char object)))
  384. (when char
  385. (increment-pointer object))
  386. char))
  387. (defmethod increment-pointer-then-get-char ((object buffered-input-file))
  388. (when (increment-pointer object)
  389. (get-char object)))
  390. (defmethod get-char ((object buffered-input-file))
  391. (with-accessors ((buffer buffer)
  392. (buffer-position buffer-position)) object
  393. (with-ustring (uchar-buff object)
  394. (if (valid-stream-p object)
  395. (elt uchar-buff buffer-position)
  396. nil))))
  397. (defmethod get-line (object &key (line-separator #\newline))
  398. (do* ((start-pos (logical-file-position object))
  399. (count 0 (1+ count))
  400. (read (get-char-then-increment-pointer object)
  401. (get-char-then-increment-pointer object))
  402. (line (string read) (concatenate 'string line (string read))))
  403. ((or (not read)
  404. (char= read line-separator))
  405. (values line count start-pos))))
  406. (defmethod unget-char ((object buffered-input-file)
  407. &optional (position (1- (buffer-position object))))
  408. (with-accessors ((stream stream) (buffer buffer)
  409. (logical-file-position logical-file-position)
  410. (buffer-position buffer-position)) object
  411. (with-ustring (ubuffer object)
  412. (cond
  413. ((inside-buffer-p object position)
  414. (prog1
  415. (elt ubuffer position)
  416. (decf logical-file-position)
  417. (setf buffer-position position)))
  418. (t
  419. (if (replace-buffer object :direction :backward)
  420. (unget-char object (buffer-position object))
  421. nil))))))
  422. (defmethod increment-pointer ((object buffered-input-file))
  423. (with-accessors ((logical-file-position logical-file-position)
  424. (buffer buffer)
  425. (uchars-count uchars-count)
  426. (buffer-position buffer-position)) object
  427. (let ((saved-bufferpos buffer-position)
  428. (saved-filepos logical-file-position)
  429. (buffer-length (length (cached-string object))))
  430. (if (valid-stream-p object)
  431. (progn
  432. (incf logical-file-position)
  433. (incf buffer-position)
  434. (when (not (< buffer-position buffer-length))
  435. (replace-buffer object :direction :forward))
  436. logical-file-position)
  437. (progn
  438. (setf buffer-position saved-bufferpos
  439. logical-file-position saved-filepos)
  440. nil)))))
  441. (defmethod decrement-pointer ((object buffered-input-file))
  442. (with-accessors ((logical-file-position logical-file-position)
  443. (buffer buffer)
  444. (uchars-count uchars-count)
  445. (buffer-position buffer-position)) object
  446. (let ((saved-bufferpos buffer-position)
  447. (saved-filepos logical-file-position))
  448. (if (> logical-file-position 0)
  449. (progn
  450. (if (<= buffer-position 0)
  451. (when (replace-buffer object :direction :backward)
  452. (decf logical-file-position))
  453. (progn
  454. (decf logical-file-position)
  455. (decf buffer-position)))
  456. logical-file-position)
  457. (progn
  458. (setf buffer-position saved-bufferpos
  459. logical-file-position saved-filepos)
  460. nil)))))
  461. (defmethod seek ((object buffered-input-file) pos)
  462. (with-accessors ((logical-file-position logical-file-position)
  463. (uchars-count uchars-count)
  464. (buffer buffer)
  465. (buffer-position buffer-position)) object
  466. (if (filename object) ;; slow...
  467. (let ((uchar-diff (- pos logical-file-position)))
  468. (when (/= uchar-diff 0)
  469. (loop for i from 0 below (abs uchar-diff) do
  470. (if (plusp uchar-diff)
  471. (increment-pointer object)
  472. (decrement-pointer object)))))
  473. (when (and (>= pos 0)
  474. (<= pos (length buffer)))
  475. (setf buffer-position pos
  476. logical-file-position pos)))))