combinators.rkt 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649
  1. #lang racket
  2. (require rackunit)
  3. (define (safe-rest lst)
  4. (if (empty? lst)
  5. '()
  6. (rest lst)))
  7. (define (safe-first lst)
  8. (if (empty? lst)
  9. '()
  10. (first lst)))
  11. ; Parse-result = [Struct Boolean Syntaxtree [List of Some] Number]
  12. (struct parsing-success
  13. [syntax-tree
  14. chars
  15. pos]
  16. #:transparent)
  17. (struct parsing-fail
  18. [chars
  19. error-pos]
  20. #:transparent)
  21. (define (bind fun res)
  22. (match res
  23. [(parsing-success syntax-tree chars pos)
  24. (parsing-success (fun syntax-tree)
  25. chars
  26. pos)]
  27. [(parsing-fail _ _)
  28. res]
  29. [_
  30. (fun res)]))
  31. (define (binding fun)
  32. (lambda (res)
  33. (bind fun res)))
  34. ; Parser = (Chars Number -> Parse-success | Parse-fail)
  35. #|
  36. В Zig структура - это пространство имён. Это натолкнуло меня на мысль, что структура и локальная, лексическая область видимости - это понятия, между которыми можно найти много общего. Так, я использовал локальное окружение (надеюсь, я правильно употребил данное выражение), чтобы реализовать ООП на чистом Scheme.
  37. |#
  38. (define (parse parser str)
  39. (define res (parser (string->list str)
  40. 0))
  41. (parsing-success-syntax-tree res))
  42. (define (wrap-parsing-result result wrap-name)
  43. (if wrap-name
  44. (list wrap-name result)
  45. result))
  46. (define (none/p chars pos)
  47. (parsing-fail chars pos))
  48. #| Некрасивая функция, использовать не надо.
  49. (define (atom/cmb pred [wrap-name #f])
  50. (lambda (chars pos)
  51. (if (empty? chars)
  52. (parsing-fail chars pos)
  53. (let ([current (first chars)])
  54. (if (pred current)
  55. (parsing-success
  56. (wrap-parsing-result current wrap-name)
  57. (rest chars)
  58. (+ pos 1))
  59. (parsing-fail
  60. chars pos))))))
  61. |#
  62. (define (atom-if/cmb pred [wrap-name #f])
  63. (define (new-atom-if-parser chars pos)
  64. (match chars
  65. [(cons current rest-chars)
  66. (if (pred current)
  67. (parsing-success (wrap-parsing-result current wrap-name)
  68. rest-chars
  69. (+ pos 1))
  70. (parsing-fail chars
  71. pos))]
  72. [_
  73. (parsing-fail chars
  74. pos)]))
  75. new-atom-if-parser)
  76. (define (atom-of/cmb atom [wrap-name #f])
  77. (define (new-atom-of-parser chars pos)
  78. (match chars
  79. [(cons current rest-chars)
  80. (if (equal? atom current)
  81. (parsing-success (wrap-parsing-result current wrap-name)
  82. rest-chars
  83. (+ pos 1))
  84. (parsing-fail chars
  85. pos))]
  86. [_
  87. (parsing-fail chars
  88. pos)]))
  89. new-atom-of-parser)
  90. (define (many/cmb parser #:to-skip [to-skip none/p])
  91. (lambda (chars pos)
  92. (let loop ([chars chars]
  93. [pos pos]
  94. [fun-res '()])
  95. (define res (parser chars pos))
  96. (match res
  97. [(parsing-success syntax-tree
  98. rest-chars
  99. new-pos)
  100. (loop rest-chars
  101. new-pos
  102. (if (not-save-sym? syntax-tree)
  103. fun-res
  104. (cons syntax-tree fun-res)))]
  105. [(parsing-fail _ _)
  106. (match (to-skip chars pos)
  107. [(parsing-success _ rest-chars new-pos)
  108. (loop (rest chars)
  109. (+ pos 1)
  110. fun-res)]
  111. [_
  112. (parsing-success (reverse fun-res)
  113. chars
  114. pos)])]))))
  115. (define (many1/cmb parser #:to-skip [to-skip none/p])
  116. (lambda (chars pos)
  117. (define many-parser (many/cmb parser #:to-skip to-skip))
  118. (define result (many-parser chars pos))
  119. (match result
  120. [(parsing-success syntax-tree
  121. rest-chars
  122. new-pos)
  123. (if (empty? syntax-tree)
  124. (parsing-fail chars pos)
  125. result)]
  126. [_
  127. (parsing-fail chars pos)])))
  128. (define (seq/cmb #:to-skip [to-skip none/p] . parsers)
  129. (lambda (chars pos)
  130. (let loop ([parsers parsers]
  131. [chars chars]
  132. [pos pos]
  133. [res '()])
  134. (define parser (safe-first parsers))
  135. (define parsed (if (not (empty? parser))
  136. (parser chars pos)
  137. '()))
  138. (match parsed
  139. ['()
  140. (parsing-success (reverse res)
  141. chars
  142. pos)]
  143. [(parsing-success syntax-tree
  144. rest-chars
  145. new-pos)
  146. (loop (rest parsers)
  147. rest-chars
  148. new-pos
  149. (if (not-save-sym? syntax-tree)
  150. res
  151. (cons syntax-tree res)))]
  152. [(parsing-fail fail-chars fail-pos)
  153. (match (to-skip chars pos)
  154. [(parsing-success _ skip-chars skip-pos)
  155. (loop parsers
  156. skip-chars
  157. skip-pos
  158. res)]
  159. [(parsing-fail _ _)
  160. parsed])]))))
  161. (define (lazy/cmb parser)
  162. (define (new-lazy-parser chars pos)
  163. (parser chars pos))
  164. new-lazy-parser)
  165. (define (not-saving/cmb parser)
  166. (compose (binding (λ (x) 'not-save))
  167. parser))
  168. (define (not-save-sym? sym)
  169. (equal? 'not-save sym))
  170. (define (or/cmb . parsers)
  171. (define (new-or-parser chars pos)
  172. (let loop ([parsers parsers])
  173. (match parsers
  174. [(cons parser rest-parsers)
  175. (define res (parser chars pos))
  176. (match res
  177. [(parsing-fail _ _)
  178. (loop rest-parsers)]
  179. [(parsing-success _ _ _)
  180. res])]
  181. ['()
  182. (parsing-fail chars pos)])))
  183. new-or-parser)
  184. (define (opt/cmb parser #:alternative-value [alt-val 'not-save])
  185. (define (new-opt-parser chars pos)
  186. (define res (parser chars pos))
  187. (match res
  188. [(? parsing-success?)
  189. res]
  190. [(? parsing-fail?)
  191. (parsing-success alt-val chars pos)]))
  192. new-opt-parser)
  193. (define (give-name/cmb name parser)
  194. (compose (binding (λ (stx)
  195. (cons name stx)))
  196. parser))
  197. (define (wrap-name/cmb name parser)
  198. (compose (binding (λ (stx)
  199. (list name stx)))
  200. parser))
  201. (define (sep-by/cmb parser sep #:to-skip [to-skip none/p] #:save-sep? [save-sep? #f])
  202. (define new-parser
  203. (seq/cmb #:to-skip to-skip
  204. parser
  205. (many/cmb #:to-skip to-skip
  206. (seq/cmb #:to-skip to-skip
  207. (if save-sep?
  208. sep
  209. (not-saving/cmb sep))
  210. parser))))
  211. (define (new-sep-by-parser chars pos)
  212. (bind
  213. (λ (stx)
  214. (match stx
  215. [(list frst rst)
  216. (cons frst
  217. (apply append rst))]))
  218. (new-parser chars pos)))
  219. new-sep-by-parser)
  220. (define (replace/cmb parser to)
  221. (compose (binding (λ (stx) to))
  222. parser))
  223. (define digit/p
  224. (atom-if/cmb char-numeric?))
  225. (define (digit->integer digit)
  226. (if (char-numeric? digit)
  227. (- (char->integer digit)
  228. 48)
  229. (error 'not-a-digit)))
  230. (define (digits->integer digits)
  231. (let loop ([digits digits]
  232. [acc 0])
  233. (if (empty? digits)
  234. acc
  235. (loop (rest digits)
  236. (+ (* acc 10)
  237. (digit->integer (first digits)))))))
  238. (define integer/p
  239. (wrap-name/cmb
  240. 'integer
  241. (compose (binding digits->integer)
  242. (many1/cmb digit/p))))
  243. (define whitespaces/p
  244. (many1/cmb (atom-if/cmb char-whitespace?)))
  245. (define plus/p (atom-of/cmb #\+))
  246. (define star/p (atom-of/cmb #\*))
  247. (define minus/p (atom-of/cmb #\-))
  248. (define slash/p (atom-of/cmb #\/))
  249. (define (left-bracket/p chars pos)
  250. ((atom-of/cmb #\( )
  251. chars pos))
  252. (define (right-bracket/p chars pos)
  253. ((atom-of/cmb #\) )
  254. chars pos))
  255. #|
  256. (define (brackets/p chars pos)
  257. ;(println "brackets")
  258. ((compose (binding first)
  259. (seq/cmb #:to-skip whitespaces/p
  260. (not-saving/cmb left-bracket/p)
  261. expr/p
  262. (not-saving/cmb right-bracket/p)))
  263. chars pos))
  264. (define (math-atom/p chars pos)
  265. ;(println "math-atom")
  266. ((or/cmb integer/p
  267. brackets/p)
  268. chars pos))
  269. (define (op1/p chars pos)
  270. (define parser
  271. (seq/cmb #:to-skip whitespaces/p
  272. math-atom/p
  273. (or/cmb star/p
  274. div/p)
  275. (or/cmb op1/p
  276. math-atom/p)))
  277. (bind
  278. (λ (x)
  279. (match x
  280. [(list first-operand operator second-operand)
  281. (list operator first-operand second-operand)]))
  282. (parser chars pos)))
  283. (define (op2/p chars pos)
  284. (define parser
  285. (seq/cmb #:to-skip whitespaces/p
  286. (or/cmb
  287. op1/p
  288. math-atom/p)
  289. (or/cmb plus/p
  290. minus/p)
  291. (or/cmb op2/p
  292. op1/p
  293. math-atom/p)))
  294. (bind
  295. (λ (x)
  296. (match x
  297. [(list first-operand operator second-operand)
  298. (list operator first-operand second-operand)]))
  299. (parser chars pos)))
  300. (define (expr/p chars pos)
  301. ((or/cmb
  302. op2/p
  303. op1/p
  304. math-atom/p)
  305. chars pos))
  306. |#
  307. (define (infix->prefix infix-list)
  308. (match infix-list
  309. ['()
  310. '()]
  311. [(list expr)
  312. expr]
  313. [(cons op1 (cons oper (cons op2 rst)))
  314. (infix->prefix (cons (list oper op1 op2)
  315. rst))]))
  316. (define (op1/p chars pos)
  317. (define parser
  318. (sep-by/cmb #:to-skip whitespaces/p #:save-sep? #t
  319. math-atom/p
  320. (or/cmb (replace/cmb star/p 'mul-op)
  321. (replace/cmb slash/p 'div-op))))
  322. (define res (parser chars pos))
  323. (bind infix->prefix
  324. res))
  325. (define (op2/p chars pos)
  326. (define parser
  327. (sep-by/cmb #:to-skip whitespaces/p #:save-sep? #t
  328. op1/p
  329. (or/cmb (replace/cmb plus/p 'add-op)
  330. (replace/cmb minus/p 'sub-op))))
  331. (define res (parser chars pos))
  332. (bind infix->prefix
  333. res))
  334. (define (expr/p chars pos)
  335. ((or/cmb
  336. op2/p
  337. op1/p
  338. math-atom/p)
  339. chars pos))
  340. (define (brackets/p chars pos)
  341. ;(println "brackets")
  342. ((compose (binding first)
  343. (seq/cmb #:to-skip whitespaces/p
  344. (not-saving/cmb left-bracket/p)
  345. expr/p
  346. (not-saving/cmb right-bracket/p)))
  347. chars pos))
  348. (define (math-atom/p chars pos)
  349. ;(println "math-atom")
  350. ((or/cmb integer/p
  351. brackets/p)
  352. chars pos))
  353. (define (letter/p chars pos)
  354. ((atom-if/cmb char-alphabetic?)
  355. chars
  356. pos))
  357. (define (identifier/p chars pos)
  358. (define parser
  359. (seq/cmb
  360. letter/p
  361. (many/cmb
  362. (or/cmb letter/p
  363. digit/p))))
  364. (define res (parser chars pos))
  365. (bind (λ (stx)
  366. (list 'ident
  367. (list->string
  368. (cons (first stx)
  369. (second stx)))))
  370. res))
  371. (define (fun-call/p chars pos)
  372. (define parser
  373. (give-name/cmb 'fun-call
  374. (seq/cmb #:to-skip whitespaces/p
  375. identifier/p
  376. (not-saving/cmb left-bracket/p)
  377. (sep-by/cmb #:to-skip whitespaces/p
  378. expr/p
  379. (atom-of/cmb #\,)))))
  380. (parser chars pos))
  381. (define s->l string->list)
  382. (define (digit/p-test)
  383. (check-match (digit/p (s->l "123") 0)
  384. (parsing-success #\1 '(#\2 #\3) 1))
  385. (check-match (digit/p (s->l "abc") 0)
  386. (parsing-fail '(#\a #\b #\c) 0))
  387. (check-match (digit/p '() 0)
  388. (parsing-fail '() 0)))
  389. (define (integer/p-test)
  390. (check-match (integer/p (s->l "123") 0)
  391. (parsing-success '(integer 123)
  392. '()
  393. 3))
  394. (check-match (integer/p '() 0)
  395. (parsing-fail '() 0))
  396. (check-match (integer/p (s->l "abc123") 0)
  397. (parsing-fail '(#\a #\b #\c #\1 #\2 #\3)
  398. 0)))
  399. (define (atom-if-test)
  400. (define parser (atom-if/cmb char-numeric?))
  401. (check-match (parser '() 0)
  402. (parsing-fail '() 0))
  403. (check-match (parser (s->l "123")
  404. 0)
  405. (parsing-success #\1 '(#\2 #\3) 1))
  406. (check-match (parser (s->l "!123")
  407. 0)
  408. (parsing-fail '(#\! #\1 #\2 #\3) 0)))
  409. (define (atom-of-test)
  410. (define parser (atom-of/cmb #\+))
  411. (check-match (parser '() 0)
  412. (parsing-fail '() 0))
  413. (check-match (parser (s->l "+23")
  414. 0)
  415. (parsing-success #\+ '(#\2 #\3) 1))
  416. (check-match (parser (s->l "!123")
  417. 0)
  418. (parsing-fail '(#\! #\1 #\2 #\3) 0)))
  419. (define (many-test)
  420. (define parser (many/cmb (atom-if/cmb char-numeric?)))
  421. (check-match (parser '() 0)
  422. (parsing-success '() '() 0))
  423. (check-match (parser (s->l "123a") 0)
  424. (parsing-success '(#\1 #\2 #\3) '(#\a) 3))
  425. (check-match (parser (s->l "abc") 0)
  426. (parsing-success '() '(#\a #\b #\c) 0)))
  427. (define (many1-test)
  428. (define parser (many1/cmb (atom-if/cmb char-numeric?)))
  429. (check-match (parser '() 0)
  430. (parsing-fail '() 0))
  431. (check-match (parser (s->l "123a") 0)
  432. (parsing-success '(#\1 #\2 #\3) '(#\a) 3))
  433. (check-match (parser (s->l "abc") 0)
  434. (parsing-fail '(#\a #\b #\c) 0)))
  435. (define (seq-test) ; Нужно выяснить, что должно возвращаться в parsing-fail - позиция, с которой начинался парсинг,
  436. (define parser (seq/cmb integer/p ; или та, на которой произошла ошибка. Выяснил. Кажется, позиция ошибки.
  437. (atom-of/cmb #\.)
  438. integer/p))
  439. (check-match (parser (s->l "123+4") 0)
  440. (parsing-fail '(#\+ #\4)
  441. 3))
  442. (check-match (parser (s->l "123.45") 0)
  443. (parsing-success '((integer 123)
  444. #\.
  445. (integer 45))
  446. '()
  447. 6)))
  448. (define (skip-test)
  449. (define parser-many (many/cmb digit/p #:to-skip whitespaces/p))
  450. (check-match (parser-many (s->l "123 456")
  451. 0)
  452. (parsing-success '(#\1 #\2 #\3 #\4 #\5 #\6)
  453. '()
  454. 8))
  455. (define parser-seq (seq/cmb #:to-skip integer/p
  456. (many1/cmb (atom-if/cmb char-alphabetic?))
  457. (atom-of/cmb #\+)
  458. (many1/cmb (atom-if/cmb char-alphabetic?))))
  459. (check-match (parser-seq (s->l "123abc+123abc123")
  460. 0)
  461. (parsing-success '((#\a #\b #\c)
  462. #\+
  463. (#\a #\b #\c))
  464. '(#\1 #\2 #\3)
  465. 13)))
  466. (define (opt-test)
  467. (define parser (opt/cmb (atom-if/cmb char-numeric?)
  468. #:alternative-value 'not-save))
  469. (check-match (parser (s->l "1234")
  470. 0)
  471. (parsing-success #\1
  472. '(#\2 #\3 #\4)
  473. 1))
  474. (check-match (parser (s->l "a1234")
  475. 0)
  476. (parsing-success 'not-save
  477. '(#\a #\1 #\2 #\3 #\4)
  478. 0)))
  479. (define (not-saving-test)
  480. (define parser (not-saving/cmb digit/p))
  481. (check-match (parser (s->l "123")
  482. 0)
  483. (parsing-success 'not-save
  484. '(#\2 #\3)
  485. 1))
  486. (check-match (parser (s->l "abc")
  487. 0)
  488. (parsing-fail '(#\a #\b #\c)
  489. 0))
  490. (define parser-2 (many/cmb (not-saving/cmb digit/p)))
  491. (check-match (parser-2 (s->l "123")
  492. 0)
  493. (parsing-success '()
  494. '()
  495. 3))
  496. (define parser-3 (seq/cmb integer/p
  497. (not-saving/cmb plus/p)
  498. integer/p))
  499. (check-match (parser-3 (s->l "1+3")
  500. 0)
  501. (parsing-success '((integer 1)
  502. (integer 3))
  503. '()
  504. 3)))
  505. (define (sep-by-test)
  506. (define parser (sep-by/cmb integer/p (atom-of/cmb #\,)))
  507. (check-match (parser (s->l "1,2,3")
  508. 0)
  509. (parsing-success '((integer 1)
  510. (integer 2)
  511. (integer 3))
  512. '()
  513. 5))
  514. (check-match (parser (s->l "1,2,3,")
  515. 0)
  516. (parsing-success '((integer 1)
  517. (integer 2)
  518. (integer 3))
  519. '(#\,)
  520. 5))
  521. (define parser-2
  522. (sep-by/cmb #:to-skip whitespaces/p
  523. integer/p
  524. plus/p))
  525. (check-match (parser-2 (s->l "1 + 2 + 3 4")
  526. 0)
  527. (parsing-success '((integer 1)
  528. (integer 2)
  529. (integer 3))
  530. '(#\4) ; Внимание! Тут съедаются пробелы в конце.
  531. 9)))
  532. (define (test-all)
  533. (atom-if-test)
  534. (atom-of-test)
  535. (digit/p-test)
  536. (many-test)
  537. (many1-test)
  538. (integer/p-test)
  539. (seq-test)
  540. (skip-test)
  541. (opt-test)
  542. (not-saving-test)
  543. (sep-by-test))
  544. (define (parse-tr parser str)
  545. (parser (string->list str) 0))
  546. (define (string-mul string num)
  547. (apply string-append
  548. (for/list ([i (in-range 0 num)])
  549. string)))
  550. (define (put-in-brackets str)
  551. (string-append "(" str ")"))
  552. (define test-string
  553. (string-append (string-mul "(2 + 5 / (6 + 4)) + 4 + 6 + 9 * 7 * 8 +" 1000)
  554. "5 * 4 - 16"))