(* lexer.mll *)
  1. {
  2. open Parser
  3. open Render_info
  4. open Tex
  5. }
  (* Named character classes used by the token rule. *)

  (* Blank characters skipped between tokens. *)
  let space = [' ' '\t' '\n' '\r']

  (* ASCII letters; also the set of single-letter identifiers. *)
  let alpha = ['a'-'z' 'A'-'Z']
  let literal_id = alpha

  (* Decimal digits. *)
  let literal_mn = ['0'-'9']

  (* Punctuation lexed as literals and as delimiters respectively. *)
  let literal_uf_lt = [',' ':' ';' '?' '!' '\'']
  let delimiter_uf_lt = ['(' ')' '.']

  (* Operator characters lexed as literals and as delimiters respectively. *)
  let literal_uf_op = ['+' '-' '*' '=']
  let delimiter_uf_op = ['/' '|']

  (* Characters accepted inside a box argument: aboxchars is the ASCII-only
     set, boxchars additionally accepts every byte from 128 to 255. *)
  let aboxchars = ['0'-'9' 'a'-'z' 'A'-'Z' '+' '-' '*' ',' '=' '(' ')' ':' '/' ';' '?' '.' '!' ' ']
  let boxchars = aboxchars | ['\128'-'\255']
  (* Main tokenizer: converts the lexing buffer into Parser tokens.

     ocamllex selects the longest match and, when several cases match the same
     length, the case written first.  The order of the cases is therefore
     significant: the ASCII-only box case must stay ahead of the 8-bit one, and
     the dedicated minus case must stay ahead of the generic operator case.

     No catch-all case is defined, so an input that matches nothing makes the
     generated lexer raise its standard empty-token failure. *)
  rule token = parse
      (* Whitespace produces no token; lexing simply continues. *)
      space + { token lexbuf }

    (* Box commands.  The argument is captured directly with an as-binding;
       it can contain neither brace because boxchars excludes both.
       NOTE(review): the text case accepts bytes 128-255 but does not call
       Texutil.tex_use_nonascii, unlike the 8-bit case below - confirm intent. *)
    | "\\text" space * '{' (boxchars + as body) '}'
        { Texutil.tex_use_ams (); BOX ("\\text", body) }
    (* ASCII-only argument: wins the tie against the next case. *)
    | ("\\mbox" | "\\hbox" | "\\vbox" as cmd) space * '{' (aboxchars + as body) '}'
        { BOX (cmd, body) }
    (* Reached only when the argument holds at least one byte in 128-255. *)
    | ("\\mbox" | "\\hbox" | "\\vbox" as cmd) space * '{' (boxchars + as body) '}'
        { Texutil.tex_use_nonascii (); BOX (cmd, body) }

    (* Single-character literals and delimiters. *)
    | literal_id { let str = Lexing.lexeme lexbuf in LITERAL (MHTMLABLEC (FONT_IT, str,str,MI,str)) }
    | literal_mn { let str = Lexing.lexeme lexbuf in LITERAL (MHTMLABLEC (FONT_RM, str,str,MN,str)) }
    | literal_uf_lt { let str = Lexing.lexeme lexbuf in LITERAL (HTMLABLEC (FONT_UFH, str,str)) }
    | delimiter_uf_lt { let str = Lexing.lexeme lexbuf in DELIMITER (HTMLABLEC (FONT_UFH, str,str)) }
    (* The minus sign gets its own HTML rendering; this case shadows the
       hyphen entry of literal_uf_op because it is listed first. *)
    | "-" { let str = Lexing.lexeme lexbuf in LITERAL (MHTMLABLEC (FONT_UFH,"-"," − ",MO,str))}
    | literal_uf_op { let str = Lexing.lexeme lexbuf in LITERAL (MHTMLABLEC (FONT_UFH, str," "^str^" ",MO,str)) }
    | delimiter_uf_op { let str = Lexing.lexeme lexbuf in DELIMITER (MHTMLABLEC (FONT_UFH, str," "^str^" ",MO,str)) }

    (* Backslash followed by letters: the token is chosen by Texutil.find.
       Longer, more specific cases below still win by the longest-match rule. *)
    | "\\" alpha + { Texutil.find (Lexing.lexeme lexbuf) }

    (* Commands taking an optional bracketed argument; the opening bracket is
       consumed as part of the token. *)
    | "\\sqrt" space * "[" { FUN_AR1opt "\\sqrt" }
    | "\\xleftarrow" space * "[" { Texutil.tex_use_ams(); FUN_AR1opt "\\xleftarrow" }
    | "\\xrightarrow" space * "[" { Texutil.tex_use_ams(); FUN_AR1opt "\\xrightarrow" }

    (* Spacing commands and backslash-escaped symbols. *)
    | "\\," { LITERAL (HTMLABLE (FONT_UF, "\\,"," ")) }
    | "\\ " { LITERAL (HTMLABLE (FONT_UF, "\\ "," ")) }
    | "\\;" { LITERAL (HTMLABLE (FONT_UF, "\\;"," ")) }
    | "\\!" { LITERAL (TEX_ONLY "\\!") }
    | "\\{" { DELIMITER (HTMLABLEC(FONT_UFH,"\\{","{")) }
    | "\\}" { DELIMITER (HTMLABLEC(FONT_UFH,"\\}","}")) }
    | "\\|" { DELIMITER (HTMLABLE (FONT_UFH,"\\|","||")) }
    | "\\_" { LITERAL (HTMLABLEC(FONT_UFH,"\\_","_")) }
    | "\\#" { LITERAL (HTMLABLE (FONT_UFH,"\\#","#")) }
    | "\\%" { LITERAL (HTMLABLE (FONT_UFH,"\\%","%")) }
    | "\\$" { LITERAL (HTMLABLE (FONT_UFH,"\\$","$")) }

    (* Cell and row separators used inside the environments below. *)
    | "&" { NEXT_CELL }
    | "\\\\" { NEXT_ROW }

    (* Environment openers and closers.  Every opener calls
       Texutil.tex_use_ams before returning its token. *)
    | "\\begin{matrix}" { Texutil.tex_use_ams(); BEGIN__MATRIX }
    | "\\end{matrix}" { END__MATRIX }
    | "\\begin{pmatrix}" { Texutil.tex_use_ams(); BEGIN_PMATRIX }
    | "\\end{pmatrix}" { END_PMATRIX }
    | "\\begin{bmatrix}" { Texutil.tex_use_ams(); BEGIN_BMATRIX }
    | "\\end{bmatrix}" { END_BMATRIX }
    | "\\begin{Bmatrix}" { Texutil.tex_use_ams(); BEGIN_BBMATRIX }
    | "\\end{Bmatrix}" { END_BBMATRIX }
    | "\\begin{vmatrix}" { Texutil.tex_use_ams(); BEGIN_VMATRIX }
    | "\\end{vmatrix}" { END_VMATRIX }
    | "\\begin{Vmatrix}" { Texutil.tex_use_ams(); BEGIN_VVMATRIX }
    | "\\end{Vmatrix}" { END_VVMATRIX }
    | "\\begin{array}" { Texutil.tex_use_ams(); BEGIN_ARRAY }
    | "\\end{array}" { END_ARRAY }
    | "\\begin{align}" { Texutil.tex_use_ams(); BEGIN_ALIGN }
    | "\\end{align}" { END_ALIGN }
    | "\\begin{alignat}" { Texutil.tex_use_ams(); BEGIN_ALIGNAT }
    | "\\end{alignat}" { END_ALIGNAT }
    | "\\begin{smallmatrix}" { Texutil.tex_use_ams(); BEGIN_SMALLMATRIX }
    | "\\end{smallmatrix}" { END_SMALLMATRIX }
    | "\\begin{cases}" { Texutil.tex_use_ams(); BEGIN_CASES }
    | "\\end{cases}" { END_CASES }

    (* Remaining single characters.  Both angle brackets are emitted as HTML
       entities so that the two relational signs are escaped the same way. *)
    | '>' { LITERAL (HTMLABLEC(FONT_UFH,">"," &gt; ")) }
    | '<' { LITERAL (HTMLABLEC(FONT_UFH,"<"," &lt; ")) }
    (* Bare percent and dollar are emitted backslash-escaped on the TeX side. *)
    | '%' { LITERAL (HTMLABLEC(FONT_UFH,"\\%","%")) }
    | '$' { LITERAL (HTMLABLEC(FONT_UFH,"\\$","$")) }
    | '~' { LITERAL (HTMLABLE (FONT_UF, "~","&nbsp;")) }
    (* An opening bracket is an ordinary delimiter, while a closing bracket
       has its own token. *)
    | '[' { DELIMITER (HTMLABLEC(FONT_UFH,"[","[")) }
    | ']' { SQ_CLOSE }
    | '{' { CURLY_OPEN }
    | '}' { CURLY_CLOSE }
    | '^' { SUP }
    | '_' { SUB }
    | eof { EOF }