tokenizer.ml 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. open Printf
  2. open Location
  3. open Utf8
  4. type token = | Lpar | Rpar | Str of string
  5. type located_token = token * location
  6. type pos_tracking_stream = char Stream.t * (position ref) * (int ref)
  7. let current_position (_, pos, _) = !pos
  8. let init_tracking_stream stream = stream , (ref (0,0)) , ref 0
  9. let peek (stream, _, _) = Stream.peek stream
  10. let junk (stream, pos, unc (*until-next-char*) ) : unit = let (row, col) = !pos in
  11. (match Stream.peek stream with
  12. | Some c ->
  13. if !unc > 0 then unc := !unc - 1 else
  14. begin match c with
  15. | '\n' -> pos := (row+1, 0) ; unc := 0
  16. | c -> unc := (char_length c) - 1; pos := (row, col+1)
  17. end
  18. | None -> ());
  19. Stream.junk stream
  20. let fetch_in_stream file_name = init_tracking_stream (Stream.of_channel (open_in file_name))
  21. let rec generate_token tracking_stream : located_token option = match peek tracking_stream with
  22. | None -> None
  23. | Some c -> let cp = (current_position tracking_stream) in begin match c with
  24. | '(' -> junk tracking_stream; Some (Lpar, Single cp)
  25. | ')' -> junk tracking_stream; Some (Rpar, Single cp)
  26. | ' ' | '\n' -> junk tracking_stream; generate_token tracking_stream
  27. | _ ->
  28. let rec parse_atom str = match peek tracking_stream with
  29. | None -> Str str
  30. | Some xd -> begin match xd with
  31. | ')' | '(' | ' ' | '\n' -> Str str
  32. | o -> junk tracking_stream ; parse_atom (str ^ (String.make 1 o))
  33. end
  34. in
  35. let atom = parse_atom "" in
  36. Some (atom, Range (cp, back_one_char (current_position tracking_stream)))
  37. end
  38. let tokenize char_stream : located_token Stream.t = Stream.from (fun _ -> (generate_token char_stream))
  39. let sprintf_token = function
  40. |Lpar -> "("
  41. |Rpar -> ")"
  42. |Str s -> s
  43. let print_token (token, location) =
  44. printf "%s: %s\n" (sprintf_token token) (sprintf_location location)
  45. (*let main = let stream = fetch_in_stream "/tmp/xd" in
  46. let tokens = tokenize stream in
  47. Stream.iter print_token tokens*)