test_regex.praat 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  # test_regex.praat
  # djmw 20070528, 20070917, 20080122, 20110526
  #
  # Exercises index_regex, rindex_regex and replace_regex$ through the
  # helper procedures match_index, match_rindex and replace_re.

  printline test_regex

  # Set to 1 to make the helper procedures print every comparison they perform.
  debug = 0
  # Asserts that index_regex (.s$, .match$) equals .i.
  #   .s$      string that is searched
  #   .match$  regular expression
  #   .i       expected result; callers pass 0 where no match is expected
  # Reads the global variable debug: when it is 1 the comparison is printed too.
  procedure match_index .s$ .match$ .i
      .il = index_regex (.s$, .match$)
      if debug = 1
          printline '.i' ('.il' = index_regex ("'.s$'", "'.match$'"))
      endif
      # The text after ";" only annotates the assertion. It now carries the expected
      # and the obtained index (same layout as the debug line), so a failure report
      # shows both values instead of only the call.
      assert .il = .i; '.i' ('.il' = index_regex ("'.s$'", "'.match$'"))
  endproc
  # Asserts that rindex_regex (.s$, .match$) equals .i.
  #   .s$      string that is searched
  #   .match$  regular expression
  #   .i       expected result; callers pass 0 where no match is expected
  # Reads the global variable debug: when it is 1 the comparison is printed too.
  procedure match_rindex .s$ .match$ .i
      .ir = rindex_regex (.s$, .match$)
      if debug = 1
          printline '.i' ('.ir' = rindex_regex ("'.s$'", "'.match$'"))
      endif
      # The text after ";" only annotates the assertion. It now carries the expected
      # and the obtained index (same layout as the debug line), so a failure report
      # shows both values instead of only the call.
      assert .ir = .i; '.i' ('.ir' = rindex_regex ("'.s$'", "'.match$'"))
  endproc
  # Asserts that replace_regex$ (.string$, .search$, .replace$, .n) equals .result$.
  #   .string$   input string
  #   .search$   regular expression to look for
  #   .replace$  replacement text
  #   .n         replacement count handed to replace_regex$; the calls in this file
  #              use 0 where every match is expected to be replaced and 1 where
  #              only the first one is
  #   .result$   expected outcome; callers in this file pass it unquoted as the
  #              last argument
  # Reads the global variable debug: when it is 1 the comparison is printed too.
  procedure replace_re .string$ .search$ .replace$ .n .result$
      .r$ = replace_regex$ (.string$, .search$, .replace$, .n)
      if debug = 1
          printline '.result$' ("'.r$'" = replace_regex$ ("'.string$'", "'.search$'", "'.replace$'", '.n'))
      endif
      # The annotation after ";" names the function that is really called
      # (replace_regex$, with the "$") and shows .n as the number it is.
      assert .r$ = .result$; '.result$' ("'.r$'" = replace_regex$ ("'.string$'", "'.search$'", "'.replace$'", '.n'))
  endproc
  # ordinary characters
  printline ---- # match characters
  # alphabet$ is reused by the character-class tests further down.
  alphabet$ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
  s$ = alphabet$
  sl = length (s$)
  # Two copies in a row, so that the last occurrence of a letter lies in the second copy.
  sd$ = s$ + s$
  # Every single letter must be found at its own position (first occurrence)
  # and one alphabet length further on in the doubled string (last occurrence).
  # The bound is the measured length rather than a repeated literal 52.
  for i to sl
      match$ = mid$ (s$, i, 1)
      call match_index "'s$'" "'match$'" i
      ir = i + sl
      call match_rindex "'sd$'" "'match$'" ir
  endfor
  # Escaped characters: "\\n" in the pattern and "\+" for a literal plus sign.
  printline ---- # special characters # backslash \
  call match_index "cscscAaa\n" "aa\\n" 7
  call match_rindex "cscscAaa\n" "aa\\n" 7
  # The subject contains no "b+", so both searches must report 0.
  call match_index "cscscAa+a+" "b\+" 0
  call match_index "cscscAa+a+" "a\+" 7
  call match_rindex "cscscAa+a+" "b\+" 0
  call match_rindex "cscscAa+a+" "a\+" 9

  # "^c" is expected at position 1 only, also when searching from the right.
  printline ---- # special characters # caret ^
  call match_index "cscscAa+a+" "^c" 1
  call match_rindex "cscscAa+a+" "^c" 1

  # "c$" is expected at the final position (10) from either direction.
  printline ---- # special characters # dollar $
  call match_index "cscscAa+ac" "c$" 10
  call match_rindex "cscscAa+ac" "c$" 10
  # Counted repetition; the subject has "aa" at positions 7-8.
  printline ---- # special characters # quantifier brackets {}
  call match_index "cscscAaa\n" "a{1}" 7
  call match_rindex "cscscAaa\n" "a{1}" 8

  call match_index "cscscAaa\n" "a{2}" 7
  call match_rindex "cscscAaa\n" "a{2}" 7

  call match_index "cscscAaa\n" "a{2,}" 7
  call match_rindex "cscscAaa\n" "a{2,}" 7

  # Expected positions 1 and 11: presumably because a{,2} also accepts zero
  # repetitions and therefore matches at the very start and just past the end.
  call match_index "cscscAaa\n" "a{,2}" 1
  call match_rindex "cscscAaa\n" "a{,2}" 11

  call match_index "cscscAaa\n" "a{1,2}" 7
  call match_rindex "cscscAaa\n" "a{1,2}" 8
  printline ---- # special characters # open and close brackets []
  s$ = alphabet$
  sd$ = s$ + s$
  # Positions are derived from the alphabet instead of being spelled out as
  # 26, 78, 104 and 52. alphabet$ holds the lowercase letters followed by the
  # same number of uppercase letters.
  sl = length (s$)
  ncase = sl / 2
  # Last lowercase letter of the doubled string (second copy).
  lastlower = sl + ncase
  # Last uppercase letter of the doubled string, i.e. its final character.
  lastupper = 2 * sl
  for i to ncase
      # match [a-z], [b-z], ..., [z-z]
      bl$ = mid$ (s$, i, 1)
      match$ = "[" + bl$ + "-z]"
      call match_index "'s$'" "'match$'" i
      call match_rindex "'sd$'" "'match$'" lastlower
      # now for uppercase
      il = i + ncase
      bu$ = mid$ (s$, il, 1)
      match$ = "[" + bu$ + "-Z]"
      call match_index "'s$'" "'match$'" il
      ir = lastupper
      call match_rindex "'sd$'" "'match$'" ir
      # match [a-a], [b-b], ..., [z-z]: a one-letter range behaves like the letter itself
      match$ = "[" + bl$ + "-" + bl$ + "]"
      call match_index "'s$'" "'match$'" i
      ir = sl + i
      call match_rindex "'sd$'" "'match$'" ir
  endfor
  printline ---- # special characters # grouping characters ()
  # Subjects without the sequence "ab": the group must not match (expected 0).
  call match_index "c" "(ab)" 0
  call match_index "cc" "(ab)" 0
  call match_index "ccc" "(ab)" 0
  call match_index "ac" "(ab)" 0
  call match_index "acc" "(ab)" 0
  call match_index "accc" "(ab)" 0
  call match_index "aca" "(ab)" 0
  call match_index "acca" "(ab)" 0
  call match_index "accca" "(ab)" 0

  # Subjects with "abab" at the start, after one character, and at the end:
  # first and last occurrence of the group.
  call match_index "ababcccc" "(ab)" 1
  call match_rindex "ababcccc" "(ab)" 3
  call match_index "cababcccc" "(ab)" 2
  call match_rindex "cababcccc" "(ab)" 4
  call match_index "cccccccabab" "(ab)" 8
  call match_rindex "cccccccabab" "(ab)" 10

  # Back-reference: "(ab)\1" needs "abab", which occurs only at position 1.
  call match_index "ababcccc" "(ab)\1" 1
  call match_rindex "ababcccc" "(ab)\1" 1
  printline ---- # special characters # dot .
  # There is no "d" in the subject, so ".d" must not match.
  call match_index "ababcccc" ".d" 0
  call match_rindex "ababcccc" ".d" 0

  # Any character followed by b, a or c: first and last position.
  call match_index "ababcccc" ".b" 1
  call match_rindex "ababcccc" ".b" 3
  call match_index "ababcccc" ".a" 2
  call match_rindex "ababcccc" ".a" 2
  call match_index "ababcccc" ".c" 4
  call match_rindex "ababcccc" ".c" 7
  printline ---- # special characters # star *
  # Expected 1 although the first "b" is at position 2: presumably the empty
  # match that "b*" allows at the start of the subject.
  call match_index "ababccccd" "b*" 1
  call match_index "ababccccd" "ab*" 1
  call match_rindex "ababccccd" "ab*" 3
  call match_index "ababccccd" "^.*$" 1

  printline ---- # special characters # plus +
  # "b+" needs at least one "b": positions 2 (first) and 4 (last).
  call match_index "ababccccd" "b+" 2
  call match_rindex "ababccccd" "b+" 4

  printline ---- # special characters # question mark ?
  call match_index "ababccccd" "ab?" 1
  call match_rindex "ababccccd" "ab?" 3
  printline ---- # quantifiers # *
  # "(ab)c*": "ab" plus all directly following c's is replaced by "x".
  call replace_re "ab" "(ab)c*" "x" 0 x
  call replace_re "abc" "(ab)c*" "x" 0 x
  call replace_re "abcc" "(ab)c*" "x" 0 x
  call replace_re "abcccd" "(ab)c*" "x" 0 xd

  # "(ab)c+": at least one "c" is required, so a bare "ab" stays unchanged.
  call replace_re "ab" "(ab)c+" "x" 0 ab
  call replace_re "abc" "(ab)c+" "x" 0 x
  call replace_re "abcc" "(ab)c+" "x" 0 x
  call replace_re "abcccd" "(ab)c+" "x" 0 xd
  call replace_re "yyabcccd" "(ab)c+" "x" 0 yyxd

  printline ---- # non greedy ?
  # With "?" after the quantifier the expected results keep as many c's as the
  # pattern allows: all of them for "*?", all but one for "+?".
  call replace_re "abc" "(ab)c*?" "x" 0 xc
  call replace_re "abcc" "(ab)c*?" "x" 0 xcc
  call replace_re "abcccd" "(ab)c*?" "x" 0 xcccd
  call replace_re "abcc" "(ab)c+?" "x" 0 xc
  call replace_re "abcccd" "(ab)c+?" "x" 0 xccd
  printline ---- # anchors
  # Replacing once, with and without the "^" anchor, gives the same result here.
  call replace_re "ababccccd" "ab" "x" 1 xabccccd
  call replace_re "ababccccd" "^ab" "x" 1 xabccccd

  printline --- # empty string
  # ".*" on an empty subject must yield exactly one replacement, whatever the count argument.
  call replace_re "" ".*" "aaa" 0 aaa
  call replace_re "" ".*" "aaa" 1 aaa
  call replace_re "" ".*" "aaa" 10 aaa
  # Section headings that have no tests under them yet:
  # special constructs with parentheses
  # special control characters
  # convenience escapes
  # octal and hexadecimal escapes
  # substitution special characters

  printline ---- # replaced string much longer than original
  call replace_re "ababababababababababababababababababab" "b" "xxxxx" 1 axxxxxabababababababababababababababababab

  # Unlimited replacement with an anchored pattern must still replace only the
  # leading "ab". A faulty result would be "xxccccd" instead of "xabccccd".
  # The same check appears again under "PREVIOUS BUGS < 5.0.4".
  call replace_re "ababccccd" "^ab" "x" 0 xabccccd
  printline --------- PREVIOUS BUGS < 5.0.4
  printline ---- # replace by empty string ""
  # Bugs in Praat versions before 5.0.4:
  #   this call returned the empty string ""
  call replace_re "abc " " $" "" 1 abc
  #   this call returned "xxccccd" instead of "xabccccd"
  call replace_re "ababccccd" "^ab" "x" 0 xabccccd

  printline --------- PREVIOUS BUGS < 5.0.5
  # Replacing every match, including "&&" in the replacement, which is
  # expected to double each matched character.
  call replace_re "hallo" "l" "b" 0 habbo
  call replace_re "hallo" "." "&&" 0 hhaalllloo
  printline --------- PREVIOUS BUGS --- END

  printline --------- BUG in 5.0.40
  # ".*" with unlimited replacements must consume the whole subject exactly once.
  call replace_re "c " ".*" "aaaa" 0 aaaa
  printline --------- PREVIOUS BUGS --- END

  printline test_regex OK