find_test.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package regexp
  5. import (
  6. "fmt"
  7. "strings"
  8. "testing"
  9. )
  // FindTest is one row of the find test table: a pattern, the text it is
  // applied to, and the match indices that are expected.
  //
  // Only the FindAllStringSubmatchIndex result is stored, because every other
  // expectation can be derived from it: the textual results from the indexed
  // results, the non-submatch results from the submatch results, the single
  // results from the 'all' results, and the byte results from the string
  // results.
  type FindTest struct {
  	pat, text string  // pattern and the text it is matched against
  	matches   [][]int // expected index runs, one per match; nil means no match
  }

  // String describes the test case (pattern and text) for use in failure
  // messages.
  func (ft FindTest) String() string {
  	const layout = "pat: %#q text: %#q"
  	return fmt.Sprintf(layout, ft.pat, ft.text)
  }
  23. var findTests = []FindTest{
  24. {``, ``, build(1, 0, 0)},
  25. {`^abcdefg`, "abcdefg", build(1, 0, 7)},
  26. {`a+`, "baaab", build(1, 1, 4)},
  27. {"abcd..", "abcdef", build(1, 0, 6)},
  28. {`a`, "a", build(1, 0, 1)},
  29. {`x`, "y", nil},
  30. {`b`, "abc", build(1, 1, 2)},
  31. {`.`, "a", build(1, 0, 1)},
  32. {`.*`, "abcdef", build(1, 0, 6)},
  33. {`^`, "abcde", build(1, 0, 0)},
  34. {`$`, "abcde", build(1, 5, 5)},
  35. {`^abcd$`, "abcd", build(1, 0, 4)},
  36. {`^bcd'`, "abcdef", nil},
  37. {`^abcd$`, "abcde", nil},
  38. {`a+`, "baaab", build(1, 1, 4)},
  39. {`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
  40. {`[a-z]+`, "abcd", build(1, 0, 4)},
  41. {`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
  42. {`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
  43. {`[^\n]+`, "abcd\n", build(1, 0, 4)},
  44. {`[日本語]+`, "日本語日本語", build(1, 0, 18)},
  45. {`日本語+`, "日本語", build(1, 0, 9)},
  46. {`日本語+`, "日本語語語語", build(1, 0, 18)},
  47. {`()`, "", build(1, 0, 0, 0, 0)},
  48. {`(a)`, "a", build(1, 0, 1, 0, 1)},
  49. {`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
  50. {`(.*)`, "", build(1, 0, 0, 0, 0)},
  51. {`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
  52. {`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
  53. {`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
  54. {`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
  55. {`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
  56. {`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
  57. {`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},
  58. {`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
  59. {`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
  60. {`[.]`, ".", build(1, 0, 1)},
  61. {`/$`, "/abc/", build(1, 4, 5)},
  62. {`/$`, "/abc", nil},
  63. // multiple matches
  64. {`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
  65. {`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
  66. {`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
  67. {`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
  68. {`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},
  69. // fixed bugs
  70. {`ab$`, "cab", build(1, 1, 3)},
  71. {`axxb$`, "axxcb", nil},
  72. {`data`, "daXY data", build(1, 5, 9)},
  73. {`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
  74. {`zx+`, "zzx", build(1, 1, 3)},
  75. {`ab$`, "abcab", build(1, 3, 5)},
  76. {`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
  77. {`(?:.|(?:.a))`, "", nil},
  78. {`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
  79. {`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
  80. {`(a){0}`, "", build(1, 0, 0, -1, -1)},
  81. {`(?-s)(?:(?:^).)`, "\n", nil},
  82. {`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
  83. {`(?:(?:^).)`, "\n", nil},
  84. {`\b`, "x", build(2, 0, 0, 1, 1)},
  85. {`\b`, "xx", build(2, 0, 0, 2, 2)},
  86. {`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
  87. {`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
  88. {`\B`, "x", nil},
  89. {`\B`, "xx", build(1, 1, 1)},
  90. {`\B`, "x y", nil},
  91. {`\B`, "xx yy", build(2, 1, 1, 4, 4)},
  92. // RE2 tests
  93. {`[^\S\s]`, "abcd", nil},
  94. {`[^\S[:space:]]`, "abcd", nil},
  95. {`[^\D\d]`, "abcd", nil},
  96. {`[^\D[:digit:]]`, "abcd", nil},
  97. {`(?i)\W`, "x", nil},
  98. {`(?i)\W`, "k", nil},
  99. {`(?i)\W`, "s", nil},
  100. // can backslash-escape any punctuation
  101. {`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
  102. `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
  103. {`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
  104. `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
  105. {"\\`", "`", build(1, 0, 1)},
  106. {"[\\`]+", "`", build(1, 0, 1)},
  107. // long set of matches (longer than startSize)
  108. {
  109. ".",
  110. "qwertyuiopasdfghjklzxcvbnm1234567890",
  111. build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
  112. 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
  113. 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
  114. 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
  115. },
  116. }
  // build is a helper to construct a [][]int by extracting n sequences from x.
  // The result represents n matches, each holding len(x)/n index values (a
  // start and an end index per submatch).
  //
  // It panics with "invalid build entry" if n is not positive or if x cannot
  // be split evenly into n runs; either means the table entry is malformed.
  func build(n int, x ...int) [][]int {
  	// Validate before dividing: a bad count would otherwise divide by zero
  	// or silently drop the trailing values of x.
  	if n <= 0 || len(x)%n != 0 {
  		panic("invalid build entry")
  	}
  	runLength := len(x) / n
  	ret := make([][]int, n)
  	for i := range ret {
  		ret[i] = make([]int, runLength)
  		// copy stops at len(ret[i]), so only this run's values are taken.
  		copy(ret[i], x[i*runLength:])
  	}
  	return ret
  }
  133. // First the simple cases.
  134. func TestFind(t *testing.T) {
  135. for _, test := range findTests {
  136. re := MustCompile(test.pat)
  137. if re.String() != test.pat {
  138. t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
  139. }
  140. result := re.Find([]byte(test.text))
  141. switch {
  142. case len(test.matches) == 0 && len(result) == 0:
  143. // ok
  144. case test.matches == nil && result != nil:
  145. t.Errorf("expected no match; got one: %s", test)
  146. case test.matches != nil && result == nil:
  147. t.Errorf("expected match; got none: %s", test)
  148. case test.matches != nil && result != nil:
  149. expect := test.text[test.matches[0][0]:test.matches[0][1]]
  150. if expect != string(result) {
  151. t.Errorf("expected %q got %q: %s", expect, result, test)
  152. }
  153. }
  154. }
  155. }
  156. func TestFindString(t *testing.T) {
  157. for _, test := range findTests {
  158. result := MustCompile(test.pat).FindString(test.text)
  159. switch {
  160. case len(test.matches) == 0 && len(result) == 0:
  161. // ok
  162. case test.matches == nil && result != "":
  163. t.Errorf("expected no match; got one: %s", test)
  164. case test.matches != nil && result == "":
  165. // Tricky because an empty result has two meanings: no match or empty match.
  166. if test.matches[0][0] != test.matches[0][1] {
  167. t.Errorf("expected match; got none: %s", test)
  168. }
  169. case test.matches != nil && result != "":
  170. expect := test.text[test.matches[0][0]:test.matches[0][1]]
  171. if expect != result {
  172. t.Errorf("expected %q got %q: %s", expect, result, test)
  173. }
  174. }
  175. }
  176. }
  177. func testFindIndex(test *FindTest, result []int, t *testing.T) {
  178. switch {
  179. case len(test.matches) == 0 && len(result) == 0:
  180. // ok
  181. case test.matches == nil && result != nil:
  182. t.Errorf("expected no match; got one: %s", test)
  183. case test.matches != nil && result == nil:
  184. t.Errorf("expected match; got none: %s", test)
  185. case test.matches != nil && result != nil:
  186. expect := test.matches[0]
  187. if expect[0] != result[0] || expect[1] != result[1] {
  188. t.Errorf("expected %v got %v: %s", expect, result, test)
  189. }
  190. }
  191. }
  192. func TestFindIndex(t *testing.T) {
  193. for _, test := range findTests {
  194. testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
  195. }
  196. }
  197. func TestFindStringIndex(t *testing.T) {
  198. for _, test := range findTests {
  199. testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
  200. }
  201. }
  202. func TestFindReaderIndex(t *testing.T) {
  203. for _, test := range findTests {
  204. testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
  205. }
  206. }
  207. // Now come the simple All cases.
  208. func TestFindAll(t *testing.T) {
  209. for _, test := range findTests {
  210. result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
  211. switch {
  212. case test.matches == nil && result == nil:
  213. // ok
  214. case test.matches == nil && result != nil:
  215. t.Errorf("expected no match; got one: %s", test)
  216. case test.matches != nil && result == nil:
  217. t.Fatalf("expected match; got none: %s", test)
  218. case test.matches != nil && result != nil:
  219. if len(test.matches) != len(result) {
  220. t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
  221. continue
  222. }
  223. for k, e := range test.matches {
  224. expect := test.text[e[0]:e[1]]
  225. if expect != string(result[k]) {
  226. t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test)
  227. }
  228. }
  229. }
  230. }
  231. }
  232. func TestFindAllString(t *testing.T) {
  233. for _, test := range findTests {
  234. result := MustCompile(test.pat).FindAllString(test.text, -1)
  235. switch {
  236. case test.matches == nil && result == nil:
  237. // ok
  238. case test.matches == nil && result != nil:
  239. t.Errorf("expected no match; got one: %s", test)
  240. case test.matches != nil && result == nil:
  241. t.Errorf("expected match; got none: %s", test)
  242. case test.matches != nil && result != nil:
  243. if len(test.matches) != len(result) {
  244. t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
  245. continue
  246. }
  247. for k, e := range test.matches {
  248. expect := test.text[e[0]:e[1]]
  249. if expect != result[k] {
  250. t.Errorf("expected %q got %q: %s", expect, result, test)
  251. }
  252. }
  253. }
  254. }
  255. }
  256. func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
  257. switch {
  258. case test.matches == nil && result == nil:
  259. // ok
  260. case test.matches == nil && result != nil:
  261. t.Errorf("expected no match; got one: %s", test)
  262. case test.matches != nil && result == nil:
  263. t.Errorf("expected match; got none: %s", test)
  264. case test.matches != nil && result != nil:
  265. if len(test.matches) != len(result) {
  266. t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
  267. return
  268. }
  269. for k, e := range test.matches {
  270. if e[0] != result[k][0] || e[1] != result[k][1] {
  271. t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
  272. }
  273. }
  274. }
  275. }
  276. func TestFindAllIndex(t *testing.T) {
  277. for _, test := range findTests {
  278. testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
  279. }
  280. }
  281. func TestFindAllStringIndex(t *testing.T) {
  282. for _, test := range findTests {
  283. testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
  284. }
  285. }
  286. // Now come the Submatch cases.
  287. func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
  288. if len(submatches) != len(result)*2 {
  289. t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
  290. return
  291. }
  292. for k := 0; k < len(submatches); k += 2 {
  293. if submatches[k] == -1 {
  294. if result[k/2] != nil {
  295. t.Errorf("match %d: expected nil got %q: %s", n, result, test)
  296. }
  297. continue
  298. }
  299. expect := test.text[submatches[k]:submatches[k+1]]
  300. if expect != string(result[k/2]) {
  301. t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
  302. return
  303. }
  304. }
  305. }
  306. func TestFindSubmatch(t *testing.T) {
  307. for _, test := range findTests {
  308. result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
  309. switch {
  310. case test.matches == nil && result == nil:
  311. // ok
  312. case test.matches == nil && result != nil:
  313. t.Errorf("expected no match; got one: %s", test)
  314. case test.matches != nil && result == nil:
  315. t.Errorf("expected match; got none: %s", test)
  316. case test.matches != nil && result != nil:
  317. testSubmatchBytes(&test, 0, test.matches[0], result, t)
  318. }
  319. }
  320. }
  321. func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
  322. if len(submatches) != len(result)*2 {
  323. t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
  324. return
  325. }
  326. for k := 0; k < len(submatches); k += 2 {
  327. if submatches[k] == -1 {
  328. if result[k/2] != "" {
  329. t.Errorf("match %d: expected nil got %q: %s", n, result, test)
  330. }
  331. continue
  332. }
  333. expect := test.text[submatches[k]:submatches[k+1]]
  334. if expect != result[k/2] {
  335. t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
  336. return
  337. }
  338. }
  339. }
  340. func TestFindStringSubmatch(t *testing.T) {
  341. for _, test := range findTests {
  342. result := MustCompile(test.pat).FindStringSubmatch(test.text)
  343. switch {
  344. case test.matches == nil && result == nil:
  345. // ok
  346. case test.matches == nil && result != nil:
  347. t.Errorf("expected no match; got one: %s", test)
  348. case test.matches != nil && result == nil:
  349. t.Errorf("expected match; got none: %s", test)
  350. case test.matches != nil && result != nil:
  351. testSubmatchString(&test, 0, test.matches[0], result, t)
  352. }
  353. }
  354. }
  355. func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
  356. if len(expect) != len(result) {
  357. t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
  358. return
  359. }
  360. for k, e := range expect {
  361. if e != result[k] {
  362. t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
  363. }
  364. }
  365. }
  366. func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
  367. switch {
  368. case test.matches == nil && result == nil:
  369. // ok
  370. case test.matches == nil && result != nil:
  371. t.Errorf("expected no match; got one: %s", test)
  372. case test.matches != nil && result == nil:
  373. t.Errorf("expected match; got none: %s", test)
  374. case test.matches != nil && result != nil:
  375. testSubmatchIndices(test, 0, test.matches[0], result, t)
  376. }
  377. }
  378. func TestFindSubmatchIndex(t *testing.T) {
  379. for _, test := range findTests {
  380. testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
  381. }
  382. }
  383. func TestFindStringSubmatchIndex(t *testing.T) {
  384. for _, test := range findTests {
  385. testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
  386. }
  387. }
  388. func TestFindReaderSubmatchIndex(t *testing.T) {
  389. for _, test := range findTests {
  390. testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
  391. }
  392. }
  393. // Now come the monster AllSubmatch cases.
  394. func TestFindAllSubmatch(t *testing.T) {
  395. for _, test := range findTests {
  396. result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
  397. switch {
  398. case test.matches == nil && result == nil:
  399. // ok
  400. case test.matches == nil && result != nil:
  401. t.Errorf("expected no match; got one: %s", test)
  402. case test.matches != nil && result == nil:
  403. t.Errorf("expected match; got none: %s", test)
  404. case len(test.matches) != len(result):
  405. t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
  406. case test.matches != nil && result != nil:
  407. for k, match := range test.matches {
  408. testSubmatchBytes(&test, k, match, result[k], t)
  409. }
  410. }
  411. }
  412. }
  413. func TestFindAllStringSubmatch(t *testing.T) {
  414. for _, test := range findTests {
  415. result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
  416. switch {
  417. case test.matches == nil && result == nil:
  418. // ok
  419. case test.matches == nil && result != nil:
  420. t.Errorf("expected no match; got one: %s", test)
  421. case test.matches != nil && result == nil:
  422. t.Errorf("expected match; got none: %s", test)
  423. case len(test.matches) != len(result):
  424. t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
  425. case test.matches != nil && result != nil:
  426. for k, match := range test.matches {
  427. testSubmatchString(&test, k, match, result[k], t)
  428. }
  429. }
  430. }
  431. }
  432. func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
  433. switch {
  434. case test.matches == nil && result == nil:
  435. // ok
  436. case test.matches == nil && result != nil:
  437. t.Errorf("expected no match; got one: %s", test)
  438. case test.matches != nil && result == nil:
  439. t.Errorf("expected match; got none: %s", test)
  440. case len(test.matches) != len(result):
  441. t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
  442. case test.matches != nil && result != nil:
  443. for k, match := range test.matches {
  444. testSubmatchIndices(test, k, match, result[k], t)
  445. }
  446. }
  447. }
  448. func TestFindAllSubmatchIndex(t *testing.T) {
  449. for _, test := range findTests {
  450. testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
  451. }
  452. }
  453. func TestFindAllStringSubmatchIndex(t *testing.T) {
  454. for _, test := range findTests {
  455. testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
  456. }
  457. }