123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- // Copyright 2011 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package syntax
- import "testing"
// simplifyTests lists regular expressions together with the string that
// is expected when the parsed expression is simplified and printed.
// Regexp is the pattern handed to the parser; Simple is the expected
// printed form of the simplified result.
var simplifyTests = []struct {
	Regexp string
	Simple string
}{
	// Basic constructs that need little or no rewriting.
	{Regexp: `a`, Simple: `a`},
	{Regexp: `ab`, Simple: `ab`},
	{Regexp: `a|b`, Simple: `[a-b]`},
	{Regexp: `ab|cd`, Simple: `ab|cd`},
	{Regexp: `(ab)*`, Simple: `(ab)*`},
	{Regexp: `(ab)+`, Simple: `(ab)+`},
	{Regexp: `(ab)?`, Simple: `(ab)?`},
	{Regexp: `.`, Simple: `(?s:.)`},
	{Regexp: `^`, Simple: `^`},
	{Regexp: `$`, Simple: `$`},
	{Regexp: `[ac]`, Simple: `[ac]`},
	{Regexp: `[^ac]`, Simple: `[^ac]`},

	// POSIX bracket classes are printed as explicit ranges.
	{Regexp: `[[:alnum:]]`, Simple: `[0-9A-Za-z]`},
	{Regexp: `[[:alpha:]]`, Simple: `[A-Za-z]`},
	{Regexp: `[[:blank:]]`, Simple: `[\t ]`},
	{Regexp: `[[:cntrl:]]`, Simple: `[\x00-\x1f\x7f]`},
	{Regexp: `[[:digit:]]`, Simple: `[0-9]`},
	{Regexp: `[[:graph:]]`, Simple: `[!-~]`},
	{Regexp: `[[:lower:]]`, Simple: `[a-z]`},
	{Regexp: `[[:print:]]`, Simple: `[ -~]`},
	{Regexp: `[[:punct:]]`, Simple: "[!-/:-@\\[-`\\{-~]"},
	{Regexp: `[[:space:]]`, Simple: `[\t-\r ]`},
	{Regexp: `[[:upper:]]`, Simple: `[A-Z]`},
	{Regexp: `[[:xdigit:]]`, Simple: `[0-9A-Fa-f]`},

	// Perl escape classes, both bare and inside brackets.
	{Regexp: `\d`, Simple: `[0-9]`},
	{Regexp: `\s`, Simple: `[\t-\n\f-\r ]`},
	{Regexp: `\w`, Simple: `[0-9A-Z_a-z]`},
	{Regexp: `\D`, Simple: `[^0-9]`},
	{Regexp: `\S`, Simple: `[^\t-\n\f-\r ]`},
	{Regexp: `\W`, Simple: `[^0-9A-Z_a-z]`},
	{Regexp: `[\d]`, Simple: `[0-9]`},
	{Regexp: `[\s]`, Simple: `[\t-\n\f-\r ]`},
	{Regexp: `[\w]`, Simple: `[0-9A-Z_a-z]`},
	{Regexp: `[\D]`, Simple: `[^0-9]`},
	{Regexp: `[\S]`, Simple: `[^\t-\n\f-\r ]`},
	{Regexp: `[\W]`, Simple: `[^0-9A-Z_a-z]`},

	// POSIX counted repetitions.
	{Regexp: `a{1}`, Simple: `a`},
	{Regexp: `a{2}`, Simple: `aa`},
	{Regexp: `a{5}`, Simple: `aaaaa`},
	{Regexp: `a{0,1}`, Simple: `a?`},
	// The expected strings below are hard to read because the expansion
	// uses non-capturing (?:) groups rather than () groups, so that no
	// additional capturing subexpressions are created. Each trailing
	// comment shows the same shape written with fewer parens.
	{Regexp: `(a){0,2}`, Simple: `(?:(a)(a)?)?`},                       // (aa?)?
	{Regexp: `(a){0,4}`, Simple: `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`},       // (a(a(aa?)?)?)?
	{Regexp: `(a){2,6}`, Simple: `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)?
	{Regexp: `a{0,2}`, Simple: `(?:aa?)?`},                             // (aa?)?
	{Regexp: `a{0,4}`, Simple: `(?:a(?:a(?:aa?)?)?)?`},                 // (a(a(aa?)?)?)?
	{Regexp: `a{2,6}`, Simple: `aa(?:a(?:a(?:aa?)?)?)?`},               // aa(a(a(aa?)?)?)?
	{Regexp: `a{0,}`, Simple: `a*`},
	{Regexp: `a{1,}`, Simple: `a+`},
	{Regexp: `a{2,}`, Simple: `aa+`},
	{Regexp: `a{5,}`, Simple: `aaaaa+`},

	// Operands of an operator are simplified as well.
	{Regexp: `(?:a{1,}){1,}`, Simple: `a+`},
	{Regexp: `(a{1,}b{1,})`, Simple: `(a+b+)`},
	{Regexp: `a{1,}|b{1,}`, Simple: `a+|b+`},
	{Regexp: `(?:a{1,})*`, Simple: `(?:a+)*`},
	{Regexp: `(?:a{1,})+`, Simple: `a+`},
	{Regexp: `(?:a{1,})?`, Simple: `(?:a+)?`},
	{Regexp: ``, Simple: `(?:)`},
	{Regexp: `a{0}`, Simple: `(?:)`},

	// Character classes: duplicate, adjacent and overlapping ranges merge.
	{Regexp: `[ab]`, Simple: `[a-b]`},
	{Regexp: `[a-za-za-z]`, Simple: `[a-z]`},
	{Regexp: `[A-Za-zA-Za-z]`, Simple: `[A-Za-z]`},
	{Regexp: `[ABCDEFGH]`, Simple: `[A-H]`},
	{Regexp: `[AB-CD-EF-GH]`, Simple: `[A-H]`},
	{Regexp: `[W-ZP-XE-R]`, Simple: `[E-Z]`},
	{Regexp: `[a-ee-gg-m]`, Simple: `[a-m]`},
	{Regexp: `[a-ea-ha-m]`, Simple: `[a-m]`},
	{Regexp: `[a-ma-ha-e]`, Simple: `[a-m]`},
	{Regexp: `[a-zA-Z0-9 -~]`, Simple: `[ -~]`},

	// A character class that matches nothing.
	{Regexp: `[^[:cntrl:][:^cntrl:]]`, Simple: `[^\x00-\x{10FFFF}]`},

	// A character class that matches everything.
	{Regexp: `[[:cntrl:][:^cntrl:]]`, Simple: `(?s:.)`},

	// Case-insensitive matching with Unicode case folding.
	{Regexp: `(?i)A`, Simple: `(?i:A)`},
	{Regexp: `(?i)a`, Simple: `(?i:A)`},
	{Regexp: `(?i)[A]`, Simple: `(?i:A)`},
	{Regexp: `(?i)[a]`, Simple: `(?i:A)`},
	{Regexp: `(?i)K`, Simple: `(?i:K)`},
	{Regexp: `(?i)k`, Simple: `(?i:K)`},
	{Regexp: `(?i)\x{212a}`, Simple: "(?i:K)"},
	{Regexp: `(?i)[K]`, Simple: "[Kk\u212A]"},
	{Regexp: `(?i)[k]`, Simple: "[Kk\u212A]"},
	{Regexp: `(?i)[\x{212a}]`, Simple: "[Kk\u212A]"},
	{Regexp: `(?i)[a-z]`, Simple: "[A-Za-z\u017F\u212A]"},
	{Regexp: `(?i)[\x00-\x{FFFD}]`, Simple: "[\\x00-\uFFFD]"},
	{Regexp: `(?i)[\x00-\x{10FFFF}]`, Simple: `(?s:.)`},

	// The empty string used as a regular expression.
	// Inside parens the empty string has to be kept so that submatches
	// come out right, which makes these cases less interesting than
	// they could be. When printing, an empty string that is not wrapped
	// in parens is written as an explicit (?:) so that other parsers
	// can spot it more easily.
	{Regexp: `(a|b|)`, Simple: `([a-b]|(?:))`},
	{Regexp: `(|)`, Simple: `()`},
	{Regexp: `a()`, Simple: `a()`},
	{Regexp: `(()|())`, Simple: `(()|())`},
	{Regexp: `(a|)`, Simple: `(a|(?:))`},
	{Regexp: `ab()cd()`, Simple: `ab()cd()`},
	{Regexp: `()`, Simple: `()`},
	{Regexp: `()*`, Simple: `()*`},
	{Regexp: `()+`, Simple: `()+`},
	{Regexp: `()?`, Simple: `()?`},
	{Regexp: `(){0}`, Simple: `(?:)`},
	{Regexp: `(){1}`, Simple: `()`},
	{Regexp: `(){1,}`, Simple: `()+`},
	{Regexp: `(){0,2}`, Simple: `(?:()()?)?`},
}
- func TestSimplify(t *testing.T) {
- for _, tt := range simplifyTests {
- re, err := Parse(tt.Regexp, MatchNL|Perl&^OneLine)
- if err != nil {
- t.Errorf("Parse(%#q) = error %v", tt.Regexp, err)
- continue
- }
- s := re.Simplify().String()
- if s != tt.Simple {
- t.Errorf("Simplify(%#q) = %#q, want %#q", tt.Regexp, s, tt.Simple)
- }
- }
- }
|