123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310 |
- /* Pattern.java -- Compiled regular expression ready to be applied.
- Copyright (C) 2002, 2004, 2005, 2007, 2010
- Free Software Foundation, Inc.
- This file is part of GNU Classpath.
- GNU Classpath is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
- GNU Classpath is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with GNU Classpath; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA.
- Linking this library statically or dynamically with other modules is
- making a combined work based on this library. Thus, the terms and
- conditions of the GNU General Public License cover the whole
- combination.
- As a special exception, the copyright holders of this library give you
- permission to link this library with independent modules to produce an
- executable, regardless of the license terms of these independent
- modules, and to copy and distribute the resulting executable under
- terms of your choice, provided that you also meet, for each linked
- independent module, the terms and conditions of the license of that
- module. An independent module is a module which is not derived from
- or based on this library. If you modify this library, you may extend
- this exception to your version of the library, but you are not
- obligated to do so. If you do not wish to do so, delete this
- exception statement from your version. */
- package java.util.regex;
- import gnu.java.lang.CPStringBuilder;
- import gnu.java.util.regex.RE;
- import gnu.java.util.regex.REException;
- import gnu.java.util.regex.RESyntax;
- import java.io.Serializable;
- import java.util.ArrayList;
- /**
- * Compiled regular expression ready to be applied.
- *
- * @since 1.4
- */
- public final class Pattern implements Serializable
- {
- private static final long serialVersionUID = 5073258162644648461L;
- public static final int CANON_EQ = 128;
- public static final int CASE_INSENSITIVE = 2;
- public static final int COMMENTS = 4;
- public static final int DOTALL = 32;
- public static final int MULTILINE = 8;
- public static final int UNICODE_CASE = 64;
- public static final int UNIX_LINES = 1;
- private final String regex;
- private final int flags;
- private final RE re;
- private Pattern (String regex, int flags)
- throws PatternSyntaxException
- {
- this.regex = regex;
- this.flags = flags;
- RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;
- int gnuFlags = 0;
- gnuFlags |= RE.REG_ICASE_USASCII;
- if ((flags & CASE_INSENSITIVE) != 0)
- gnuFlags |= RE.REG_ICASE;
- if ((flags & MULTILINE) != 0)
- {
- gnuFlags |= RE.REG_MULTILINE;
- syntax = new RESyntax(syntax);
- syntax.setLineSeparator(null);
- }
- if ((flags & DOTALL) != 0)
- gnuFlags |= RE.REG_DOT_NEWLINE;
- if ((flags & UNICODE_CASE) != 0)
- gnuFlags &= ~RE.REG_ICASE_USASCII;
- // not yet supported:
- // if ((flags & CANON_EQ) != 0) gnuFlags =
- if ((flags & UNIX_LINES) != 0)
- {
- // Use a syntax set with \n for linefeeds?
- syntax = new RESyntax(syntax);
- syntax.setLineSeparator("\n");
- }
- if ((flags & COMMENTS) != 0)
- {
- gnuFlags |= RE.REG_X_COMMENTS;
- }
- try
- {
- this.re = new RE(regex, gnuFlags, syntax);
- }
- catch (REException e)
- {
- PatternSyntaxException pse;
- pse = new PatternSyntaxException(e.getMessage(),
- regex, e.getPosition());
- pse.initCause(e);
- throw pse;
- }
- }
- // package private accessor method
- RE getRE()
- {
- return re;
- }
- /**
- * @param regex The regular expression
- *
- * @exception PatternSyntaxException If the expression's syntax is invalid
- */
- public static Pattern compile (String regex)
- throws PatternSyntaxException
- {
- return compile(regex, 0);
- }
- /**
- * @param regex The regular expression
- * @param flags The match flags, a bit mask
- *
- * @exception PatternSyntaxException If the expression's syntax is invalid
- * @exception IllegalArgumentException If bit values other than those
- * corresponding to the defined match flags are set in flags
- */
- public static Pattern compile (String regex, int flags)
- throws PatternSyntaxException
- {
- // FIXME: check which flags are really accepted
- if ((flags & ~0xEF) != 0)
- throw new IllegalArgumentException ();
- return new Pattern (regex, flags);
- }
- public int flags ()
- {
- return this.flags;
- }
- /**
- * @param regex The regular expression
- * @param input The character sequence to be matched
- *
- * @exception PatternSyntaxException If the expression's syntax is invalid
- */
- public static boolean matches (String regex, CharSequence input)
- {
- return compile(regex).matcher(input).matches();
- }
- /**
- * @param input The character sequence to be matched
- */
- public Matcher matcher (CharSequence input)
- {
- return new Matcher(this, input);
- }
- /**
- * @param input The character sequence to be matched
- */
- public String[] split (CharSequence input)
- {
- return split(input, 0);
- }
- /**
- * @param input The character sequence to be matched
- * @param limit The result threshold
- */
- public String[] split (CharSequence input, int limit)
- {
- Matcher matcher = new Matcher(this, input);
- ArrayList<String> list = new ArrayList<String>();
- int empties = 0;
- int count = 0;
- int start = 0;
- int end;
- boolean matched = matcher.find();
- while (matched && (limit <= 0 || count < limit - 1))
- {
- ++count;
- end = matcher.start();
- if (start == end)
- empties++;
- else
- {
- while (empties > 0)
- {
- list.add("");
- empties--;
- }
- String text = input.subSequence(start, end).toString();
- list.add(text);
- }
- start = matcher.end();
- matched = matcher.find();
- }
- // We matched nothing.
- if (!matched && count == 0)
- return new String[] { input.toString() };
- // Is the last token empty?
- boolean emptyLast = (start == input.length());
- // Can/Must we add empties or an extra last token at the end?
- if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast))
- {
- if (limit > list.size())
- {
- int max = limit - list.size();
- empties = (empties > max) ? max : empties;
- }
- while (empties > 0)
- {
- list.add("");
- empties--;
- }
- }
- // last token at end
- if (limit != 0 || (limit == 0 && !emptyLast))
- {
- String t = input.subSequence(start, input.length()).toString();
- if ("".equals(t) && limit == 0)
- { /* Don't add. */ }
- else
- list.add(t);
- }
- return list.toArray(new String[list.size()]);
- }
- public String pattern ()
- {
- return regex;
- }
- /**
- * Returns a literal pattern for the specified String.
- *
- * @param String to return a literal pattern for.
- * @return a literal pattern for the specified String.
- * @exception NullPointerException if str is null.
- * @since 1.5
- */
- public static String quote(String str)
- {
- int eInd = str.indexOf("\\E");
- if (eInd < 0)
- {
- // No need to handle backslashes.
- return "\\Q" + str + "\\E";
- }
- CPStringBuilder sb = new CPStringBuilder(str.length() + 16);
- sb.append("\\Q"); // start quote
- int pos = 0;
- do
- {
- // A backslash is quoted by another backslash;
- // 'E' is not needed to be quoted.
- sb.append(str.substring(pos, eInd))
- .append("\\E" + "\\\\" + "E" + "\\Q");
- pos = eInd + 2;
- } while ((eInd = str.indexOf("\\E", pos)) >= 0);
- sb.append(str.substring(pos, str.length()))
- .append("\\E"); // end quote
- return sb.toString();
- }
- /**
- * Return the regular expression used to construct this object.
- * @specnote Prior to JDK 1.5 this method had a different behavior
- * @since 1.5
- */
- public String toString()
- {
- return regex;
- }
- }
|