12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012 |
- /* RuleBasedCollator.java -- Concrete Collator Class
- Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
- This file is part of GNU Classpath.
- GNU Classpath is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
- GNU Classpath is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with GNU Classpath; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA.
- Linking this library statically or dynamically with other modules is
- making a combined work based on this library. Thus, the terms and
- conditions of the GNU General Public License cover the whole
- combination.
- As a special exception, the copyright holders of this library give you
- permission to link this library with independent modules to produce an
- executable, regardless of the license terms of these independent
- modules, and to copy and distribute the resulting executable under
- terms of your choice, provided that you also meet, for each linked
- independent module, the terms and conditions of the license of that
- module. An independent module is a module which is not derived from
- or based on this library. If you modify this library, you may extend
- this exception to your version of the library, but you are not
- obligated to do so. If you do not wish to do so, delete this
- exception statement from your version. */
- package java.text;
- import gnu.classpath.NotImplementedException;
- import java.util.ArrayList;
- import java.util.HashMap;
- /* Written using "Java Class Libraries", 2nd edition, plus online
- * API docs for JDK 1.2 from http://www.javasoft.com.
- * Status: Believed complete and correct
- */
- /**
- * This class is a concrete subclass of <code>Collator</code> suitable
- * for string collation in a wide variety of languages. An instance of
- * this class is normally returned by the <code>getInstance</code> method
- * of <code>Collator</code> with rules predefined for the requested
- * locale. However, an instance of this class can be created manually
- * with any desired rules.
- * <p>
- * Rules take the form of a <code>String</code> with the following syntax
- * <ul>
- * <li> Modifier: '@'</li>
- * <li> Relation: '<' | ';' | ',' | '=' : <text></li>
- * <li> Reset: '&' : <text></li>
- * </ul>
- * The modifier character indicates that accents sort backward as is the
- * case with French. The modifier applies to all rules <b>after</b>
- * the modifier but before the next primary sequence. If placed at the end
- * of the sequence, it applies to all unknown accented characters.
- * The relational operators specify how the text
- * argument relates to the previous term. The relation characters have
- * the following meanings:
- * <ul>
- * <li>'<' - The text argument is greater than the prior term at the primary
- * difference level.</li>
- * <li>';' - The text argument is greater than the prior term at the secondary
- * difference level.</li>
- * <li>',' - The text argument is greater than the prior term at the tertiary
- * difference level.</li>
- * <li>'=' - The text argument is equal to the prior term</li>
- * </ul>
- * <p>
- * As for the text argument itself, this is any sequence of Unicode
- * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
- * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are
- * desired, they must be enclosed in single quotes. If any whitespace is
- * encountered, it is ignored. (For example, "a b" is equal to "ab").
- * <p>
- * The reset operation inserts the following rule at the point where the
- * text argument to it exists in the previously declared rule string. This
- * makes it easy to add new rules to an existing string by simply including
- * them in a reset sequence at the end. Note that the text argument, or
- * at least the first character of it, must be present somewhere in the
- * previously declared rules in order to be inserted properly. If this
- * is not satisfied, a <code>ParseException</code> will be thrown.
- * <p>
- * This system of configuring <code>RuleBasedCollator</code> is needlessly
- * complex and the people at Taligent who developed it (along with the folks
- * at Sun who accepted it into the Java standard library) deserve a slow
- * and agonizing death.
- * <p>
- * Here are a few examples of rule strings:
- * <p>
- * "< a < b < c" - This string says that a is less than b which is
- * less than c, with all differences being primary differences.
- * <p>
- * "< a,A < b,B < c,C" - This string says that 'A' is greater than 'a' with
- * a tertiary strength comparison. Both 'b' and 'B' are greater than 'a' and
- * 'A' during a primary strength comparison. But 'B' is greater than 'b'
- * under a tertiary strength comparison.
- * <p>
- * "< a < c & a < b " - This sequence is identical in function to the
- * "< a < b < c" rule string above. The '&' reset symbol indicates that
- * the rule "< b" is to be inserted after the text argument "a" in the
- * previous rule string segment.
- * <p>
- * "< a < b & y < z" - This is an error. The character 'y' does not appear
- * anywhere in the previous rule string segment so the rule following the
- * reset rule cannot be inserted.
- * <p>
- * "< a & A @ < e & E < f& F" - This sequence is equivalent to the following
- * "< a & A < E & e < f & F".
- * <p>
- * For a description of the various comparison strength types, see the
- * documentation for the <code>Collator</code> class.
- * <p>
- * As an additional complication to this already overly complex rule scheme,
- * if any characters precede the first rule, these characters are considered
- * ignorable. They will be treated as if they did not exist during
- * comparisons. For example, "- < a < b ..." would make '-' an ignorable
- * character such that the strings "high-tech" and "hightech" would
- * be considered identical.
- * <p>
- * A <code>ParseException</code> will be thrown for any of the following
- * conditions:
- * <ul>
- * <li>Unquoted punctuation characters in a text argument.</li>
- * <li>A relational or reset operator not followed by a text argument</li>
- * <li>A reset operator where the text argument is not present in
- * the previous rule string section.</li>
- * </ul>
- *
- * @author Aaron M. Renn (arenn@urbanophile.com)
- * @author Tom Tromey (tromey@cygnus.com)
- * @author Guilhem Lavaux (guilhem@kaffe.org)
- */
- public class RuleBasedCollator extends Collator
- {
- /**
- * This class describes what rank has a character (or a sequence of characters)
- * in the lexicographic order. Each element in a rule has a collation element.
- */
- static final class CollationElement
- {
- final String key;
- final int primary;
- final short secondary;
- final short tertiary;
- final short equality;
- final boolean ignore;
- final String expansion;
- CollationElement(String key, int primary, short secondary, short tertiary,
- short equality, String expansion, boolean ignore)
- {
- this.key = key;
- this.primary = primary;
- this.secondary = secondary;
- this.tertiary = tertiary;
- this.equality = equality;
- this.ignore = ignore;
- this.expansion = expansion;
- }
- int getValue()
- {
- return (primary << 16) + (secondary << 8) + tertiary;
- }
- }
- /**
- * Basic collation instruction (internal format) to build the series of
- * collation elements. It contains an instruction which specifies the new
- * state of the generator. The sequence of instruction should not contain
- * RESET (it is used by
- * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
- * as a temporary state while merging two sets of instructions.
- */
- private static final class CollationSorter
- {
- static final int GREATERP = 0;
- static final int GREATERS = 1;
- static final int GREATERT = 2;
- static final int EQUAL = 3;
- static final int RESET = 4;
- static final int INVERSE_SECONDARY = 5;
- final int comparisonType;
- final String textElement;
- final int hashText;
- final int offset;
- final boolean ignore;
- String expansionOrdering;
- private CollationSorter(final int comparisonType, final String textElement,
- final int offset, final boolean ignore)
- {
- this.comparisonType = comparisonType;
- this.textElement = textElement;
- this.offset = offset;
- this.ignore = ignore;
- hashText = textElement.hashCode();
- }
- }
- /**
- * The rule string this collator was built from, exactly as it was handed
- * to the constructor. It is returned unchanged by <code>getRules</code>.
- */
- private String rules;
- /**
- * This is the table of collation element values, filled by
- * <code>buildCollationVector</code> in the order of the parsed sorting
- * instructions.
- */
- private CollationElement[] ce_table;
- /**
- * Quick-prefix finder: maps the key of every entry of
- * <code>ce_table</code> to that entry. It is filled by
- * <code>buildPrefixAccess</code>.
- */
- HashMap<String,CollationElement> prefix_tree;
- /**
- * One more than the last primary sequence number entered into
- * <code>ce_table</code>. It is used as the base to compute the
- * ordering value of an unspecified character.
- */
- private int last_primary_value;
- /**
- * One more than the last tertiary sequence number that was assigned
- * while the primary sequence number was still 0, as recorded when
- * <code>ce_table</code> was built. It is used as the base to compute the
- * ordering value of an unspecified accented character.
- */
- private int last_tertiary_value;
- /**
- * This variable is true if accents need to be sorted
- * in the other direction.
- */
- private boolean inverseAccentComparison;
- /**
- * This collation element is special to unknown sequences.
- * The JDK uses it to mark and sort the characters which have
- * no collation rules. Its key is empty and its primary value is 32767.
- */
- static final CollationElement SPECIAL_UNKNOWN_SEQ =
- new CollationElement("", (short) 32767, (short) 0, (short) 0,
- (short) 0, null, false);
- /**
- * This method initializes a new instance of <code>RuleBasedCollator</code>
- * with the specified collation rules. Note that an application normally
- * obtains an instance of <code>RuleBasedCollator</code> by calling the
- * <code>getInstance</code> method of <code>Collator</code>. That method
- * automatically loads the proper set of rules for the desired locale.
- *
- * @param rules The collation rule string.
- *
- * @exception ParseException If the rule string contains syntax errors.
- */
- public RuleBasedCollator(String rules) throws ParseException
- {
- if (rules.equals(""))
- throw new ParseException("empty rule set", 0);
- this.rules = rules;
- buildCollationVector(parseString(rules));
- buildPrefixAccess();
- }
- /**
- * This method returns the number of common characters at the beginning
- * of the string of the two parameters.
- *
- * @param prefix A string considered as a prefix to test against
- * the other string.
- * @param s A string to test the prefix against.
- * @return The number of common characters.
- */
- static int findPrefixLength(String prefix, String s)
- {
- int index;
- int len = prefix.length();
- for (index = 0; index < len && index < s.length(); ++index)
- {
- if (prefix.charAt(index) != s.charAt(index))
- return index;
- }
- return index;
- }
- /**
- * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
- * checks whether it is possible to find an anchor point for the rules to be merged and
- * then insert them at that precise point.
- *
- * @param offset Offset in the string containing rules of the beginning of the rules
- * being merged in.
- * @param starter Text of the rules being merged.
- * @param main Repository of all already parsed rules.
- * @param patch Rules to be merged into the repository.
- * @throws ParseException if it is impossible to find an anchor point for the new rules.
- */
- private void mergeRules(int offset, String starter, ArrayList<CollationSorter> main,
- ArrayList<CollationSorter> patch)
- throws ParseException
- {
- int insertion_point = -1;
- int max_length = 0;
- /* We must check that no rules conflict with another already present. If it
- * is the case delete the old rule.
- */
- /* For the moment good old O(N^2) algorithm.
- */
- for (int i = 0; i < patch.size(); i++)
- {
- int j = 0;
- while (j < main.size())
- {
- CollationSorter rule1 = patch.get(i);
- CollationSorter rule2 = main.get(j);
- if (rule1.textElement.equals(rule2.textElement))
- main.remove(j);
- else
- j++;
- }
- }
- // Find the insertion point... O(N)
- for (int i = 0; i < main.size(); i++)
- {
- CollationSorter sorter = main.get(i);
- int length = findPrefixLength(starter, sorter.textElement);
- if (length > max_length)
- {
- max_length = length;
- insertion_point = i+1;
- }
- }
- if (insertion_point < 0)
- throw new ParseException("no insertion point found for " + starter, offset);
- if (max_length < starter.length())
- {
- /*
- * We need to expand the first entry. It must be sorted
- * like if it was the reference key itself (like the spec
- * said. So the first entry is special: the element is
- * replaced by the specified text element for the sorting.
- * This text replace the old one for comparisons. However
- * to preserve the behaviour we replace the first key (corresponding
- * to the found prefix) by a new code rightly ordered in the
- * sequence. The rest of the subsequence must be appended
- * to the end of the sequence.
- */
- CollationSorter sorter = patch.get(0);
- sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
- main.add(insertion_point, sorter);
- /*
- * This is a new set of rules. Append to the list.
- */
- patch.remove(0);
- insertion_point++;
- }
- // Now insert all elements of patch at the insertion point.
- for (int i = 0; i < patch.size(); i++)
- main.add(i+insertion_point, patch.get(i));
- }
- /**
- * This method parses a string and build a set of sorting instructions. The parsing
- * may only be partial on the case the rules are to be merged sometime later.
- *
- * @param stop_on_reset If this parameter is true then the parser stops when it
- * encounters a reset instruction. In the other case, it tries to parse the subrules
- * and merged it in the same repository.
- * @param v Output vector for the set of instructions.
- * @param base_offset Offset in the string to begin parsing.
- * @param rules Rules to be parsed.
- * @return -1 if the parser reached the end of the string, an integer representing the
- * offset in the string at which it stopped parsing.
- * @throws ParseException if something turned wrong during the parsing. To get details
- * decode the message.
- */
- private int subParseString(boolean stop_on_reset, ArrayList<CollationSorter> v,
- int base_offset, String rules)
- throws ParseException
- {
- boolean ignoreChars = (base_offset == 0);
- int operator = -1;
- StringBuilder sb = new StringBuilder();
- boolean doubleQuote = false;
- boolean eatingChars = false;
- boolean nextIsModifier = false;
- boolean isModifier = false;
- int i;
- main_parse_loop:
- for (i = 0; i < rules.length(); i++)
- {
- char c = rules.charAt(i);
- int type = -1;
- if (!eatingChars &&
- ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
- continue;
- isModifier = nextIsModifier;
- nextIsModifier = false;
- if (eatingChars && c != '\'')
- {
- doubleQuote = false;
- sb.append(c);
- continue;
- }
- if (doubleQuote && eatingChars)
- {
- sb.append(c);
- doubleQuote = false;
- continue;
- }
- switch (c)
- {
- case '!':
- throw new ParseException
- ("Modifier '!' is not yet supported by Classpath", i + base_offset);
- case '<':
- type = CollationSorter.GREATERP;
- break;
- case ';':
- type = CollationSorter.GREATERS;
- break;
- case ',':
- type = CollationSorter.GREATERT;
- break;
- case '=':
- type = CollationSorter.EQUAL;
- break;
- case '\'':
- eatingChars = !eatingChars;
- doubleQuote = true;
- break;
- case '@':
- if (ignoreChars)
- throw new ParseException
- ("comparison list has not yet been started. You may only use"
- + "(<,;=&)", i + base_offset);
- // Inverse the order of secondaries from now on.
- nextIsModifier = true;
- type = CollationSorter.INVERSE_SECONDARY;
- break;
- case '&':
- type = CollationSorter.RESET;
- if (stop_on_reset)
- break main_parse_loop;
- break;
- default:
- if (operator < 0)
- throw new ParseException
- ("operator missing at " + (i + base_offset), i + base_offset);
- if (! eatingChars
- && ((c >= 0x21 && c <= 0x2F)
- || (c >= 0x3A && c <= 0x40)
- || (c >= 0x5B && c <= 0x60)
- || (c >= 0x7B && c <= 0x7E)))
- throw new ParseException
- ("unquoted punctuation character '" + c + "'", i + base_offset);
- //type = ignoreChars ? CollationSorter.IGNORE : -1;
- sb.append(c);
- break;
- }
- if (type < 0)
- continue;
- if (operator < 0)
- {
- operator = type;
- continue;
- }
- if (sb.length() == 0 && !isModifier)
- throw new ParseException
- ("text element empty at " + (i+base_offset), i+base_offset);
- if (operator == CollationSorter.RESET)
- {
- /* Reposition in the sorting list at the position
- * indicated by the text element.
- */
- String subrules = rules.substring(i);
- ArrayList<CollationSorter> sorted_rules = new ArrayList<CollationSorter>();
- int idx;
- // Parse the subrules but do not iterate through all
- // sublist. This is the privilege of the first call.
- idx = subParseString(true, sorted_rules, base_offset+i, subrules);
- // Merge new parsed rules into the list.
- mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
- sb.setLength(0);
- // Reset state to none.
- operator = -1;
- type = -1;
- // We have found a new subrule at 'idx' but it has not been parsed.
- if (idx >= 0)
- {
- i += idx-1;
- continue main_parse_loop;
- }
- else
- // No more rules.
- break main_parse_loop;
- }
- String textElement = sb.toString();
- if (operator == CollationSorter.GREATERP)
- ignoreChars = false;
- CollationSorter sorter = new CollationSorter(operator, textElement,
- base_offset + rules.length(),
- ignoreChars);
- sb.setLength(0);
- v.add(sorter);
- operator = type;
- }
- if (operator >= 0)
- {
- int pos = rules.length() + base_offset;
- if ((sb.length() != 0 && nextIsModifier)
- || (sb.length() == 0 && !nextIsModifier && !eatingChars))
- throw new ParseException("text element empty at " + pos, pos);
- if (operator == CollationSorter.GREATERP)
- ignoreChars = false;
- CollationSorter sorter = new CollationSorter(operator, sb.toString(),
- base_offset+pos, ignoreChars);
- v.add(sorter);
- }
- if (i == rules.length())
- return -1;
- else
- return i;
- }
- /**
- * This method creates a copy of this object.
- *
- * @return A copy of this object.
- */
- public Object clone()
- {
- return super.clone();
- }
- /**
- * This method completely parses a string 'rules' containing sorting rules.
- *
- * @param rules String containing the rules to be parsed.
- * @return A set of sorting instructions stored in a Vector.
- * @throws ParseException if something turned wrong during the parsing. To get details
- * decode the message.
- */
- private ArrayList<CollationSorter> parseString(String rules)
- throws ParseException
- {
- ArrayList<CollationSorter> v = new ArrayList<CollationSorter>();
- // result of the first subParseString is not absolute (may be -1 or a
- // positive integer). But we do not care.
- subParseString(false, v, 0, rules);
- return v;
- }
- /**
- * This method uses the sorting instructions built by {@link #parseString}
- * to build collation elements which can be directly used to sort strings.
- *
- * @param parsedElements Parsed instructions stored in a ArrayList.
- * @throws ParseException if the order of the instructions are not valid.
- */
- private void buildCollationVector(ArrayList<CollationSorter> parsedElements)
- throws ParseException
- {
- int primary_seq = 0;
- int last_tertiary_seq = 0;
- short secondary_seq = 0;
- short tertiary_seq = 0;
- short equality_seq = 0;
- boolean inverseComparisons = false;
- final boolean DECREASING = false;
- final boolean INCREASING = true;
- boolean secondaryType = INCREASING;
- ArrayList<CollationElement> v = new ArrayList<CollationElement>();
- // elts is completely sorted.
- element_loop:
- for (int i = 0; i < parsedElements.size(); i++)
- {
- CollationSorter elt = parsedElements.get(i);
- switch (elt.comparisonType)
- {
- case CollationSorter.GREATERP:
- primary_seq++;
- if (inverseComparisons)
- {
- secondary_seq = Short.MAX_VALUE;
- secondaryType = DECREASING;
- }
- else
- {
- secondary_seq = 0;
- secondaryType = INCREASING;
- }
- tertiary_seq = 0;
- equality_seq = 0;
- inverseComparisons = false;
- break;
- case CollationSorter.GREATERS:
- if (secondaryType == DECREASING)
- secondary_seq--;
- else
- secondary_seq++;
- tertiary_seq = 0;
- equality_seq = 0;
- break;
- case CollationSorter.INVERSE_SECONDARY:
- inverseComparisons = true;
- continue element_loop;
- case CollationSorter.GREATERT:
- tertiary_seq++;
- if (primary_seq == 0)
- last_tertiary_seq = tertiary_seq;
- equality_seq = 0;
- break;
- case CollationSorter.EQUAL:
- equality_seq++;
- break;
- case CollationSorter.RESET:
- throw new ParseException
- ("Invalid reached state 'RESET'. Internal error", elt.offset);
- default:
- throw new ParseException
- ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
- }
- v.add(new CollationElement(elt.textElement, primary_seq,
- secondary_seq, tertiary_seq,
- equality_seq, elt.expansionOrdering, elt.ignore));
- }
- this.inverseAccentComparison = inverseComparisons;
- ce_table = v.toArray(new CollationElement[v.size()]);
- last_primary_value = primary_seq+1;
- last_tertiary_value = last_tertiary_seq+1;
- }
- /**
- * Build a tree where all keys are the texts of collation elements and data is
- * the collation element itself. The tree is used when extracting all prefix
- * for a given text.
- */
- private void buildPrefixAccess()
- {
- prefix_tree = new HashMap<String,CollationElement>();
- for (int i = 0; i < ce_table.length; i++)
- {
- CollationElement e = ce_table[i];
- prefix_tree.put(e.key, e);
- }
- }
- /**
- * This method returns an integer which indicates whether the first
- * specified <code>String</code> is less than, greater than, or equal to
- * the second. The value depends not only on the collation rules in
- * effect, but also the strength and decomposition settings of this object.
- *
- * @param source The first <code>String</code> to compare.
- * @param target A second <code>String</code> to compare to the first.
- *
- * @return A negative integer if source < target, a positive integer
- * if source > target, or 0 if source == target.
- */
- public int compare(String source, String target)
- {
- CollationElementIterator cs, ct;
- CollationElement ord1block = null;
- CollationElement ord2block = null;
- boolean advance_block_1 = true;
- boolean advance_block_2 = true;
- cs = getCollationElementIterator(source);
- ct = getCollationElementIterator(target);
- for(;;)
- {
- int ord1;
- int ord2;
- /*
- * We have to check whether the characters are ignorable.
- * If it is the case then forget them.
- */
- if (advance_block_1)
- {
- ord1block = cs.nextBlock();
- if (ord1block != null && ord1block.ignore)
- continue;
- }
- if (advance_block_2)
- {
- ord2block = ct.nextBlock();
- if (ord2block != null && ord2block.ignore)
- {
- advance_block_1 = false;
- continue;
- }
- }
- else
- advance_block_2 = true;
- if (!advance_block_1)
- advance_block_1 = true;
- if (ord1block != null)
- ord1 = ord1block.getValue();
- else
- {
- if (ord2block == null)
- return 0;
- return -1;
- }
- if (ord2block == null)
- return 1;
- ord2 = ord2block.getValue();
- // We know chars are totally equal, so skip
- if (ord1 == ord2)
- {
- if (getStrength() == IDENTICAL)
- if (!ord1block.key.equals(ord2block.key))
- return ord1block.key.compareTo(ord2block.key);
- continue;
- }
- // Check for primary strength differences
- int prim1 = CollationElementIterator.primaryOrder(ord1);
- int prim2 = CollationElementIterator.primaryOrder(ord2);
- if (prim1 == 0 && getStrength() < TERTIARY)
- {
- advance_block_2 = false;
- continue;
- }
- else if (prim2 == 0 && getStrength() < TERTIARY)
- {
- advance_block_1 = false;
- continue;
- }
- if (prim1 < prim2)
- return -1;
- else if (prim1 > prim2)
- return 1;
- else if (getStrength() == PRIMARY)
- continue;
- // Check for secondary strength differences
- int sec1 = CollationElementIterator.secondaryOrder(ord1);
- int sec2 = CollationElementIterator.secondaryOrder(ord2);
- if (sec1 < sec2)
- return -1;
- else if (sec1 > sec2)
- return 1;
- else if (getStrength() == SECONDARY)
- continue;
- // Check for tertiary differences
- int tert1 = CollationElementIterator.tertiaryOrder(ord1);
- int tert2 = CollationElementIterator.tertiaryOrder(ord2);
- if (tert1 < tert2)
- return -1;
- else if (tert1 > tert2)
- return 1;
- else if (getStrength() == TERTIARY)
- continue;
- // Apparently JDK does this (at least for my test case).
- return ord1block.key.compareTo(ord2block.key);
- }
- }
- /**
- * This method tests this object for equality against the specified
- * object. This will be true if and only if the specified object is
- * another reference to this object.
- *
- * @param obj The <code>Object</code> to compare against this object.
- *
- * @return <code>true</code> if the specified object is equal to this object,
- * <code>false</code> otherwise.
- */
- public boolean equals(Object obj)
- {
- if (obj == this)
- return true;
- else
- return false;
- }
- /**
- * This method builds a default collation element without invoking
- * the database created from the rules passed to the constructor.
- *
- * @param c Character which needs a collation element.
- * @return A valid brand new CollationElement instance.
- */
- CollationElement getDefaultElement(char c)
- {
- int v;
- // Preliminary support for generic accent sorting inversion (I don't know if all
- // characters in the range should be sorted backward). This is the place
- // to fix this if needed.
- if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
- v = 0x0361 - ((int) c - 0x02B9);
- else
- v = (short) c;
- return new CollationElement("" + c, last_primary_value + v,
- (short) 0, (short) 0, (short) 0, null, false);
- }
- /**
- * This method builds a default collation element for an accented character
- * without invoking the database created from the rules passed to the constructor.
- *
- * @param c Character which needs a collation element.
- * @return A valid brand new CollationElement instance.
- */
- CollationElement getDefaultAccentedElement(char c)
- {
- int v;
- // Preliminary support for generic accent sorting inversion (I don't know if all
- // characters in the range should be sorted backward). This is the place
- // to fix this if needed.
- if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
- v = 0x0361 - ((int) c - 0x02B9);
- else
- v = (short) c;
- return new CollationElement("" + c, (short) 0,
- (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
- }
- /**
- * This method returns an instance for <code>CollationElementIterator</code>
- * for the specified <code>String</code> under the collation rules for this
- * object.
- *
- * @param source The <code>String</code> to return the
- * <code>CollationElementIterator</code> instance for.
- *
- * @return A <code>CollationElementIterator</code> for the specified
- * <code>String</code>.
- */
- public CollationElementIterator getCollationElementIterator(String source)
- {
- return new CollationElementIterator(this, source);
- }
- /**
- * This method returns an instance of <code>CollationElementIterator</code>
- * for the <code>String</code> represented by the specified
- * <code>CharacterIterator</code>.
- *
- * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
- *
- * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
- */
- public CollationElementIterator getCollationElementIterator(CharacterIterator source)
- {
- return new CollationElementIterator(this, source);
- }
- /**
- * This method returns an instance of <code>CollationKey</code> for the
- * specified <code>String</code>. The object returned will have a
- * more efficient mechanism for its comparison function that could
- * provide speed benefits if multiple comparisons are performed, such
- * as during a sort.
- *
- * @param source The <code>String</code> to create a <code>CollationKey</code> for.
- *
- * @return A <code>CollationKey</code> for the specified <code>String</code>.
- */
- public CollationKey getCollationKey(String source)
- {
- CollationElementIterator cei = getCollationElementIterator(source);
- ArrayList<Integer> vect = new ArrayList<Integer>();
- int ord = cei.next();
- cei.reset(); //set to start of string
- while (ord != CollationElementIterator.NULLORDER)
- {
- // If the primary order is null, it means this is an ignorable
- // character.
- if (CollationElementIterator.primaryOrder(ord) == 0)
- {
- ord = cei.next();
- continue;
- }
- switch (getStrength())
- {
- case PRIMARY:
- ord = CollationElementIterator.primaryOrder(ord);
- break;
- case SECONDARY:
- ord = CollationElementIterator.primaryOrder(ord) << 8;
- ord |= CollationElementIterator.secondaryOrder(ord);
- default:
- break;
- }
- vect.add(Integer.valueOf(ord));
- ord = cei.next(); //increment to next key
- }
- Integer[] objarr = vect.toArray(new Integer[vect.size()]);
- byte[] key = new byte[objarr.length * 4];
- for (int i = 0; i < objarr.length; i++)
- {
- int j = objarr[i].intValue();
- key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
- key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
- key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
- key [i * 4 + 3] = (byte) (j & 0x000000FF);
- }
- return new CollationKey(this, source, key);
- }
- /**
- * This method returns a <code>String</code> containing the collation rules
- * for this object.
- *
- * @return The collation rules for this object.
- */
- public String getRules()
- {
- return rules;
- }
- /**
- * This method returns a hash value for this object.
- *
- * @return A hash value for this object.
- */
- public int hashCode()
- {
- return System.identityHashCode(this);
- }
- }
|