123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490 |
- /* CollationElementIterator.java -- Walks through collation elements
- Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004, 2012 Free Software Foundation
- This file is part of GNU Classpath.
- GNU Classpath is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
- GNU Classpath is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with GNU Classpath; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301 USA.
- Linking this library statically or dynamically with other modules is
- making a combined work based on this library. Thus, the terms and
- conditions of the GNU General Public License cover the whole
- combination.
- As a special exception, the copyright holders of this library give you
- permission to link this library with independent modules to produce an
- executable, regardless of the license terms of these independent
- modules, and to copy and distribute the resulting executable under
- terms of your choice, provided that you also meet, for each linked
- independent module, the terms and conditions of the license of that
- module. An independent module is a module which is not derived from
- or based on this library. If you modify this library, you may extend
- this exception to your version of the library, but you are not
- obligated to do so. If you do not wish to do so, delete this
- exception statement from your version. */
- package java.text;
- import gnu.java.lang.CPStringBuilder;
- import java.util.ArrayList;
- /* Written using "Java Class Libraries", 2nd edition, plus online
- * API docs for JDK 1.2 from http://www.javasoft.com.
- * Status: Believed complete and correct to JDK 1.1.
- */
- /**
- * This class walks through the character collation elements of a
- * <code>String</code> as defined by the collation rules in an instance of
- * <code>RuleBasedCollator</code>. There is no public constructor for
- * this class. An instance is created by calling the
- * <code>getCollationElementIterator</code> method on
- * <code>RuleBasedCollator</code>.
- *
- * @author Aaron M. Renn (arenn@urbanophile.com)
- * @author Tom Tromey (tromey@cygnus.com)
- * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
- */
- public final class CollationElementIterator
- {
- /**
- * This is a constant value that is returned to indicate that the end of
- * the string was encountered. It is returned by <code>next()</code> when
- * there is no further element and by <code>previous()</code> when the
- * iterator is already at the first element.
- */
- public static final int NULLORDER = -1;
- /**
- * This is the RuleBasedCollator this object was created from. Its
- * prefix tree and default elements are consulted when a text is
- * decomposed by <code>setText</code>.
- */
- RuleBasedCollator collator;
- /**
- * This is a character iterator over the text that is being iterated over.
- * <code>setText(String)</code> stores a <code>StringCharacterIterator</code>
- * here; <code>setOffset</code> uses it to find the end index of the text.
- */
- CharacterIterator text;
- /**
- * This is the index into the collation decomposition where we are currently scanning,
- * i.e. the position in <code>textDecomposition</code> of the element that
- * the next call to <code>nextBlock()</code> will return.
- */
- int index;
- /**
- * This is the index into the String where we are currently scanning.
- * It is the value reported by <code>getOffset()</code>.
- */
- int textIndex;
- /**
- * Array containing the collation decomposition of the
- * text given to the constructor (or to the latest call to
- * <code>setText</code>).
- */
- private RuleBasedCollator.CollationElement[] textDecomposition;
- /**
- * Array containing, for each entry of <code>textDecomposition</code>, the
- * index in the source text that was recorded for that element. It has one
- * additional trailing entry which holds the length of the text.
- */
- private int[] textIndexes;
/**
 * Creates a new <code>CollationElementIterator</code> that walks the
 * collation elements of the given <code>String</code> according to the
 * rules of the given <code>RuleBasedCollator</code>.
 *
 * @param collator The <code>RuleBasedCollator</code> used for calculating
 *                 collation values.
 * @param text The <code>String</code> to iterate over.
 */
CollationElementIterator(RuleBasedCollator collator, String text)
{
  // The collator must be stored first: setText() looks elements up in it.
  this.collator = collator;
  setText(text);
}
/**
 * Creates a new <code>CollationElementIterator</code> that walks the
 * collation elements of the characters supplied by the given
 * <code>CharacterIterator</code> according to the rules of the given
 * <code>RuleBasedCollator</code>.
 *
 * @param collator The <code>RuleBasedCollator</code> used for calculating
 *                 collation values.
 * @param text The character iterator to iterate over.
 */
CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
{
  // The collator must be stored first: setText() looks elements up in it.
  this.collator = collator;
  setText(text);
}
- RuleBasedCollator.CollationElement nextBlock()
- {
- if (index >= textDecomposition.length)
- return null;
- RuleBasedCollator.CollationElement e = textDecomposition[index];
- textIndex = textIndexes[index+1];
- index++;
- return e;
- }
- RuleBasedCollator.CollationElement previousBlock()
- {
- if (index == 0)
- return null;
- index--;
- RuleBasedCollator.CollationElement e = textDecomposition[index];
- textIndex = textIndexes[index+1];
- return e;
- }
- /**
- * This method returns the collation ordering value of the next character sequence
- * in the string (it may be an extended character following collation rules).
- * This method will return <code>NULLORDER</code> if the
- * end of the string was reached.
- *
- * @return The collation ordering value.
- */
- public int next()
- {
- RuleBasedCollator.CollationElement e = nextBlock();
- if (e == null)
- return NULLORDER;
- return e.getValue();
- }
- /**
- * This method returns the collation ordering value of the previous character
- * in the string. This method will return <code>NULLORDER</code> if the
- * beginning of the string was reached.
- *
- * @return The collation ordering value.
- */
- public int previous()
- {
- RuleBasedCollator.CollationElement e = previousBlock();
- if (e == null)
- return NULLORDER;
- return e.getValue();
- }
- /**
- * This method returns the primary order value for the given collation
- * value.
- *
- * @param order The collation value returned from <code>next()</code> or
- * <code>previous()</code>.
- *
- * @return The primary order value of the specified collation value. This is
- * the high 16 bits.
- */
- public static int primaryOrder(int order)
- {
- // From the JDK 1.2 spec.
- return order >>> 16;
- }
- /**
- * This method resets the internal position pointer to read from the
- * beginning of the <code>String</code> again.
- */
- public void reset()
- {
- index = 0;
- textIndex = 0;
- }
- /**
- * This method returns the secondary order value for the given collation
- * value.
- *
- * @param order The collation value returned from <code>next()</code> or
- * <code>previous()</code>.
- *
- * @return The secondary order value of the specified collation value. This
- * is the bits 8-15.
- */
- public static short secondaryOrder(int order)
- {
- // From the JDK 1.2 spec.
- return (short) ((order >>> 8) & 255);
- }
- /**
- * This method returns the tertiary order value for the given collation
- * value.
- *
- * @param order The collation value returned from <code>next()</code> or
- * <code>previous()</code>.
- *
- * @return The tertiary order value of the specified collation value. This
- * is the low eight bits.
- */
- public static short tertiaryOrder(int order)
- {
- // From the JDK 1.2 spec.
- return (short) (order & 255);
- }
- /**
- * This method sets the <code>String</code> that it is iterating over
- * to the specified <code>String</code>.
- *
- * @param text The new <code>String</code> to iterate over.
- *
- * @since 1.2
- */
- public void setText(String text)
- {
- int idx = 0;
- int idx_idx = 0;
- int alreadyExpanded = 0;
- int idxToMove = 0;
- this.text = new StringCharacterIterator(text);
- this.index = 0;
- String work_text = text.intern();
- ArrayList<RuleBasedCollator.CollationElement> aElement = new ArrayList<RuleBasedCollator.CollationElement>();
- ArrayList<Integer> aIdx = new ArrayList<Integer>();
- // Build element collection ordered as they come in "text".
- while (idx < work_text.length())
- {
- String key, keyOld;
- Object object = null;
- int p = 1;
- // IMPROVE: use a TreeMap with a prefix-ordering rule.
- keyOld = key = null;
- do
- {
- if (object != null)
- keyOld = key;
- key = work_text.substring (idx, idx+p);
- object = collator.prefix_tree.get (key);
- if (object != null && idx < alreadyExpanded)
- {
- RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
- if (prefix.expansion != null &&
- prefix.expansion.startsWith(work_text.substring(0, idx)))
- {
- object = null;
- key = keyOld;
- }
- }
- p++;
- }
- while (idx+p <= work_text.length());
- if (object == null)
- key = keyOld;
- RuleBasedCollator.CollationElement prefix =
- (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
- /*
- * First case: There is no such sequence in the database.
- * We will have to build one from the context.
- */
- if (prefix == null)
- {
- /*
- * We are dealing with sequences in an expansion. They
- * are treated as accented characters (tertiary order).
- */
- if (alreadyExpanded > 0)
- {
- RuleBasedCollator.CollationElement e =
- collator.getDefaultAccentedElement (work_text.charAt (idx));
- aElement.add (e);
- aIdx.add (Integer.valueOf(idx_idx));
- idx++;
- alreadyExpanded--;
- if (alreadyExpanded == 0)
- {
- /* There is not any characters left in the expansion set.
- * We can increase the pointer in the source string.
- */
- idx_idx += idxToMove;
- idxToMove = 0;
- }
- else
- idx_idx++;
- }
- else
- {
- /* This is a normal character. */
- RuleBasedCollator.CollationElement e =
- collator.getDefaultElement (work_text.charAt (idx));
- Integer iRef = Integer.valueOf(idx_idx);
- /* Don't forget to mark it as a special sequence so the
- * string can be ordered.
- */
- aElement.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
- aIdx.add (iRef);
- aElement.add (e);
- aIdx.add (iRef);
- idx_idx++;
- idx++;
- }
- continue;
- }
- /*
- * Second case: Here we have found a matching sequence.
- * Here we have an expansion string prepend it to the "work text" and
- * add the corresponding sorting element. We must also mark
- */
- if (prefix.expansion != null)
- {
- work_text = prefix.expansion
- + work_text.substring (idx+prefix.key.length());
- idx = 0;
- aElement.add (prefix);
- aIdx.add (Integer.valueOf(idx_idx));
- if (alreadyExpanded == 0)
- idxToMove = prefix.key.length();
- alreadyExpanded += prefix.expansion.length()-prefix.key.length();
- }
- else
- {
- /* Third case: the simplest. We have got the prefix and it
- * has not to be expanded.
- */
- aElement.add (prefix);
- aIdx.add (Integer.valueOf(idx_idx));
- idx += prefix.key.length();
- /* If the sequence is in an expansion, we must decrease the
- * counter.
- */
- if (alreadyExpanded > 0)
- {
- alreadyExpanded -= prefix.key.length();
- if (alreadyExpanded == 0)
- {
- idx_idx += idxToMove;
- idxToMove = 0;
- }
- }
- else
- idx_idx += prefix.key.length();
- }
- }
- textDecomposition = aElement.toArray(new RuleBasedCollator.CollationElement[aElement.size()]);
- textIndexes = new int[aIdx.size()+1];
- for (int i = 0; i < aIdx.size(); i++)
- {
- textIndexes[i] = aIdx.get(i).intValue();
- }
- textIndexes[aIdx.size()] = text.length();
- }
- /**
- * This method sets the <code>String</code> that it is iterating over
- * to the <code>String</code> represented by the specified
- * <code>CharacterIterator</code>.
- *
- * @param source The <code>CharacterIterator</code> containing the new
- * <code>String</code> to iterate over.
- */
- public void setText(CharacterIterator source)
- {
- CPStringBuilder expand = new CPStringBuilder();
- // For now assume we read from the beginning of the string.
- for (char c = source.first();
- c != CharacterIterator.DONE;
- c = source.next())
- expand.append(c);
- setText(expand.toString());
- }
- /**
- * This method returns the current offset into the <code>String</code>
- * that is being iterated over.
- *
- * @return The iteration index position.
- *
- * @since 1.2
- */
- public int getOffset()
- {
- return textIndex;
- }
- /**
- * This method sets the iteration index position into the current
- * <code>String</code> to the specified value. This value must not
- * be negative and must not be greater than the last index position
- * in the <code>String</code>.
- *
- * @param offset The new iteration index position.
- *
- * @exception IllegalArgumentException If the new offset is not valid.
- */
- public void setOffset(int offset)
- {
- if (offset < 0)
- throw new IllegalArgumentException("Negative offset: " + offset);
- if (offset > (text.getEndIndex() - 1))
- throw new IllegalArgumentException("Offset too large: " + offset);
- for (index = 0; index < textDecomposition.length; index++)
- {
- if (offset <= textIndexes[index])
- break;
- }
- /*
- * As textIndexes[0] == 0, we should not have to take care whether index is
- * greater than 0. It is always.
- */
- if (textIndexes[index] == offset)
- textIndex = offset;
- else
- textIndex = textIndexes[index-1];
- }
- /**
- * This method returns the maximum length of any expansion sequence that
- * ends with the specified collation order value. (Whatever that means).
- *
- * @param value The collation order value
- *
- * @return The maximum length of an expansion sequence.
- */
- public int getMaxExpansion(int value)
- {
- return 1;
- }
- }
|