mirror of
https://github.com/autc04/Retro68.git
synced 2024-12-11 19:49:32 +00:00
490 lines
15 KiB
Java
490 lines
15 KiB
Java
/* CollationElementIterator.java -- Walks through collation elements
|
|
Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004, 2012 Free Software Foundation
|
|
|
|
This file is part of GNU Classpath.
|
|
|
|
GNU Classpath is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2, or (at your option)
|
|
any later version.
|
|
|
|
GNU Classpath is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GNU Classpath; see the file COPYING. If not, write to the
|
|
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
02110-1301 USA.
|
|
|
|
Linking this library statically or dynamically with other modules is
|
|
making a combined work based on this library. Thus, the terms and
|
|
conditions of the GNU General Public License cover the whole
|
|
combination.
|
|
|
|
As a special exception, the copyright holders of this library give you
|
|
permission to link this library with independent modules to produce an
|
|
executable, regardless of the license terms of these independent
|
|
modules, and to copy and distribute the resulting executable under
|
|
terms of your choice, provided that you also meet, for each linked
|
|
independent module, the terms and conditions of the license of that
|
|
module. An independent module is a module which is not derived from
|
|
or based on this library. If you modify this library, you may extend
|
|
this exception to your version of the library, but you are not
|
|
obligated to do so. If you do not wish to do so, delete this
|
|
exception statement from your version. */
|
|
|
|
|
|
package java.text;
|
|
|
|
import gnu.java.lang.CPStringBuilder;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
/* Written using "Java Class Libraries", 2nd edition, plus online
|
|
* API docs for JDK 1.2 from http://www.javasoft.com.
|
|
* Status: Believed complete and correct to JDK 1.1.
|
|
*/
|
|
|
|
/**
|
|
* This class walks through the character collation elements of a
|
|
* <code>String</code> as defined by the collation rules in an instance of
|
|
* <code>RuleBasedCollator</code>. There is no public constructor for
|
|
* this class. An instance is created by calling the
|
|
* <code>getCollationElementIterator</code> method on
|
|
* <code>RuleBasedCollator</code>.
|
|
*
|
|
* @author Aaron M. Renn (arenn@urbanophile.com)
|
|
* @author Tom Tromey (tromey@cygnus.com)
|
|
* @author Guilhem Lavaux (guilhem.lavaux@free.fr)
|
|
*/
|
|
public final class CollationElementIterator
|
|
{
|
|
/**
|
|
* This is a constant value that is returned to indicate that the end of
|
|
* the string was encountered.
|
|
*/
|
|
public static final int NULLORDER = -1;
|
|
|
|
/**
|
|
* This is the RuleBasedCollator this object was created from.
|
|
*/
|
|
RuleBasedCollator collator;
|
|
|
|
/**
|
|
* This is the String that is being iterated over.
|
|
*/
|
|
CharacterIterator text;
|
|
|
|
/**
|
|
* This is the index into the collation decomposition where we are currently scanning.
|
|
*/
|
|
int index;
|
|
|
|
/**
|
|
* This is the index into the String where we are currently scanning.
|
|
*/
|
|
int textIndex;
|
|
|
|
/**
|
|
* Array containing the collation decomposition of the
|
|
* text given to the constructor.
|
|
*/
|
|
private RuleBasedCollator.CollationElement[] textDecomposition;
|
|
|
|
/**
|
|
* Array containing the index of the specified block.
|
|
*/
|
|
private int[] textIndexes;
|
|
|
|
/**
|
|
* This method initializes a new instance of <code>CollationElementIterator</code>
|
|
* to iterate over the specified <code>String</code> using the rules in the
|
|
* specified <code>RuleBasedCollator</code>.
|
|
*
|
|
* @param collator The <code>RuleBasedCollation</code> used for calculating collation values
|
|
* @param text The <code>String</code> to iterate over.
|
|
*/
|
|
CollationElementIterator(RuleBasedCollator collator, String text)
|
|
{
|
|
this.collator = collator;
|
|
|
|
setText (text);
|
|
}
|
|
|
|
/**
|
|
* This method initializes a new instance of <code>CollationElementIterator</code>
|
|
* to iterate over the specified <code>String</code> using the rules in the
|
|
* specified <code>RuleBasedCollator</code>.
|
|
*
|
|
* @param collator The <code>RuleBasedCollation</code> used for calculating collation values
|
|
* @param text The character iterator to iterate over.
|
|
*/
|
|
CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
|
|
{
|
|
this.collator = collator;
|
|
|
|
setText (text);
|
|
}
|
|
|
|
RuleBasedCollator.CollationElement nextBlock()
|
|
{
|
|
if (index >= textDecomposition.length)
|
|
return null;
|
|
|
|
RuleBasedCollator.CollationElement e = textDecomposition[index];
|
|
|
|
textIndex = textIndexes[index+1];
|
|
|
|
index++;
|
|
|
|
return e;
|
|
}
|
|
|
|
RuleBasedCollator.CollationElement previousBlock()
|
|
{
|
|
if (index == 0)
|
|
return null;
|
|
|
|
index--;
|
|
RuleBasedCollator.CollationElement e = textDecomposition[index];
|
|
|
|
textIndex = textIndexes[index+1];
|
|
|
|
return e;
|
|
}
|
|
|
|
/**
|
|
* This method returns the collation ordering value of the next character sequence
|
|
* in the string (it may be an extended character following collation rules).
|
|
* This method will return <code>NULLORDER</code> if the
|
|
* end of the string was reached.
|
|
*
|
|
* @return The collation ordering value.
|
|
*/
|
|
public int next()
|
|
{
|
|
RuleBasedCollator.CollationElement e = nextBlock();
|
|
|
|
if (e == null)
|
|
return NULLORDER;
|
|
|
|
return e.getValue();
|
|
}
|
|
|
|
/**
|
|
* This method returns the collation ordering value of the previous character
|
|
* in the string. This method will return <code>NULLORDER</code> if the
|
|
* beginning of the string was reached.
|
|
*
|
|
* @return The collation ordering value.
|
|
*/
|
|
public int previous()
|
|
{
|
|
RuleBasedCollator.CollationElement e = previousBlock();
|
|
|
|
if (e == null)
|
|
return NULLORDER;
|
|
|
|
return e.getValue();
|
|
}
|
|
|
|
/**
|
|
* This method returns the primary order value for the given collation
|
|
* value.
|
|
*
|
|
* @param order The collation value returned from <code>next()</code> or
|
|
* <code>previous()</code>.
|
|
*
|
|
* @return The primary order value of the specified collation value. This is
|
|
* the high 16 bits.
|
|
*/
|
|
public static int primaryOrder(int order)
|
|
{
|
|
// From the JDK 1.2 spec.
|
|
return order >>> 16;
|
|
}
|
|
|
|
/**
|
|
* This method resets the internal position pointer to read from the
|
|
* beginning of the <code>String</code> again.
|
|
*/
|
|
public void reset()
|
|
{
|
|
index = 0;
|
|
textIndex = 0;
|
|
}
|
|
|
|
/**
|
|
* This method returns the secondary order value for the given collation
|
|
* value.
|
|
*
|
|
* @param order The collation value returned from <code>next()</code> or
|
|
* <code>previous()</code>.
|
|
*
|
|
* @return The secondary order value of the specified collation value. This
|
|
* is the bits 8-15.
|
|
*/
|
|
public static short secondaryOrder(int order)
|
|
{
|
|
// From the JDK 1.2 spec.
|
|
return (short) ((order >>> 8) & 255);
|
|
}
|
|
|
|
/**
|
|
* This method returns the tertiary order value for the given collation
|
|
* value.
|
|
*
|
|
* @param order The collation value returned from <code>next()</code> or
|
|
* <code>previous()</code>.
|
|
*
|
|
* @return The tertiary order value of the specified collation value. This
|
|
* is the low eight bits.
|
|
*/
|
|
public static short tertiaryOrder(int order)
|
|
{
|
|
// From the JDK 1.2 spec.
|
|
return (short) (order & 255);
|
|
}
|
|
|
|
/**
|
|
* This method sets the <code>String</code> that it is iterating over
|
|
* to the specified <code>String</code>.
|
|
*
|
|
* @param text The new <code>String</code> to iterate over.
|
|
*
|
|
* @since 1.2
|
|
*/
|
|
public void setText(String text)
|
|
{
|
|
int idx = 0;
|
|
int idx_idx = 0;
|
|
int alreadyExpanded = 0;
|
|
int idxToMove = 0;
|
|
|
|
this.text = new StringCharacterIterator(text);
|
|
this.index = 0;
|
|
|
|
String work_text = text.intern();
|
|
|
|
ArrayList<RuleBasedCollator.CollationElement> aElement = new ArrayList<RuleBasedCollator.CollationElement>();
|
|
ArrayList<Integer> aIdx = new ArrayList<Integer>();
|
|
|
|
// Build element collection ordered as they come in "text".
|
|
while (idx < work_text.length())
|
|
{
|
|
String key, keyOld;
|
|
|
|
Object object = null;
|
|
int p = 1;
|
|
|
|
// IMPROVE: use a TreeMap with a prefix-ordering rule.
|
|
keyOld = key = null;
|
|
do
|
|
{
|
|
if (object != null)
|
|
keyOld = key;
|
|
key = work_text.substring (idx, idx+p);
|
|
object = collator.prefix_tree.get (key);
|
|
if (object != null && idx < alreadyExpanded)
|
|
{
|
|
RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
|
|
if (prefix.expansion != null &&
|
|
prefix.expansion.startsWith(work_text.substring(0, idx)))
|
|
{
|
|
object = null;
|
|
key = keyOld;
|
|
}
|
|
}
|
|
p++;
|
|
}
|
|
while (idx+p <= work_text.length());
|
|
|
|
if (object == null)
|
|
key = keyOld;
|
|
|
|
RuleBasedCollator.CollationElement prefix =
|
|
(RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
|
|
|
|
/*
|
|
* First case: There is no such sequence in the database.
|
|
* We will have to build one from the context.
|
|
*/
|
|
if (prefix == null)
|
|
{
|
|
/*
|
|
* We are dealing with sequences in an expansion. They
|
|
* are treated as accented characters (tertiary order).
|
|
*/
|
|
if (alreadyExpanded > 0)
|
|
{
|
|
RuleBasedCollator.CollationElement e =
|
|
collator.getDefaultAccentedElement (work_text.charAt (idx));
|
|
|
|
aElement.add (e);
|
|
aIdx.add (Integer.valueOf(idx_idx));
|
|
idx++;
|
|
alreadyExpanded--;
|
|
if (alreadyExpanded == 0)
|
|
{
|
|
/* There is not any characters left in the expansion set.
|
|
* We can increase the pointer in the source string.
|
|
*/
|
|
idx_idx += idxToMove;
|
|
idxToMove = 0;
|
|
}
|
|
else
|
|
idx_idx++;
|
|
}
|
|
else
|
|
{
|
|
/* This is a normal character. */
|
|
RuleBasedCollator.CollationElement e =
|
|
collator.getDefaultElement (work_text.charAt (idx));
|
|
Integer iRef = Integer.valueOf(idx_idx);
|
|
|
|
/* Don't forget to mark it as a special sequence so the
|
|
* string can be ordered.
|
|
*/
|
|
aElement.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
|
|
aIdx.add (iRef);
|
|
aElement.add (e);
|
|
aIdx.add (iRef);
|
|
idx_idx++;
|
|
idx++;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Second case: Here we have found a matching sequence.
|
|
* Here we have an expansion string prepend it to the "work text" and
|
|
* add the corresponding sorting element. We must also mark
|
|
*/
|
|
if (prefix.expansion != null)
|
|
{
|
|
work_text = prefix.expansion
|
|
+ work_text.substring (idx+prefix.key.length());
|
|
idx = 0;
|
|
aElement.add (prefix);
|
|
aIdx.add (Integer.valueOf(idx_idx));
|
|
if (alreadyExpanded == 0)
|
|
idxToMove = prefix.key.length();
|
|
alreadyExpanded += prefix.expansion.length()-prefix.key.length();
|
|
}
|
|
else
|
|
{
|
|
/* Third case: the simplest. We have got the prefix and it
|
|
* has not to be expanded.
|
|
*/
|
|
aElement.add (prefix);
|
|
aIdx.add (Integer.valueOf(idx_idx));
|
|
idx += prefix.key.length();
|
|
/* If the sequence is in an expansion, we must decrease the
|
|
* counter.
|
|
*/
|
|
if (alreadyExpanded > 0)
|
|
{
|
|
alreadyExpanded -= prefix.key.length();
|
|
if (alreadyExpanded == 0)
|
|
{
|
|
idx_idx += idxToMove;
|
|
idxToMove = 0;
|
|
}
|
|
}
|
|
else
|
|
idx_idx += prefix.key.length();
|
|
}
|
|
}
|
|
|
|
textDecomposition = aElement.toArray(new RuleBasedCollator.CollationElement[aElement.size()]);
|
|
textIndexes = new int[aIdx.size()+1];
|
|
for (int i = 0; i < aIdx.size(); i++)
|
|
{
|
|
textIndexes[i] = aIdx.get(i).intValue();
|
|
}
|
|
textIndexes[aIdx.size()] = text.length();
|
|
}
|
|
|
|
/**
|
|
* This method sets the <code>String</code> that it is iterating over
|
|
* to the <code>String</code> represented by the specified
|
|
* <code>CharacterIterator</code>.
|
|
*
|
|
* @param source The <code>CharacterIterator</code> containing the new
|
|
* <code>String</code> to iterate over.
|
|
*/
|
|
public void setText(CharacterIterator source)
|
|
{
|
|
CPStringBuilder expand = new CPStringBuilder();
|
|
|
|
// For now assume we read from the beginning of the string.
|
|
for (char c = source.first();
|
|
c != CharacterIterator.DONE;
|
|
c = source.next())
|
|
expand.append(c);
|
|
|
|
setText(expand.toString());
|
|
}
|
|
|
|
/**
|
|
* This method returns the current offset into the <code>String</code>
|
|
* that is being iterated over.
|
|
*
|
|
* @return The iteration index position.
|
|
*
|
|
* @since 1.2
|
|
*/
|
|
public int getOffset()
|
|
{
|
|
return textIndex;
|
|
}
|
|
|
|
/**
|
|
* This method sets the iteration index position into the current
|
|
* <code>String</code> to the specified value. This value must not
|
|
* be negative and must not be greater than the last index position
|
|
* in the <code>String</code>.
|
|
*
|
|
* @param offset The new iteration index position.
|
|
*
|
|
* @exception IllegalArgumentException If the new offset is not valid.
|
|
*/
|
|
public void setOffset(int offset)
|
|
{
|
|
if (offset < 0)
|
|
throw new IllegalArgumentException("Negative offset: " + offset);
|
|
|
|
if (offset > (text.getEndIndex() - 1))
|
|
throw new IllegalArgumentException("Offset too large: " + offset);
|
|
|
|
for (index = 0; index < textDecomposition.length; index++)
|
|
{
|
|
if (offset <= textIndexes[index])
|
|
break;
|
|
}
|
|
/*
|
|
* As textIndexes[0] == 0, we should not have to take care whether index is
|
|
* greater than 0. It is always.
|
|
*/
|
|
if (textIndexes[index] == offset)
|
|
textIndex = offset;
|
|
else
|
|
textIndex = textIndexes[index-1];
|
|
}
|
|
|
|
/**
|
|
* This method returns the maximum length of any expansion sequence that
|
|
* ends with the specified collation order value. (Whatever that means).
|
|
*
|
|
* @param value The collation order value
|
|
*
|
|
* @return The maximum length of an expansion sequence.
|
|
*/
|
|
public int getMaxExpansion(int value)
|
|
{
|
|
return 1;
|
|
}
|
|
}
|