aboutsummaryrefslogtreecommitdiff
path: root/src/main/java/com/lowagie/text/pdf/hyphenation
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/com/lowagie/text/pdf/hyphenation')
-rw-r--r--src/main/java/com/lowagie/text/pdf/hyphenation/ByteVector.java124
-rw-r--r--src/main/java/com/lowagie/text/pdf/hyphenation/CharVector.java134
-rw-r--r--src/main/java/com/lowagie/text/pdf/hyphenation/Hyphen.java68
-rw-r--r--src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenation.java83
-rw-r--r--src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationException.java28
-rw-r--r--src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationTree.java454
-rw-r--r--src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenator.java240
-rw-r--r--src/main/java/com/lowagie/text/pdf/hyphenation/PatternConsumer.java55
-rw-r--r--src/main/java/com/lowagie/text/pdf/hyphenation/SimplePatternParser.java278
-rw-r--r--src/main/java/com/lowagie/text/pdf/hyphenation/TernaryTree.java667
10 files changed, 0 insertions, 2131 deletions
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/ByteVector.java b/src/main/java/com/lowagie/text/pdf/hyphenation/ByteVector.java
deleted file mode 100644
index f3a83ec..0000000
--- a/src/main/java/com/lowagie/text/pdf/hyphenation/ByteVector.java
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.lowagie.text.pdf.hyphenation;
-
-import java.io.Serializable;
-
-/**
- * This class implements a simple byte vector with access to the
- * underlying array.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class ByteVector implements Serializable {
-
- /**
- * Capacity increment size
- */
- private static final int DEFAULT_BLOCK_SIZE = 2048;
- private int blockSize;
-
- /**
- * The encapsulated array
- */
- private byte[] array;
-
- /**
- * Points to next free item
- */
- private int n;
-
- public ByteVector() {
- this(DEFAULT_BLOCK_SIZE);
- }
-
- public ByteVector(int capacity) {
- if (capacity > 0) {
- blockSize = capacity;
- } else {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = new byte[blockSize];
- n = 0;
- }
-
- public ByteVector(byte[] a) {
- blockSize = DEFAULT_BLOCK_SIZE;
- array = a;
- n = 0;
- }
-
- public ByteVector(byte[] a, int capacity) {
- if (capacity > 0) {
- blockSize = capacity;
- } else {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = a;
- n = 0;
- }
-
- public byte[] getArray() {
- return array;
- }
-
- /**
- * return number of items in array
- */
- public int length() {
- return n;
- }
-
- /**
- * returns current capacity of array
- */
- public int capacity() {
- return array.length;
- }
-
- public void put(int index, byte val) {
- array[index] = val;
- }
-
- public byte get(int index) {
- return array[index];
- }
-
- /**
- * This is to implement memory allocation in the array. Like malloc().
- */
- public int alloc(int size) {
- int index = n;
- int len = array.length;
- if (n + size >= len) {
- byte[] aux = new byte[len + blockSize];
- System.arraycopy(array, 0, aux, 0, len);
- array = aux;
- }
- n += size;
- return index;
- }
-
- public void trimToSize() {
- if (n < array.length) {
- byte[] aux = new byte[n];
- System.arraycopy(array, 0, aux, 0, n);
- array = aux;
- }
- }
-
-}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/CharVector.java b/src/main/java/com/lowagie/text/pdf/hyphenation/CharVector.java
deleted file mode 100644
index 777cc7a..0000000
--- a/src/main/java/com/lowagie/text/pdf/hyphenation/CharVector.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.lowagie.text.pdf.hyphenation;
-
-import java.io.Serializable;
-
-/**
- * This class implements a simple char vector with access to the
- * underlying array.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class CharVector implements Cloneable, Serializable {
-
- /**
- * Capacity increment size
- */
- private static final int DEFAULT_BLOCK_SIZE = 2048;
- private int blockSize;
-
- /**
- * The encapsulated array
- */
- private char[] array;
-
- /**
- * Points to next free item
- */
- private int n;
-
- public CharVector() {
- this(DEFAULT_BLOCK_SIZE);
- }
-
- public CharVector(int capacity) {
- if (capacity > 0) {
- blockSize = capacity;
- } else {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = new char[blockSize];
- n = 0;
- }
-
- public CharVector(char[] a) {
- blockSize = DEFAULT_BLOCK_SIZE;
- array = a;
- n = a.length;
- }
-
- public CharVector(char[] a, int capacity) {
- if (capacity > 0) {
- blockSize = capacity;
- } else {
- blockSize = DEFAULT_BLOCK_SIZE;
- }
- array = a;
- n = a.length;
- }
-
- /**
- * Reset Vector but don't resize or clear elements
- */
- public void clear() {
- n = 0;
- }
-
- public Object clone() {
- CharVector cv = new CharVector((char[])array.clone(), blockSize);
- cv.n = this.n;
- return cv;
- }
-
- public char[] getArray() {
- return array;
- }
-
- /**
- * return number of items in array
- */
- public int length() {
- return n;
- }
-
- /**
- * returns current capacity of array
- */
- public int capacity() {
- return array.length;
- }
-
- public void put(int index, char val) {
- array[index] = val;
- }
-
- public char get(int index) {
- return array[index];
- }
-
- public int alloc(int size) {
- int index = n;
- int len = array.length;
- if (n + size >= len) {
- char[] aux = new char[len + blockSize];
- System.arraycopy(array, 0, aux, 0, len);
- array = aux;
- }
- n += size;
- return index;
- }
-
- public void trimToSize() {
- if (n < array.length) {
- char[] aux = new char[n];
- System.arraycopy(array, 0, aux, 0, n);
- array = aux;
- }
- }
-
-}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphen.java b/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphen.java
deleted file mode 100644
index 7e2522c..0000000
--- a/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphen.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.lowagie.text.pdf.hyphenation;
-
-import java.io.Serializable;
-
-/**
- * This class represents a hyphen. A 'full' hyphen is made of 3 parts:
- * the pre-break text, post-break text and no-break. If no line-break
- * is generated at this position, the no-break text is used, otherwise,
- * pre-break and post-break are used. Typically, pre-break is equal to
- * the hyphen character and the others are empty. However, this general
- * scheme allows support for cases in some languages where words change
- * spelling if they're split across lines, like german's 'backen' which
- * hyphenates 'bak-ken'. BTW, this comes from TeX.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-
-public class Hyphen implements Serializable {
- public String preBreak;
- public String noBreak;
- public String postBreak;
-
- Hyphen(String pre, String no, String post) {
- preBreak = pre;
- noBreak = no;
- postBreak = post;
- }
-
- Hyphen(String pre) {
- preBreak = pre;
- noBreak = null;
- postBreak = null;
- }
-
- public String toString() {
- if (noBreak == null
- && postBreak == null
- && preBreak != null
- && preBreak.equals("-")) {
- return "-";
- }
- StringBuffer res = new StringBuffer("{");
- res.append(preBreak);
- res.append("}{");
- res.append(postBreak);
- res.append("}{");
- res.append(noBreak);
- res.append('}');
- return res.toString();
- }
-
-}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenation.java b/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenation.java
deleted file mode 100644
index c86ab87..0000000
--- a/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenation.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.lowagie.text.pdf.hyphenation;
-
-/**
- * This class represents a hyphenated word.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class Hyphenation {
-
- private int[] hyphenPoints;
- private String word;
-
- /**
- * number of hyphenation points in word
- */
- private int len;
-
- /**
- * rawWord as made of alternating strings and {@link Hyphen Hyphen}
- * instances
- */
- Hyphenation(String word, int[] points) {
- this.word = word;
- hyphenPoints = points;
- len = points.length;
- }
-
- /**
- * @return the number of hyphenation points in the word
- */
- public int length() {
- return len;
- }
-
- /**
- * @return the pre-break text, not including the hyphen character
- */
- public String getPreHyphenText(int index) {
- return word.substring(0, hyphenPoints[index]);
- }
-
- /**
- * @return the post-break text
- */
- public String getPostHyphenText(int index) {
- return word.substring(hyphenPoints[index]);
- }
-
- /**
- * @return the hyphenation points
- */
- public int[] getHyphenationPoints() {
- return hyphenPoints;
- }
-
- public String toString() {
- StringBuffer str = new StringBuffer();
- int start = 0;
- for (int i = 0; i < len; i++) {
- str.append(word.substring(start, hyphenPoints[i]) + "-");
- start = hyphenPoints[i];
- }
- str.append(word.substring(start));
- return str.toString();
- }
-
-}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationException.java b/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationException.java
deleted file mode 100644
index 575e18d..0000000
--- a/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationException.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.lowagie.text.pdf.hyphenation;
-
-/**
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class HyphenationException extends Exception {
-
- public HyphenationException(String msg) {
- super(msg);
- }
-
-}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationTree.java b/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationTree.java
deleted file mode 100644
index b0d4c0b..0000000
--- a/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationTree.java
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* $Id: HyphenationTree.java,v 1.40 2005/07/16 16:49:34 blowagie Exp $ */
-
-package com.lowagie.text.pdf.hyphenation;
-
-import java.io.InputStream;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-
-/**
- * This tree structure stores the hyphenation patterns in an efficient
- * way for fast lookup. It provides the provides the method to
- * hyphenate a word.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class HyphenationTree extends TernaryTree
- implements PatternConsumer, Serializable {
-
- /**
- * value space: stores the inteletter values
- */
- protected ByteVector vspace;
-
- /**
- * This map stores hyphenation exceptions
- */
- protected HashMap stoplist;
-
- /**
- * This map stores the character classes
- */
- protected TernaryTree classmap;
-
- /**
- * Temporary map to store interletter values on pattern loading.
- */
- private transient TernaryTree ivalues;
-
- public HyphenationTree() {
- stoplist = new HashMap(23); // usually a small table
- classmap = new TernaryTree();
- vspace = new ByteVector();
- vspace.alloc(1); // this reserves index 0, which we don't use
- }
-
- /**
- * Packs the values by storing them in 4 bits, two values into a byte
- * Values range is from 0 to 9. We use zero as terminator,
- * so we'll add 1 to the value.
- * @param values a string of digits from '0' to '9' representing the
- * interletter values.
- * @return the index into the vspace array where the packed values
- * are stored.
- */
- protected int packValues(String values) {
- int i, n = values.length();
- int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
- int offset = vspace.alloc(m);
- byte[] va = vspace.getArray();
- for (i = 0; i < n; i++) {
- int j = i >> 1;
- byte v = (byte)((values.charAt(i) - '0' + 1) & 0x0f);
- if ((i & 1) == 1) {
- va[j + offset] = (byte)(va[j + offset] | v);
- } else {
- va[j + offset] = (byte)(v << 4); // big endian
- }
- }
- va[m - 1 + offset] = 0; // terminator
- return offset;
- }
-
- protected String unpackValues(int k) {
- StringBuffer buf = new StringBuffer();
- byte v = vspace.get(k++);
- while (v != 0) {
- char c = (char)((v >>> 4) - 1 + '0');
- buf.append(c);
- c = (char)(v & 0x0f);
- if (c == 0) {
- break;
- }
- c = (char)(c - 1 + '0');
- buf.append(c);
- v = vspace.get(k++);
- }
- return buf.toString();
- }
-
- public void loadSimplePatterns(InputStream stream) throws HyphenationException {
- SimplePatternParser pp = new SimplePatternParser();
- ivalues = new TernaryTree();
-
- pp.parse(stream, this);
-
- // patterns/values should be now in the tree
- // let's optimize a bit
- trimToSize();
- vspace.trimToSize();
- classmap.trimToSize();
-
- // get rid of the auxiliary map
- ivalues = null;
- }
-
-
- public String findPattern(String pat) {
- int k = super.find(pat);
- if (k >= 0) {
- return unpackValues(k);
- }
- return "";
- }
-
- /**
- * String compare, returns 0 if equal or
- * t is a substring of s
- */
- protected int hstrcmp(char[] s, int si, char[] t, int ti) {
- for (; s[si] == t[ti]; si++, ti++) {
- if (s[si] == 0) {
- return 0;
- }
- }
- if (t[ti] == 0) {
- return 0;
- }
- return s[si] - t[ti];
- }
-
- protected byte[] getValues(int k) {
- StringBuffer buf = new StringBuffer();
- byte v = vspace.get(k++);
- while (v != 0) {
- char c = (char)((v >>> 4) - 1);
- buf.append(c);
- c = (char)(v & 0x0f);
- if (c == 0) {
- break;
- }
- c = (char)(c - 1);
- buf.append(c);
- v = vspace.get(k++);
- }
- byte[] res = new byte[buf.length()];
- for (int i = 0; i < res.length; i++) {
- res[i] = (byte)buf.charAt(i);
- }
- return res;
- }
-
- /**
- * <p>Search for all possible partial matches of word starting
- * at index an update interletter values. In other words, it
- * does something like:</p>
- * <code>
- * for(i=0; i<patterns.length; i++) {
- * if ( word.substring(index).startsWidth(patterns[i]) )
- * update_interletter_values(patterns[i]);
- * }
- * </code>
- * <p>But it is done in an efficient way since the patterns are
- * stored in a ternary tree. In fact, this is the whole purpose
- * of having the tree: doing this search without having to test
- * every single pattern. The number of patterns for languages
- * such as English range from 4000 to 10000. Thus, doing thousands
- * of string comparisons for each word to hyphenate would be
- * really slow without the tree. The tradeoff is memory, but
- * using a ternary tree instead of a trie, almost halves the
- * the memory used by Lout or TeX. It's also faster than using
- * a hash table</p>
- * @param word null terminated word to match
- * @param index start index from word
- * @param il interletter values array to update
- */
- protected void searchPatterns(char[] word, int index, byte[] il) {
- byte[] values;
- int i = index;
- char p, q;
- char sp = word[i];
- p = root;
-
- while (p > 0 && p < sc.length) {
- if (sc[p] == 0xFFFF) {
- if (hstrcmp(word, i, kv.getArray(), lo[p]) == 0) {
- values = getValues(eq[p]); // data pointer is in eq[]
- int j = index;
- for (int k = 0; k < values.length; k++) {
- if (j < il.length && values[k] > il[j]) {
- il[j] = values[k];
- }
- j++;
- }
- }
- return;
- }
- int d = sp - sc[p];
- if (d == 0) {
- if (sp == 0) {
- break;
- }
- sp = word[++i];
- p = eq[p];
- q = p;
-
- // look for a pattern ending at this position by searching for
- // the null char ( splitchar == 0 )
- while (q > 0 && q < sc.length) {
- if (sc[q] == 0xFFFF) { // stop at compressed branch
- break;
- }
- if (sc[q] == 0) {
- values = getValues(eq[q]);
- int j = index;
- for (int k = 0; k < values.length; k++) {
- if (j < il.length && values[k] > il[j]) {
- il[j] = values[k];
- }
- j++;
- }
- break;
- } else {
- q = lo[q];
-
- /**
- * actually the code should be:
- * q = sc[q] < 0 ? hi[q] : lo[q];
- * but java chars are unsigned
- */
- }
- }
- } else {
- p = d < 0 ? lo[p] : hi[p];
- }
- }
- }
-
- /**
- * Hyphenate word and return a Hyphenation object.
- * @param word the word to be hyphenated
- * @param remainCharCount Minimum number of characters allowed
- * before the hyphenation point.
- * @param pushCharCount Minimum number of characters allowed after
- * the hyphenation point.
- * @return a {@link Hyphenation Hyphenation} object representing
- * the hyphenated word or null if word is not hyphenated.
- */
- public Hyphenation hyphenate(String word, int remainCharCount,
- int pushCharCount) {
- char[] w = word.toCharArray();
- return hyphenate(w, 0, w.length, remainCharCount, pushCharCount);
- }
-
- /**
- * w = "****nnllllllnnn*****",
- * where n is a non-letter, l is a letter,
- * all n may be absent, the first n is at offset,
- * the first l is at offset + iIgnoreAtBeginning;
- * word = ".llllll.'\0'***",
- * where all l in w are copied into word.
- * In the first part of the routine len = w.length,
- * in the second part of the routine len = word.length.
- * Three indices are used:
- * index(w), the index in w,
- * index(word), the index in word,
- * letterindex(word), the index in the letter part of word.
- * The following relations exist:
- * index(w) = offset + i - 1
- * index(word) = i - iIgnoreAtBeginning
- * letterindex(word) = index(word) - 1
- * (see first loop).
- * It follows that:
- * index(w) - index(word) = offset - 1 + iIgnoreAtBeginning
- * index(w) = letterindex(word) + offset + iIgnoreAtBeginning
- */
-
- /**
- * Hyphenate word and return an array of hyphenation points.
- * @param w char array that contains the word
- * @param offset Offset to first character in word
- * @param len Length of word
- * @param remainCharCount Minimum number of characters allowed
- * before the hyphenation point.
- * @param pushCharCount Minimum number of characters allowed after
- * the hyphenation point.
- * @return a {@link Hyphenation Hyphenation} object representing
- * the hyphenated word or null if word is not hyphenated.
- */
- public Hyphenation hyphenate(char[] w, int offset, int len,
- int remainCharCount, int pushCharCount) {
- int i;
- char[] word = new char[len + 3];
-
- // normalize word
- char[] c = new char[2];
- int iIgnoreAtBeginning = 0;
- int iLength = len;
- boolean bEndOfLetters = false;
- for (i = 1; i <= len; i++) {
- c[0] = w[offset + i - 1];
- int nc = classmap.find(c, 0);
- if (nc < 0) { // found a non-letter character ...
- if (i == (1 + iIgnoreAtBeginning)) {
- // ... before any letter character
- iIgnoreAtBeginning ++;
- } else {
- // ... after a letter character
- bEndOfLetters = true;
- }
- iLength --;
- } else {
- if (!bEndOfLetters) {
- word[i - iIgnoreAtBeginning] = (char)nc;
- } else {
- return null;
- }
- }
- }
- len = iLength;
- if (len < (remainCharCount + pushCharCount)) {
- // word is too short to be hyphenated
- return null;
- }
- int[] result = new int[len + 1];
- int k = 0;
-
- // check exception list first
- String sw = new String(word, 1, len);
- if (stoplist.containsKey(sw)) {
- // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no = null)
- ArrayList hw = (ArrayList)stoplist.get(sw);
- int j = 0;
- for (i = 0; i < hw.size(); i++) {
- Object o = hw.get(i);
- // j = index(sw) = letterindex(word)?
- // result[k] = corresponding index(w)
- if (o instanceof String) {
- j += ((String)o).length();
- if (j >= remainCharCount && j < (len - pushCharCount)) {
- result[k++] = j + iIgnoreAtBeginning;
- }
- }
- }
- } else {
- // use algorithm to get hyphenation points
- word[0] = '.'; // word start marker
- word[len + 1] = '.'; // word end marker
- word[len + 2] = 0; // null terminated
- byte[] il = new byte[len + 3]; // initialized to zero
- for (i = 0; i < len + 1; i++) {
- searchPatterns(word, i, il);
- }
-
- // hyphenation points are located where interletter value is odd
- // i is letterindex(word),
- // i + 1 is index(word),
- // result[k] = corresponding index(w)
- for (i = 0; i < len; i++) {
- if (((il[i + 1] & 1) == 1) && i >= remainCharCount
- && i <= (len - pushCharCount)) {
- result[k++] = i + iIgnoreAtBeginning;
- }
- }
- }
-
-
- if (k > 0) {
- // trim result array
- int[] res = new int[k];
- System.arraycopy(result, 0, res, 0, k);
- return new Hyphenation(new String(w, offset, len), res);
- } else {
- return null;
- }
- }
-
- /**
- * Add a character class to the tree. It is used by
- * {@link SimplePatternParser SimplePatternParser} as callback to
- * add character classes. Character classes define the
- * valid word characters for hyphenation. If a word contains
- * a character not defined in any of the classes, it is not hyphenated.
- * It also defines a way to normalize the characters in order
- * to compare them with the stored patterns. Usually pattern
- * files use only lower case characters, in this case a class
- * for letter 'a', for example, should be defined as "aA", the first
- * character being the normalization char.
- */
- public void addClass(String chargroup) {
- if (chargroup.length() > 0) {
- char equivChar = chargroup.charAt(0);
- char[] key = new char[2];
- key[1] = 0;
- for (int i = 0; i < chargroup.length(); i++) {
- key[0] = chargroup.charAt(i);
- classmap.insert(key, 0, equivChar);
- }
- }
- }
-
- /**
- * Add an exception to the tree. It is used by
- * {@link SimplePatternParser SimplePatternParser} class as callback to
- * store the hyphenation exceptions.
- * @param word normalized word
- * @param hyphenatedword a vector of alternating strings and
- * {@link Hyphen hyphen} objects.
- */
- public void addException(String word, ArrayList hyphenatedword) {
- stoplist.put(word, hyphenatedword);
- }
-
- /**
- * Add a pattern to the tree. Mainly, to be used by
- * {@link SimplePatternParser SimplePatternParser} class as callback to
- * add a pattern to the tree.
- * @param pattern the hyphenation pattern
- * @param ivalue interletter weight values indicating the
- * desirability and priority of hyphenating at a given point
- * within the pattern. It should contain only digit characters.
- * (i.e. '0' to '9').
- */
- public void addPattern(String pattern, String ivalue) {
- int k = ivalues.find(ivalue);
- if (k <= 0) {
- k = packValues(ivalue);
- ivalues.insert(ivalue, (char)k);
- }
- insert(pattern, (char)k);
- }
-
- public void printStats() {
- System.out.println("Value space size = "
- + Integer.toString(vspace.length()));
- super.printStats();
- }
-}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenator.java b/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenator.java
deleted file mode 100644
index bdd34b9..0000000
--- a/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenator.java
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.lowagie.text.pdf.hyphenation;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStream;
-import java.util.Hashtable;
-
-import com.lowagie.text.pdf.BaseFont;
-
-/**
- * This class is the main entry point to the hyphenation package.
- * You can use only the static methods or create an instance.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public class Hyphenator {
-
- /** TODO: Don't use statics */
- private static Hashtable hyphenTrees = new Hashtable();
-
- private HyphenationTree hyphenTree = null;
- private int remainCharCount = 2;
- private int pushCharCount = 2;
- private static boolean errorDump = false;
- private static final String defaultHyphLocation = "com/lowagie/text/pdf/hyphenation/hyph/";
-
- /** Holds value of property hyphenDir. */
- private static String hyphenDir = "";
-
- /**
- * @param lang
- * @param country
- * @param leftMin
- * @param rightMin
- */
- public Hyphenator(String lang, String country, int leftMin,
- int rightMin) {
- hyphenTree = getHyphenationTree(lang, country);
- remainCharCount = leftMin;
- pushCharCount = rightMin;
- }
-
- /**
- * @param lang
- * @param country
- * @return the hyphenation tree
- */
- public static HyphenationTree getHyphenationTree(String lang,
- String country) {
- String key = lang;
- // check whether the country code has been used
- if (country != null && !country.equals("none")) {
- key += "_" + country;
- }
- // first try to find it in the cache
- if (hyphenTrees.containsKey(key)) {
- return (HyphenationTree)hyphenTrees.get(key);
- }
- if (hyphenTrees.containsKey(lang)) {
- return (HyphenationTree)hyphenTrees.get(lang);
- }
-
- HyphenationTree hTree = getResourceHyphenationTree(key);
- if (hTree == null)
- hTree = getFileHyphenationTree(key);
- // put it into the pattern cache
- if (hTree != null) {
- hyphenTrees.put(key, hTree);
- }
- return hTree;
- }
-
- /**
- * @param key
- * @return a hyphenation tree
- */
- public static HyphenationTree getResourceHyphenationTree(String key) {
- try {
- InputStream stream = BaseFont.getResourceStream(defaultHyphLocation + key + ".xml");
- if (stream == null && key.length() > 2)
- stream = BaseFont.getResourceStream(defaultHyphLocation + key.substring(0, 2) + ".xml");
- if (stream == null)
- return null;
- HyphenationTree hTree = new HyphenationTree();
- hTree.loadSimplePatterns(stream);
- return hTree;
- }
- catch (Exception e) {
- return null;
- }
- }
-
- /**
- * @param key
- * @return a hyphenation tree
- */
- public static HyphenationTree getFileHyphenationTree(String key) {
- try {
- if (hyphenDir == null)
- return null;
- InputStream stream = null;
- File hyphenFile = new File(hyphenDir, key + ".xml");
- if (hyphenFile.canRead())
- stream = new FileInputStream(hyphenFile);
- if (stream == null && key.length() > 2) {
- hyphenFile = new File(hyphenDir, key.substring(0, 2) + ".xml");
- if (hyphenFile.canRead())
- stream = new FileInputStream(hyphenFile);
- }
- if (stream == null)
- return null;
- HyphenationTree hTree = new HyphenationTree();
- hTree.loadSimplePatterns(stream);
- return hTree;
- }
- catch (Exception e) {
- return null;
- }
- }
-
- /**
- * @param lang
- * @param country
- * @param word
- * @param leftMin
- * @param rightMin
- * @return a hyphenation object
- */
- public static Hyphenation hyphenate(String lang, String country,
- String word, int leftMin,
- int rightMin) {
- HyphenationTree hTree = getHyphenationTree(lang, country);
- if (hTree == null) {
- //log.error("Error building hyphenation tree for language "
- // + lang);
- return null;
- }
- return hTree.hyphenate(word, leftMin, rightMin);
- }
-
- /**
- * @param lang
- * @param country
- * @param word
- * @param offset
- * @param len
- * @param leftMin
- * @param rightMin
- * @return a hyphenation object
- */
- public static Hyphenation hyphenate(String lang, String country,
- char[] word, int offset, int len,
- int leftMin, int rightMin) {
- HyphenationTree hTree = getHyphenationTree(lang, country);
- if (hTree == null) {
- //log.error("Error building hyphenation tree for language "
- // + lang);
- return null;
- }
- return hTree.hyphenate(word, offset, len, leftMin, rightMin);
- }
-
- /**
- * @param min
- */
- public void setMinRemainCharCount(int min) {
- remainCharCount = min;
- }
-
- /**
- * @param min
- */
- public void setMinPushCharCount(int min) {
- pushCharCount = min;
- }
-
- /**
- * @param lang
- * @param country
- */
- public void setLanguage(String lang, String country) {
- hyphenTree = getHyphenationTree(lang, country);
- }
-
- /**
- * @param word
- * @param offset
- * @param len
- * @return a hyphenation object
- */
- public Hyphenation hyphenate(char[] word, int offset, int len) {
- if (hyphenTree == null) {
- return null;
- }
- return hyphenTree.hyphenate(word, offset, len, remainCharCount,
- pushCharCount);
- }
-
- /**
- * @param word
- * @return a hyphenation object
- */
- public Hyphenation hyphenate(String word) {
- if (hyphenTree == null) {
- return null;
- }
- return hyphenTree.hyphenate(word, remainCharCount, pushCharCount);
- }
-
- /** Getter for property hyphenDir.
- * @return Value of property hyphenDir.
- */
- public static String getHyphenDir() {
- return hyphenDir;
- }
-
- /** Setter for property hyphenDir.
- * @param _hyphenDir New value of property hyphenDir.
- */
- public static void setHyphenDir(String _hyphenDir) {
- hyphenDir = _hyphenDir;
- }
-
-}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/PatternConsumer.java b/src/main/java/com/lowagie/text/pdf/hyphenation/PatternConsumer.java
deleted file mode 100644
index d7c6b63..0000000
--- a/src/main/java/com/lowagie/text/pdf/hyphenation/PatternConsumer.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.lowagie.text.pdf.hyphenation;
-
-import java.util.ArrayList;
-
-/**
- * This interface is used to connect the XML pattern file parser to
- * the hyphenation tree.
- *
- * @author Carlos Villegas <cav@uniscope.co.jp>
- */
-public interface PatternConsumer {
-
- /**
- * Add a character class.
- * A character class defines characters that are considered
- * equivalent for the purpose of hyphenation (e.g. "aA"). It
- * usually means to ignore case.
- * @param chargroup character group
- */
- void addClass(String chargroup);
-
- /**
- * Add a hyphenation exception. An exception replaces the
- * result obtained by the algorithm for cases for which this
- * fails or the user wants to provide his own hyphenation.
- * A hyphenatedword is a vector of alternating String's and
- * {@link Hyphen Hyphen} instances
- */
- void addException(String word, ArrayList hyphenatedword);
-
- /**
- * Add hyphenation patterns.
- * @param pattern the pattern
- * @param values interletter values expressed as a string of
- * digit characters.
- */
- void addPattern(String pattern, String values);
-
-}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/SimplePatternParser.java b/src/main/java/com/lowagie/text/pdf/hyphenation/SimplePatternParser.java
deleted file mode 100644
index ade8885..0000000
--- a/src/main/java/com/lowagie/text/pdf/hyphenation/SimplePatternParser.java
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Copyright 2005 by Paulo Soares.
- *
- * The contents of this file are subject to the Mozilla Public License Version 1.1
- * (the "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the License.
- *
- * The Original Code is 'iText, a free JAVA-PDF library'.
- *
- * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
- * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
- * All Rights Reserved.
- * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
- * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
- *
- * Contributor(s): all the names of the contributors are added in the source code
- * where applicable.
- *
- * Alternatively, the contents of this file may be used under the terms of the
- * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
- * provisions of LGPL are applicable instead of those above. If you wish to
- * allow use of your version of this file only under the terms of the LGPL
- * License and not to allow others to use your version of this file under
- * the MPL, indicate your decision by deleting the provisions above and
- * replace them with the notice and other provisions required by the LGPL.
- * If you do not delete the provisions above, a recipient may use your version
- * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the MPL as stated above or under the terms of the GNU
- * Library General Public License as published by the Free Software Foundation;
- * either version 2 of the License, or any later version.
- *
- * This library is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
- * details.
- *
- * If you didn't download this code from the following link, you should check if
- * you aren't using an obsolete version:
- * http://www.lowagie.com/iText/
- */
-
-package com.lowagie.text.pdf.hyphenation;
-
-import com.lowagie.text.pdf.*;
-import com.lowagie.text.ExceptionConverter;
-import java.util.ArrayList;
-import java.util.StringTokenizer;
-import java.io.InputStream;
-import java.io.IOException;
-import java.io.FileInputStream;
-
-/** Parses the xml hyphenation pattern.
- *
- * @author Paulo Soares (psoares@consiste.pt)
- */
-public class SimplePatternParser implements SimpleXMLDocHandler, PatternConsumer {
- int currElement;
- PatternConsumer consumer;
- StringBuffer token;
- ArrayList exception;
- char hyphenChar;
- SimpleXMLParser parser;
-
- static final int ELEM_CLASSES = 1;
- static final int ELEM_EXCEPTIONS = 2;
- static final int ELEM_PATTERNS = 3;
- static final int ELEM_HYPHEN = 4;
-
- /** Creates a new instance of PatternParser2 */
- public SimplePatternParser() {
- token = new StringBuffer();
- hyphenChar = '-'; // default
- }
-
- public void parse(InputStream stream, PatternConsumer consumer) {
- this.consumer = consumer;
- try {
- SimpleXMLParser.parse(this, stream);
- }
- catch (IOException e) {
- throw new ExceptionConverter(e);
- }
- finally {
- try{stream.close();}catch(Exception e){}
- }
- }
-
- protected static String getPattern(String word) {
- StringBuffer pat = new StringBuffer();
- int len = word.length();
- for (int i = 0; i < len; i++) {
- if (!Character.isDigit(word.charAt(i))) {
- pat.append(word.charAt(i));
- }
- }
- return pat.toString();
- }
-
- protected ArrayList normalizeException(ArrayList ex) {
- ArrayList res = new ArrayList();
- for (int i = 0; i < ex.size(); i++) {
- Object item = ex.get(i);
- if (item instanceof String) {
- String str = (String)item;
- StringBuffer buf = new StringBuffer();
- for (int j = 0; j < str.length(); j++) {
- char c = str.charAt(j);
- if (c != hyphenChar) {
- buf.append(c);
- } else {
- res.add(buf.toString());
- buf.setLength(0);
- char[] h = new char[1];
- h[0] = hyphenChar;
- // we use here hyphenChar which is not necessarily
- // the one to be printed
- res.add(new Hyphen(new String(h), null, null));
- }
- }
- if (buf.length() > 0) {
- res.add(buf.toString());
- }
- } else {
- res.add(item);
- }
- }
- return res;
- }
-
- protected String getExceptionWord(ArrayList ex) {
- StringBuffer res = new StringBuffer();
- for (int i = 0; i < ex.size(); i++) {
- Object item = ex.get(i);
- if (item instanceof String) {
- res.append((String)item);
- } else {
- if (((Hyphen)item).noBreak != null) {
- res.append(((Hyphen)item).noBreak);
- }
- }
- }
- return res.toString();
- }
-
- protected static String getInterletterValues(String pat) {
- StringBuffer il = new StringBuffer();
- String word = pat + "a"; // add dummy letter to serve as sentinel
- int len = word.length();
- for (int i = 0; i < len; i++) {
- char c = word.charAt(i);
- if (Character.isDigit(c)) {
- il.append(c);
- i++;
- } else {
- il.append('0');
- }
- }
- return il.toString();
- }
-
- public void endDocument() {
- }
-
- public void endElement(String tag) {
- if (token.length() > 0) {
- String word = token.toString();
- switch (currElement) {
- case ELEM_CLASSES:
- consumer.addClass(word);
- break;
- case ELEM_EXCEPTIONS:
- exception.add(word);
- exception = normalizeException(exception);
- consumer.addException(getExceptionWord(exception),
- (ArrayList)exception.clone());
- break;
- case ELEM_PATTERNS:
- consumer.addPattern(getPattern(word),
- getInterletterValues(word));
- break;
- case ELEM_HYPHEN:
- // nothing to do
- break;
- }
- if (currElement != ELEM_HYPHEN) {
- token.setLength(0);
- }
- }
- if (currElement == ELEM_HYPHEN) {
- currElement = ELEM_EXCEPTIONS;
- } else {
- currElement = 0;
- }
- }
-
- public void startDocument() {
- }
-
- public void startElement(String tag, java.util.HashMap h) {
- if (tag.equals("hyphen-char")) {
- String hh = (String)h.get("value");
- if (hh != null && hh.length() == 1) {
- hyphenChar = hh.charAt(0);
- }
- } else if (tag.equals("classes")) {
- currElement = ELEM_CLASSES;
- } else if (tag.equals("patterns")) {
- currElement = ELEM_PATTERNS;
- } else if (tag.equals("exceptions")) {
- currElement = ELEM_EXCEPTIONS;
- exception = new ArrayList();
- } else if (tag.equals("hyphen")) {
- if (token.length() > 0) {
- exception.add(token.toString());
- }
- exception.add(new Hyphen((String)h.get("pre"),
- (String)h.get("no"),
- (String)h.get("post")));
- currElement = ELEM_HYPHEN;
- }
- token.setLength(0);
- }
-
- public void text(String str) {
- StringTokenizer tk = new StringTokenizer(str);
- while (tk.hasMoreTokens()) {
- String word = tk.nextToken();
- // System.out.println("\"" + word + "\"");
- switch (currElement) {
- case ELEM_CLASSES:
- consumer.addClass(word);
- break;
- case ELEM_EXCEPTIONS:
- exception.add(word);
- exception = normalizeException(exception);
- consumer.addException(getExceptionWord(exception),
- (ArrayList)exception.clone());
- exception.clear();
- break;
- case ELEM_PATTERNS:
- consumer.addPattern(getPattern(word),
- getInterletterValues(word));
- break;
- }
- }
- }
-
- // PatternConsumer implementation for testing purposes
- public void addClass(String c) {
- System.out.println("class: " + c);
- }
-
- public void addException(String w, ArrayList e) {
- System.out.println("exception: " + w + " : " + e.toString());
- }
-
- public void addPattern(String p, String v) {
- System.out.println("pattern: " + p + " : " + v);
- }
-
- public static void main(String[] args) throws Exception {
- try {
- if (args.length > 0) {
- SimplePatternParser pp = new SimplePatternParser();
- pp.parse(new FileInputStream(args[0]), pp);
- }
- }
- catch (Exception e) {
- e.printStackTrace();
- }
- }
-}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/TernaryTree.java b/src/main/java/com/lowagie/text/pdf/hyphenation/TernaryTree.java
deleted file mode 100644
index 0dc16e5..0000000
--- a/src/main/java/com/lowagie/text/pdf/hyphenation/TernaryTree.java
+++ /dev/null
@@ -1,667 +0,0 @@
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.lowagie.text.pdf.hyphenation;
-
-import java.util.Enumeration;
-import java.util.Stack;
-import java.io.Serializable;
-
-/**
- * <h2>Ternary Search Tree.</h2>
- *
- * <p>A ternary search tree is a hibrid between a binary tree and
- * a digital search tree (trie). Keys are limited to strings.
- * A data value of type char is stored in each leaf node.
- * It can be used as an index (or pointer) to the data.
- * Branches that only contain one key are compressed to one node
- * by storing a pointer to the trailer substring of the key.
- * This class is intended to serve as base class or helper class
- * to implement Dictionary collections or the like. Ternary trees
- * have some nice properties as the following: the tree can be
- * traversed in sorted order, partial matches (wildcard) can be
- * implemented, retrieval of all keys within a given distance
- * from the target, etc. The storage requirements are higher than
- * a binary tree but a lot less than a trie. Performance is
- * comparable with a hash table, sometimes it outperforms a hash
- * function (most of the time can determine a miss faster than a hash).</p>
- *
- * <p>The main purpose of this java port is to serve as a base for
- * implementing TeX's hyphenation algorithm (see The TeXBook,
- * appendix H). Each language requires from 5000 to 15000 hyphenation
- * patterns which will be keys in this tree. The strings patterns
- * are usually small (from 2 to 5 characters), but each char in the
- * tree is stored in a node. Thus memory usage is the main concern.
- * We will sacrify 'elegance' to keep memory requirenments to the
- * minimum. Using java's char type as pointer (yes, I know pointer
- * it is a forbidden word in java) we can keep the size of the node
- * to be just 8 bytes (3 pointers and the data char). This gives
- * room for about 65000 nodes. In my tests the english patterns
- * took 7694 nodes and the german patterns 10055 nodes,
- * so I think we are safe.</p>
- *
- * <p>All said, this is a map with strings as keys and char as value.
- * Pretty limited!. It can be extended to a general map by
- * using the string representation of an object and using the
- * char value as an index to an array that contains the object
- * values.</p>
- *
- * @author cav@uniscope.co.jp
- */
-
-public class TernaryTree implements Cloneable, Serializable {
-
- /**
- * We use 4 arrays to represent a node. I guess I should have created
- * a proper node class, but somehow Knuth's pascal code made me forget
- * we now have a portable language with virtual memory management and
- * automatic garbage collection! And now is kind of late, furthermore,
- * if it ain't broken, don't fix it.
- */
-
- /**
- * Pointer to low branch and to rest of the key when it is
- * stored directly in this node, we don't have unions in java!
- */
- protected char[] lo;
-
- /**
- * Pointer to high branch.
- */
- protected char[] hi;
-
- /**
- * Pointer to equal branch and to data when this node is a string terminator.
- */
- protected char[] eq;
-
- /**
- * <P>The character stored in this node: splitchar.
- * Two special values are reserved:</P>
- * <ul><li>0x0000 as string terminator</li>
- * <li>0xFFFF to indicate that the branch starting at
- * this node is compressed</li></ul>
- * <p>This shouldn't be a problem if we give the usual semantics to
- * strings since 0xFFFF is garanteed not to be an Unicode character.</p>
- */
- protected char[] sc;
-
- /**
- * This vector holds the trailing of the keys when the branch is compressed.
- */
- protected CharVector kv;
-
- protected char root;
- protected char freenode;
- protected int length; // number of items in tree
-
- protected static final int BLOCK_SIZE = 2048; // allocation size for arrays
-
- TernaryTree() {
- init();
- }
-
- protected void init() {
- root = 0;
- freenode = 1;
- length = 0;
- lo = new char[BLOCK_SIZE];
- hi = new char[BLOCK_SIZE];
- eq = new char[BLOCK_SIZE];
- sc = new char[BLOCK_SIZE];
- kv = new CharVector();
- }
-
- /**
- * Branches are initially compressed, needing
- * one node per key plus the size of the string
- * key. They are decompressed as needed when
- * another key with same prefix
- * is inserted. This saves a lot of space,
- * specially for long keys.
- */
- public void insert(String key, char val) {
- // make sure we have enough room in the arrays
- int len = key.length()
- + 1; // maximum number of nodes that may be generated
- if (freenode + len > eq.length) {
- redimNodeArrays(eq.length + BLOCK_SIZE);
- }
- char strkey[] = new char[len--];
- key.getChars(0, len, strkey, 0);
- strkey[len] = 0;
- root = insert(root, strkey, 0, val);
- }
-
- public void insert(char[] key, int start, char val) {
- int len = strlen(key) + 1;
- if (freenode + len > eq.length) {
- redimNodeArrays(eq.length + BLOCK_SIZE);
- }
- root = insert(root, key, start, val);
- }
-
- /**
- * The actual insertion function, recursive version.
- */
- private char insert(char p, char[] key, int start, char val) {
- int len = strlen(key, start);
- if (p == 0) {
- // this means there is no branch, this node will start a new branch.
- // Instead of doing that, we store the key somewhere else and create
- // only one node with a pointer to the key
- p = freenode++;
- eq[p] = val; // holds data
- length++;
- hi[p] = 0;
- if (len > 0) {
- sc[p] = 0xFFFF; // indicates branch is compressed
- lo[p] = (char)kv.alloc(len
- + 1); // use 'lo' to hold pointer to key
- strcpy(kv.getArray(), lo[p], key, start);
- } else {
- sc[p] = 0;
- lo[p] = 0;
- }
- return p;
- }
-
- if (sc[p] == 0xFFFF) {
- // branch is compressed: need to decompress
- // this will generate garbage in the external key array
- // but we can do some garbage collection later
- char pp = freenode++;
- lo[pp] = lo[p]; // previous pointer to key
- eq[pp] = eq[p]; // previous pointer to data
- lo[p] = 0;
- if (len > 0) {
- sc[p] = kv.get(lo[pp]);
- eq[p] = pp;
- lo[pp]++;
- if (kv.get(lo[pp]) == 0) {
- // key completly decompressed leaving garbage in key array
- lo[pp] = 0;
- sc[pp] = 0;
- hi[pp] = 0;
- } else {
- // we only got first char of key, rest is still there
- sc[pp] = 0xFFFF;
- }
- } else {
- // In this case we can save a node by swapping the new node
- // with the compressed node
- sc[pp] = 0xFFFF;
- hi[p] = pp;
- sc[p] = 0;
- eq[p] = val;
- length++;
- return p;
- }
- }
- char s = key[start];
- if (s < sc[p]) {
- lo[p] = insert(lo[p], key, start, val);
- } else if (s == sc[p]) {
- if (s != 0) {
- eq[p] = insert(eq[p], key, start + 1, val);
- } else {
- // key already in tree, overwrite data
- eq[p] = val;
- }
- } else {
- hi[p] = insert(hi[p], key, start, val);
- }
- return p;
- }
-
- /**
- * Compares 2 null terminated char arrays
- */
- public static int strcmp(char[] a, int startA, char[] b, int startB) {
- for (; a[startA] == b[startB]; startA++, startB++) {
- if (a[startA] == 0) {
- return 0;
- }
- }
- return a[startA] - b[startB];
- }
-
- /**
- * Compares a string with null terminated char array
- */
- public static int strcmp(String str, char[] a, int start) {
- int i, d, len = str.length();
- for (i = 0; i < len; i++) {
- d = (int)str.charAt(i) - a[start + i];
- if (d != 0) {
- return d;
- }
- if (a[start + i] == 0) {
- return d;
- }
- }
- if (a[start + i] != 0) {
- return (int)-a[start + i];
- }
- return 0;
-
- }
-
- public static void strcpy(char[] dst, int di, char[] src, int si) {
- while (src[si] != 0) {
- dst[di++] = src[si++];
- }
- dst[di] = 0;
- }
-
- public static int strlen(char[] a, int start) {
- int len = 0;
- for (int i = start; i < a.length && a[i] != 0; i++) {
- len++;
- }
- return len;
- }
-
- public static int strlen(char[] a) {
- return strlen(a, 0);
- }
-
- public int find(String key) {
- int len = key.length();
- char strkey[] = new char[len + 1];
- key.getChars(0, len, strkey, 0);
- strkey[len] = 0;
-
- return find(strkey, 0);
- }
-
- public int find(char[] key, int start) {
- int d;
- char p = root;
- int i = start;
- char c;
-
- while (p != 0) {
- if (sc[p] == 0xFFFF) {
- if (strcmp(key, i, kv.getArray(), lo[p]) == 0) {
- return eq[p];
- } else {
- return -1;
- }
- }
- c = key[i];
- d = c - sc[p];
- if (d == 0) {
- if (c == 0) {
- return eq[p];
- }
- i++;
- p = eq[p];
- } else if (d < 0) {
- p = lo[p];
- } else {
- p = hi[p];
- }
- }
- return -1;
- }
-
- public boolean knows(String key) {
- return (find(key) >= 0);
- }
-
- // redimension the arrays
- private void redimNodeArrays(int newsize) {
- int len = newsize < lo.length ? newsize : lo.length;
- char[] na = new char[newsize];
- System.arraycopy(lo, 0, na, 0, len);
- lo = na;
- na = new char[newsize];
- System.arraycopy(hi, 0, na, 0, len);
- hi = na;
- na = new char[newsize];
- System.arraycopy(eq, 0, na, 0, len);
- eq = na;
- na = new char[newsize];
- System.arraycopy(sc, 0, na, 0, len);
- sc = na;
- }
-
- public int size() {
- return length;
- }
-
- public Object clone() {
- TernaryTree t = new TernaryTree();
- t.lo = (char[])this.lo.clone();
- t.hi = (char[])this.hi.clone();
- t.eq = (char[])this.eq.clone();
- t.sc = (char[])this.sc.clone();
- t.kv = (CharVector)this.kv.clone();
- t.root = this.root;
- t.freenode = this.freenode;
- t.length = this.length;
-
- return t;
- }
-
- /**
- * Recursively insert the median first and then the median of the
- * lower and upper halves, and so on in order to get a balanced
- * tree. The array of keys is assumed to be sorted in ascending
- * order.
- */
- protected void insertBalanced(String[] k, char[] v, int offset, int n) {
- int m;
- if (n < 1) {
- return;
- }
- m = n >> 1;
-
- insert(k[m + offset], v[m + offset]);
- insertBalanced(k, v, offset, m);
-
- insertBalanced(k, v, offset + m + 1, n - m - 1);
- }
-
-
- /**
- * Balance the tree for best search performance
- */
- public void balance() {
- // System.out.print("Before root splitchar = "); System.out.println(sc[root]);
-
- int i = 0, n = length;
- String[] k = new String[n];
- char[] v = new char[n];
- Iterator iter = new Iterator();
- while (iter.hasMoreElements()) {
- v[i] = iter.getValue();
- k[i++] = (String)iter.nextElement();
- }
- init();
- insertBalanced(k, v, 0, n);
-
- // With uniform letter distribution sc[root] should be around 'm'
- // System.out.print("After root splitchar = "); System.out.println(sc[root]);
- }
-
- /**
- * Each node stores a character (splitchar) which is part of
- * some key(s). In a compressed branch (one that only contain
- * a single string key) the trailer of the key which is not
- * already in nodes is stored externally in the kv array.
- * As items are inserted, key substrings decrease.
- * Some substrings may completely disappear when the whole
- * branch is totally decompressed.
- * The tree is traversed to find the key substrings actually
- * used. In addition, duplicate substrings are removed using
- * a map (implemented with a TernaryTree!).
- *
- */
- public void trimToSize() {
- // first balance the tree for best performance
- balance();
-
- // redimension the node arrays
- redimNodeArrays(freenode);
-
- // ok, compact kv array
- CharVector kx = new CharVector();
- kx.alloc(1);
- TernaryTree map = new TernaryTree();
- compact(kx, map, root);
- kv = kx;
- kv.trimToSize();
- }
-
- private void compact(CharVector kx, TernaryTree map, char p) {
- int k;
- if (p == 0) {
- return;
- }
- if (sc[p] == 0xFFFF) {
- k = map.find(kv.getArray(), lo[p]);
- if (k < 0) {
- k = kx.alloc(strlen(kv.getArray(), lo[p]) + 1);
- strcpy(kx.getArray(), k, kv.getArray(), lo[p]);
- map.insert(kx.getArray(), k, (char)k);
- }
- lo[p] = (char)k;
- } else {
- compact(kx, map, lo[p]);
- if (sc[p] != 0) {
- compact(kx, map, eq[p]);
- }
- compact(kx, map, hi[p]);
- }
- }
-
-
- public Enumeration keys() {
- return new Iterator();
- }
-
- public class Iterator implements Enumeration {
-
- /**
- * current node index
- */
- int cur;
-
- /**
- * current key
- */
- String curkey;
-
- private class Item implements Cloneable {
- char parent;
- char child;
-
- public Item() {
- parent = 0;
- child = 0;
- }
-
- public Item(char p, char c) {
- parent = p;
- child = c;
- }
-
- public Object clone() {
- return new Item(parent, child);
- }
-
- }
-
- /**
- * Node stack
- */
- Stack ns;
-
- /**
- * key stack implemented with a StringBuffer
- */
- StringBuffer ks;
-
- public Iterator() {
- cur = -1;
- ns = new Stack();
- ks = new StringBuffer();
- rewind();
- }
-
- public void rewind() {
- ns.removeAllElements();
- ks.setLength(0);
- cur = root;
- run();
- }
-
- public Object nextElement() {
- String res = new String(curkey);
- cur = up();
- run();
- return res;
- }
-
- public char getValue() {
- if (cur >= 0) {
- return eq[cur];
- }
- return 0;
- }
-
- public boolean hasMoreElements() {
- return (cur != -1);
- }
-
- /**
- * traverse upwards
- */
- private int up() {
- Item i = new Item();
- int res = 0;
-
- if (ns.empty()) {
- return -1;
- }
-
- if (cur != 0 && sc[cur] == 0) {
- return lo[cur];
- }
-
- boolean climb = true;
-
- while (climb) {
- i = (Item)ns.pop();
- i.child++;
- switch (i.child) {
- case 1:
- if (sc[i.parent] != 0) {
- res = eq[i.parent];
- ns.push(i.clone());
- ks.append(sc[i.parent]);
- } else {
- i.child++;
- ns.push(i.clone());
- res = hi[i.parent];
- }
- climb = false;
- break;
-
- case 2:
- res = hi[i.parent];
- ns.push(i.clone());
- if (ks.length() > 0) {
- ks.setLength(ks.length() - 1); // pop
- }
- climb = false;
- break;
-
- default:
- if (ns.empty()) {
- return -1;
- }
- climb = true;
- break;
- }
- }
- return res;
- }
-
- /**
- * traverse the tree to find next key
- */
- private int run() {
- if (cur == -1) {
- return -1;
- }
-
- boolean leaf = false;
- while (true) {
- // first go down on low branch until leaf or compressed branch
- while (cur != 0) {
- if (sc[cur] == 0xFFFF) {
- leaf = true;
- break;
- }
- ns.push(new Item((char)cur, '\u0000'));
- if (sc[cur] == 0) {
- leaf = true;
- break;
- }
- cur = lo[cur];
- }
- if (leaf) {
- break;
- }
- // nothing found, go up one node and try again
- cur = up();
- if (cur == -1) {
- return -1;
- }
- }
- // The current node should be a data node and
- // the key should be in the key stack (at least partially)
- StringBuffer buf = new StringBuffer(ks.toString());
- if (sc[cur] == 0xFFFF) {
- int p = lo[cur];
- while (kv.get(p) != 0) {
- buf.append(kv.get(p++));
- }
- }
- curkey = buf.toString();
- return 0;
- }
-
- }
-
- public void printStats() {
- System.out.println("Number of keys = " + Integer.toString(length));
- System.out.println("Node count = " + Integer.toString(freenode));
- // System.out.println("Array length = " + Integer.toString(eq.length));
- System.out.println("Key Array length = "
- + Integer.toString(kv.length()));
-
- /*
- * for(int i=0; i<kv.length(); i++)
- * if ( kv.get(i) != 0 )
- * System.out.print(kv.get(i));
- * else
- * System.out.println("");
- * System.out.println("Keys:");
- * for(Enumeration enum = keys(); enum.hasMoreElements(); )
- * System.out.println(enum.nextElement());
- */
-
- }
-
-/* public static void main(String[] args) throws Exception {
- TernaryTree tt = new TernaryTree();
- tt.insert("Carlos", 'C');
- tt.insert("Car", 'r');
- tt.insert("palos", 'l');
- tt.insert("pa", 'p');
- tt.trimToSize();
- System.out.println((char)tt.find("Car"));
- System.out.println((char)tt.find("Carlos"));
- System.out.println((char)tt.find("alto"));
- tt.printStats();
- }*/
-
-}
-