10 files changed, 2131 insertions, 0 deletions
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/ByteVector.java b/src/main/java/com/lowagie/text/pdf/hyphenation/ByteVector.java
new file mode 100644
index 0000000..f3a83ec
--- /dev/null
+++ b/src/main/java/com/lowagie/text/pdf/hyphenation/ByteVector.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.lowagie.text.pdf.hyphenation;
+
+import java.io.Serializable;
+
+/**
+ * This class implements a simple byte vector with access to the
+ * underlying array.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class ByteVector implements Serializable {
+
+    /**
+     * Capacity increment size
+     */
+    private static final int DEFAULT_BLOCK_SIZE = 2048;
+    private int blockSize;
+
+    /**
+     * The encapsulated array
+     */
+    private byte[] array;
+
+    /**
+     * Points to next free item
+     */
+    private int n;
+
+    public ByteVector() {
+        this(DEFAULT_BLOCK_SIZE);
+    }
+
+    public ByteVector(int capacity) {
+        if (capacity > 0) {
+            blockSize = capacity;
+        } else {
+            blockSize = DEFAULT_BLOCK_SIZE;
+        }
+        array = new byte[blockSize];
+        n = 0;
+    }
+
+    public ByteVector(byte[] a) {
+        blockSize = DEFAULT_BLOCK_SIZE;
+        array = a;
+        n = 0;
+    }
+
+    public ByteVector(byte[] a, int capacity) {
+        if (capacity > 0) {
+            blockSize = capacity;
+        } else {
+            blockSize = DEFAULT_BLOCK_SIZE;
+        }
+        array = a;
+        n = 0;
+    }
+
+    public byte[] getArray() {
+        return array;
+    }
+
+    /**
+     * return number of items in array
+     */
+    public int length() {
+        return n;
+    }
+
+    /**
+     * returns current capacity of array
+     */
+    public int capacity() {
+        return array.length;
+    }
+
+    public void put(int index, byte val) {
+        array[index] = val;
+    }
+
+    public byte get(int index) {
+        return array[index];
+    }
+
+    /**
+     * This is to implement memory allocation in the array. Like malloc().
+     */
+    public int alloc(int size) {
+        int index = n;
+        int len = array.length;
+        if (n + size >= len) {
+            byte[] aux = new byte[len + blockSize];
+            System.arraycopy(array, 0, aux, 0, len);
+            array = aux;
+        }
+        n += size;
+        return index;
+    }
+
+    public void trimToSize() {
+        if (n < array.length) {
+            byte[] aux = new byte[n];
+            System.arraycopy(array, 0, aux, 0, n);
+            array = aux;
+        }
+    }
+
+}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/CharVector.java b/src/main/java/com/lowagie/text/pdf/hyphenation/CharVector.java
new file mode 100644
index 0000000..777cc7a
--- /dev/null
+++ b/src/main/java/com/lowagie/text/pdf/hyphenation/CharVector.java
@@ -0,0 +1,134 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.lowagie.text.pdf.hyphenation;
+
+import java.io.Serializable;
+
+/**
+ * This class implements a simple char vector with access to the
+ * underlying array.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class CharVector implements Cloneable, Serializable {
+
+    /**
+     * Capacity increment size
+     */
+    private static final int DEFAULT_BLOCK_SIZE = 2048;
+    private int blockSize;
+
+    /**
+     * The encapsulated array
+     */
+    private char[] array;
+
+    /**
+     * Points to next free item
+     */
+    private int n;
+
+    public CharVector() {
+        this(DEFAULT_BLOCK_SIZE);
+    }
+
+    public CharVector(int capacity) {
+        if (capacity > 0) {
+            blockSize = capacity;
+        } else {
+            blockSize = DEFAULT_BLOCK_SIZE;
+        }
+        array = new char[blockSize];
+        n = 0;
+    }
+
+    public CharVector(char[] a) {
+        blockSize = DEFAULT_BLOCK_SIZE;
+        array = a;
+        n = a.length;
+    }
+
+    public CharVector(char[] a, int capacity) {
+        if (capacity > 0) {
+            blockSize = capacity;
+        } else {
+            blockSize = DEFAULT_BLOCK_SIZE;
+        }
+        array = a;
+        n = a.length;
+    }
+
+    /**
+     * Reset Vector but don't resize or clear elements
+     */
+    public void clear() {
+        n = 0;
+    }
+
+    public Object clone() {
+        CharVector cv = new CharVector((char[])array.clone(), blockSize);
+        cv.n = this.n;
+        return cv;
+    }
+
+    public char[] getArray() {
+        return array;
+    }
+
+    /**
+     * return number of items in array
+     */
+    public int length() {
+        return n;
+    }
+
+    /**
+     * returns current capacity of array
+     */
+    public int capacity() {
+        return array.length;
+    }
+
+    public void put(int index, char val) {
+        array[index] = val;
+    }
+
+    public char get(int index) {
+        return array[index];
+    }
+
+    public int alloc(int size) {
+        int index = n;
+        int len = array.length;
+        if (n + size >= len) {
+            char[] aux = new char[len + blockSize];
+            System.arraycopy(array, 0, aux, 0, len);
+            array = aux;
+        }
+        n += size;
+        return index;
+    }
+
+    public void trimToSize() {
+        if (n < array.length) {
+            char[] aux = new char[n];
+            System.arraycopy(array, 0, aux, 0, n);
+            array = aux;
+        }
+    }
+
+}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphen.java b/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphen.java
new file mode 100644
index 0000000..7e2522c
--- /dev/null
+++ b/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphen.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.lowagie.text.pdf.hyphenation;
+
+import java.io.Serializable;
+
+/**
+ * This class represents a hyphen. A 'full' hyphen is made of 3 parts:
+ * the pre-break text, post-break text and no-break. If no line-break
+ * is generated at this position, the no-break text is used, otherwise,
+ * pre-break and post-break are used. Typically, pre-break is equal to
+ * the hyphen character and the others are empty. However, this general
+ * scheme allows support for cases in some languages where words change
+ * spelling if they're split across lines, like german's 'backen' which
+ * hyphenates 'bak-ken'. BTW, this comes from TeX.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+
+public class Hyphen implements Serializable {
+    public String preBreak;
+    public String noBreak;
+    public String postBreak;
+
+    Hyphen(String pre, String no, String post) {
+        preBreak = pre;
+        noBreak = no;
+        postBreak = post;
+    }
+
+    Hyphen(String pre) {
+        preBreak = pre;
+        noBreak = null;
+        postBreak = null;
+    }
+
+    public String toString() {
+        if (noBreak == null 
+                && postBreak == null 
+                && preBreak != null
+                && preBreak.equals("-")) {
+            return "-";
+                }
+        StringBuffer res = new StringBuffer("{");
+        res.append(preBreak);
+        res.append("}{");
+        res.append(postBreak);
+        res.append("}{");
+        res.append(noBreak);
+        res.append('}');
+        return res.toString();
+    }
+
+}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenation.java b/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenation.java
new file mode 100644
index 0000000..c86ab87
--- /dev/null
+++ b/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenation.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.lowagie.text.pdf.hyphenation;
+
+/**
+ * This class represents a hyphenated word.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class Hyphenation {
+    
+    private int[] hyphenPoints;
+    private String word;
+
+    /**
+     * number of hyphenation points in word
+     */
+    private int len;
+
+    /**
+     * rawWord as made of alternating strings and {@link Hyphen Hyphen}
+     * instances
+     */
+    Hyphenation(String word, int[] points) {
+        this.word = word;
+        hyphenPoints = points;
+        len = points.length;
+    }
+
+    /**
+     * @return the number of hyphenation points in the word
+     */
+    public int length() {
+        return len;
+    }
+
+    /**
+     * @return the pre-break text, not including the hyphen character
+     */
+    public String getPreHyphenText(int index) {
+        return word.substring(0, hyphenPoints[index]);
+    }
+
+    /**
+     * @return the post-break text
+     */
+    public String getPostHyphenText(int index) {
+        return word.substring(hyphenPoints[index]);
+    }
+
+    /**
+     * @return the hyphenation points
+     */
+    public int[] getHyphenationPoints() {
+        return hyphenPoints;
+    }
+
+    public String toString() {
+        StringBuffer str = new StringBuffer();
+        int start = 0;
+        for (int i = 0; i < len; i++) {
+            str.append(word.substring(start, hyphenPoints[i]) + "-");
+            start = hyphenPoints[i];
+        }
+        str.append(word.substring(start));
+        return str.toString();
+    }
+
+}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationException.java b/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationException.java
new file mode 100644
index 0000000..575e18d
--- /dev/null
+++ b/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationException.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.lowagie.text.pdf.hyphenation;
+
+/**
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class HyphenationException extends Exception {
+
+    public HyphenationException(String msg) {
+        super(msg);
+    }
+
+}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationTree.java b/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationTree.java
new file mode 100644
index 0000000..b0d4c0b
--- /dev/null
+++ b/src/main/java/com/lowagie/text/pdf/hyphenation/HyphenationTree.java
@@ -0,0 +1,454 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: HyphenationTree.java,v 1.40 2005/07/16 16:49:34 blowagie Exp $ */
+ 
+package com.lowagie.text.pdf.hyphenation;
+
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+/**
+ * This tree structure stores the hyphenation patterns in an efficient
+ * way for fast lookup. It provides the provides the method to
+ * hyphenate a word.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class HyphenationTree extends TernaryTree 
+            implements PatternConsumer, Serializable {
+
+    /**
+     * value space: stores the inteletter values
+     */
+    protected ByteVector vspace;
+
+    /**
+     * This map stores hyphenation exceptions
+     */
+    protected HashMap stoplist;
+
+    /**
+     * This map stores the character classes
+     */
+    protected TernaryTree classmap;
+
+    /**
+     * Temporary map to store interletter values on pattern loading.
+     */
+    private transient TernaryTree ivalues;
+
+    public HyphenationTree() {
+        stoplist = new HashMap(23);    // usually a small table
+        classmap = new TernaryTree();
+        vspace = new ByteVector();
+        vspace.alloc(1);    // this reserves index 0, which we don't use
+    }
+
+    /**
+     * Packs the values by storing them in 4 bits, two values into a byte
+     * Values range is from 0 to 9. We use zero as terminator,
+     * so we'll add 1 to the value.
+     * @param values a string of digits from '0' to '9' representing the
+     * interletter values.
+     * @return the index into the vspace array where the packed values
+     * are stored.
+     */
+    protected int packValues(String values) {
+        int i, n = values.length();
+        int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
+        int offset = vspace.alloc(m);
+        byte[] va = vspace.getArray();
+        for (i = 0; i < n; i++) {
+            int j = i >> 1;
+            byte v = (byte)((values.charAt(i) - '0' + 1) & 0x0f);
+            if ((i & 1) == 1) {
+                va[j + offset] = (byte)(va[j + offset] | v);
+            } else {
+                va[j + offset] = (byte)(v << 4);    // big endian
+            }
+        }
+        va[m - 1 + offset] = 0;    // terminator
+        return offset;
+    }
+
+    protected String unpackValues(int k) {
+        StringBuffer buf = new StringBuffer();
+        byte v = vspace.get(k++);
+        while (v != 0) {
+            char c = (char)((v >>> 4) - 1 + '0');
+            buf.append(c);
+            c = (char)(v & 0x0f);
+            if (c == 0) {
+                break;
+            }
+            c = (char)(c - 1 + '0');
+            buf.append(c);
+            v = vspace.get(k++);
+        }
+        return buf.toString();
+    }
+
+    public void loadSimplePatterns(InputStream stream) throws HyphenationException {
+        SimplePatternParser pp = new SimplePatternParser();
+        ivalues = new TernaryTree();
+
+        pp.parse(stream, this);
+
+        // patterns/values should be now in the tree
+        // let's optimize a bit
+        trimToSize();
+        vspace.trimToSize();
+        classmap.trimToSize();
+
+        // get rid of the auxiliary map
+        ivalues = null;
+    }
+
+
+    public String findPattern(String pat) {
+        int k = super.find(pat);
+        if (k >= 0) {
+            return unpackValues(k);
+        }
+        return "";
+    }
+
+    /**
+     * String compare, returns 0 if equal or
+     * t is a substring of s
+     */
+    protected int hstrcmp(char[] s, int si, char[] t, int ti) {
+        for (; s[si] == t[ti]; si++, ti++) {
+            if (s[si] == 0) {
+                return 0;
+            }
+        }
+        if (t[ti] == 0) {
+            return 0;
+        }
+        return s[si] - t[ti];
+    }
+
+    protected byte[] getValues(int k) {
+        StringBuffer buf = new StringBuffer();
+        byte v = vspace.get(k++);
+        while (v != 0) {
+            char c = (char)((v >>> 4) - 1);
+            buf.append(c);
+            c = (char)(v & 0x0f);
+            if (c == 0) {
+                break;
+            }
+            c = (char)(c - 1);
+            buf.append(c);
+            v = vspace.get(k++);
+        }
+        byte[] res = new byte[buf.length()];
+        for (int i = 0; i < res.length; i++) {
+            res[i] = (byte)buf.charAt(i);
+        }
+        return res;
+    }
+
+    /**
+     * <p>Search for all possible partial matches of word starting
+     * at index an update interletter values. In other words, it
+     * does something like:</p>
+     * <code>
+     * for(i=0; i<patterns.length; i++) {
+     * if ( word.substring(index).startsWidth(patterns[i]) )
+     * update_interletter_values(patterns[i]);
+     * }
+     * </code>
+     * <p>But it is done in an efficient way since the patterns are
+     * stored in a ternary tree. In fact, this is the whole purpose
+     * of having the tree: doing this search without having to test
+     * every single pattern. The number of patterns for languages
+     * such as English range from 4000 to 10000. Thus, doing thousands
+     * of string comparisons for each word to hyphenate would be
+     * really slow without the tree. The tradeoff is memory, but
+     * using a ternary tree instead of a trie, almost halves the
+     * the memory used by Lout or TeX. It's also faster than using
+     * a hash table</p>
+     * @param word null terminated word to match
+     * @param index start index from word
+     * @param il interletter values array to update
+     */
+    protected void searchPatterns(char[] word, int index, byte[] il) {
+        byte[] values;
+        int i = index;
+        char p, q;
+        char sp = word[i];
+        p = root;
+
+        while (p > 0 && p < sc.length) {
+            if (sc[p] == 0xFFFF) {
+                if (hstrcmp(word, i, kv.getArray(), lo[p]) == 0) {
+                    values = getValues(eq[p]);    // data pointer is in eq[]
+                    int j = index;
+                    for (int k = 0; k < values.length; k++) {
+                        if (j < il.length && values[k] > il[j]) {
+                            il[j] = values[k];
+                        }
+                        j++;
+                    }
+                }
+                return;
+            }
+            int d = sp - sc[p];
+            if (d == 0) {
+                if (sp == 0) {
+                    break;
+                }
+                sp = word[++i];
+                p = eq[p];
+                q = p;
+
+                // look for a pattern ending at this position by searching for
+                // the null char ( splitchar == 0 )
+                while (q > 0 && q < sc.length) {
+                    if (sc[q] == 0xFFFF) {        // stop at compressed branch
+                        break;
+                    }
+                    if (sc[q] == 0) {
+                        values = getValues(eq[q]);
+                        int j = index;
+                        for (int k = 0; k < values.length; k++) {
+                            if (j < il.length && values[k] > il[j]) {
+                                il[j] = values[k];
+                            }
+                            j++;
+                        }
+                        break;
+                    } else {
+                        q = lo[q];
+
+                        /**
+                         * actually the code should be:
+                         * q = sc[q] < 0 ? hi[q] : lo[q];
+                         * but java chars are unsigned
+                         */
+                    }
+                }
+            } else {
+                p = d < 0 ? lo[p] : hi[p];
+            }
+        }
+    }
+
+    /**
+     * Hyphenate word and return a Hyphenation object.
+     * @param word the word to be hyphenated
+     * @param remainCharCount Minimum number of characters allowed
+     * before the hyphenation point.
+     * @param pushCharCount Minimum number of characters allowed after
+     * the hyphenation point.
+     * @return a {@link Hyphenation Hyphenation} object representing
+     * the hyphenated word or null if word is not hyphenated.
+     */
+    public Hyphenation hyphenate(String word, int remainCharCount,
+                                 int pushCharCount) {
+        char[] w = word.toCharArray();
+        return hyphenate(w, 0, w.length, remainCharCount, pushCharCount);
+    }
+
+    /**
+     * w = "****nnllllllnnn*****",
+     * where n is a non-letter, l is a letter,
+     * all n may be absent, the first n is at offset,
+     * the first l is at offset + iIgnoreAtBeginning;
+     * word = ".llllll.'\0'***",
+     * where all l in w are copied into word.
+     * In the first part of the routine len = w.length,
+     * in the second part of the routine len = word.length.
+     * Three indices are used:
+     * index(w), the index in w,
+     * index(word), the index in word,
+     * letterindex(word), the index in the letter part of word.
+     * The following relations exist:
+     * index(w) = offset + i - 1
+     * index(word) = i - iIgnoreAtBeginning
+     * letterindex(word) = index(word) - 1
+     * (see first loop).
+     * It follows that:
+     * index(w) - index(word) = offset - 1 + iIgnoreAtBeginning
+     * index(w) = letterindex(word) + offset + iIgnoreAtBeginning
+     */
+
+    /**
+     * Hyphenate word and return an array of hyphenation points.
+     * @param w char array that contains the word
+     * @param offset Offset to first character in word
+     * @param len Length of word
+     * @param remainCharCount Minimum number of characters allowed
+     * before the hyphenation point.
+     * @param pushCharCount Minimum number of characters allowed after
+     * the hyphenation point.
+     * @return a {@link Hyphenation Hyphenation} object representing
+     * the hyphenated word or null if word is not hyphenated.
+     */
+    public Hyphenation hyphenate(char[] w, int offset, int len,
+                                 int remainCharCount, int pushCharCount) {
+        int i;
+        char[] word = new char[len + 3];
+
+        // normalize word
+        char[] c = new char[2];
+        int iIgnoreAtBeginning = 0;
+        int iLength = len;
+        boolean bEndOfLetters = false;
+        for (i = 1; i <= len; i++) {
+            c[0] = w[offset + i - 1];
+            int nc = classmap.find(c, 0);
+            if (nc < 0) {    // found a non-letter character ...
+                if (i == (1 + iIgnoreAtBeginning)) {
+                    // ... before any letter character
+                    iIgnoreAtBeginning ++;
+                } else {
+                    // ... after a letter character
+                    bEndOfLetters = true;
+                }
+                iLength --;
+            } else {
+                if (!bEndOfLetters) {
+                    word[i - iIgnoreAtBeginning] = (char)nc;
+                } else {
+                    return null;
+                }
+            }
+        }
+        len = iLength;
+        if (len < (remainCharCount + pushCharCount)) {
+            // word is too short to be hyphenated
+            return null;
+        }
+        int[] result = new int[len + 1];
+        int k = 0;
+
+        // check exception list first
+        String sw = new String(word, 1, len);
+        if (stoplist.containsKey(sw)) {
+            // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no = null)
+            ArrayList hw = (ArrayList)stoplist.get(sw);
+            int j = 0;
+            for (i = 0; i < hw.size(); i++) {
+                Object o = hw.get(i);
+                // j = index(sw) = letterindex(word)?
+                // result[k] = corresponding index(w)
+                if (o instanceof String) {
+                    j += ((String)o).length();
+                    if (j >= remainCharCount && j < (len - pushCharCount)) {
+                        result[k++] = j + iIgnoreAtBeginning;
+                    }
+                }
+            }
+        } else {
+            // use algorithm to get hyphenation points
+            word[0] = '.';                    // word start marker
+            word[len + 1] = '.';              // word end marker
+            word[len + 2] = 0;                // null terminated
+            byte[] il = new byte[len + 3];    // initialized to zero
+            for (i = 0; i < len + 1; i++) {
+                searchPatterns(word, i, il);
+            }
+
+            // hyphenation points are located where interletter value is odd
+            // i is letterindex(word),
+            // i + 1 is index(word),
+            // result[k] = corresponding index(w)
+            for (i = 0; i < len; i++) {
+                if (((il[i + 1] & 1) == 1) && i >= remainCharCount
+                        && i <= (len - pushCharCount)) {
+                    result[k++] = i + iIgnoreAtBeginning;
+                }
+            }
+        }
+
+
+        if (k > 0) {
+            // trim result array
+            int[] res = new int[k];
+            System.arraycopy(result, 0, res, 0, k);
+            return new Hyphenation(new String(w, offset, len), res);
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * Add a character class to the tree. It is used by
+     * {@link SimplePatternParser SimplePatternParser} as callback to
+     * add character classes. Character classes define the
+     * valid word characters for hyphenation. If a word contains
+     * a character not defined in any of the classes, it is not hyphenated.
+     * It also defines a way to normalize the characters in order
+     * to compare them with the stored patterns. Usually pattern
+     * files use only lower case characters, in this case a class
+     * for letter 'a', for example, should be defined as "aA", the first
+     * character being the normalization char.
+     */
+    public void addClass(String chargroup) {
+        if (chargroup.length() > 0) {
+            char equivChar = chargroup.charAt(0);
+            char[] key = new char[2];
+            key[1] = 0;
+            for (int i = 0; i < chargroup.length(); i++) {
+                key[0] = chargroup.charAt(i);
+                classmap.insert(key, 0, equivChar);
+            }
+        }
+    }
+
+    /**
+     * Add an exception to the tree. It is used by
+     * {@link SimplePatternParser SimplePatternParser} class as callback to
+     * store the hyphenation exceptions.
+     * @param word normalized word
+     * @param hyphenatedword a vector of alternating strings and
+     * {@link Hyphen hyphen} objects.
+     */
+    public void addException(String word, ArrayList hyphenatedword) {
+        stoplist.put(word, hyphenatedword);
+    }
+
+    /**
+     * Add a pattern to the tree. Mainly, to be used by
+     * {@link SimplePatternParser SimplePatternParser} class as callback to
+     * add a pattern to the tree.
+     * @param pattern the hyphenation pattern
+     * @param ivalue interletter weight values indicating the
+     * desirability and priority of hyphenating at a given point
+     * within the pattern. It should contain only digit characters.
+     * (i.e. '0' to '9').
+     */
+    public void addPattern(String pattern, String ivalue) {
+        int k = ivalues.find(ivalue);
+        if (k <= 0) {
+            k = packValues(ivalue);
+            ivalues.insert(ivalue, (char)k);
+        }
+        insert(pattern, (char)k);
+    }
+
+    public void printStats() {
+        System.out.println("Value space size = "
+                           + Integer.toString(vspace.length()));
+        super.printStats();
+    }
+}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenator.java b/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenator.java
new file mode 100644
index 0000000..bdd34b9
--- /dev/null
+++ b/src/main/java/com/lowagie/text/pdf/hyphenation/Hyphenator.java
@@ -0,0 +1,240 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.lowagie.text.pdf.hyphenation;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.Hashtable;
+
+import com.lowagie.text.pdf.BaseFont;
+
+/**
+ * This class is the main entry point to the hyphenation package.
+ * You can use only the static methods or create an instance.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class Hyphenator {
+    
+    /** TODO: Don't use statics */
+    private static Hashtable hyphenTrees = new Hashtable();
+
+    private HyphenationTree hyphenTree = null;
+    private int remainCharCount = 2;
+    private int pushCharCount = 2;
+    private static boolean errorDump = false;
+    private static final String defaultHyphLocation = "com/lowagie/text/pdf/hyphenation/hyph/";
+   
+    /** Holds value of property hyphenDir. */
+    private static String hyphenDir = "";    
+
+    /**
+     * @param lang
+     * @param country
+     * @param leftMin
+     * @param rightMin
+     */
+    public Hyphenator(String lang, String country, int leftMin,
+                      int rightMin) {
+        hyphenTree = getHyphenationTree(lang, country);
+        remainCharCount = leftMin;
+        pushCharCount = rightMin;
+    }
+
+    /**
+     * @param lang
+     * @param country
+     * @return the hyphenation tree
+     */
+    public static HyphenationTree getHyphenationTree(String lang,
+            String country) {
+        String key = lang;
+        // check whether the country code has been used
+        if (country != null && !country.equals("none")) {
+            key += "_" + country;
+        }
+            // first try to find it in the cache
+        if (hyphenTrees.containsKey(key)) {
+            return (HyphenationTree)hyphenTrees.get(key);
+        }
+        if (hyphenTrees.containsKey(lang)) {
+            return (HyphenationTree)hyphenTrees.get(lang);
+        }
+
+        HyphenationTree hTree = getResourceHyphenationTree(key);
+        if (hTree == null)
+            hTree = getFileHyphenationTree(key);
+        // put it into the pattern cache
+        if (hTree != null) {
+            hyphenTrees.put(key, hTree);
+        }
+        return hTree;
+    }
+
+    /**
+     * @param key
+     * @return a hyphenation tree
+     */
+    public static HyphenationTree getResourceHyphenationTree(String key) {
+        try {
+            InputStream stream = BaseFont.getResourceStream(defaultHyphLocation + key + ".xml");
+            if (stream == null && key.length() > 2)
+                stream = BaseFont.getResourceStream(defaultHyphLocation + key.substring(0, 2) + ".xml");
+            if (stream == null)
+                return null;
+            HyphenationTree hTree = new HyphenationTree();
+            hTree.loadSimplePatterns(stream);
+            return hTree;
+        }
+        catch (Exception e) {
+            return null;
+        }
+    }
+
+    /**
+     * @param key
+     * @return a hyphenation tree
+     */
+    public static HyphenationTree getFileHyphenationTree(String key) {
+        try {
+            if (hyphenDir == null)
+                return null;
+            InputStream stream = null;
+            File hyphenFile = new File(hyphenDir, key + ".xml");
+            if (hyphenFile.canRead())
+                stream = new FileInputStream(hyphenFile);
+            if (stream == null && key.length() > 2) {
+                hyphenFile = new File(hyphenDir, key.substring(0, 2) + ".xml");
+                if (hyphenFile.canRead())
+                    stream = new FileInputStream(hyphenFile);
+            }
+            if (stream == null)
+                return null;
+            HyphenationTree hTree = new HyphenationTree();
+            hTree.loadSimplePatterns(stream);
+            return hTree;
+        }
+        catch (Exception e) {
+            return null;
+        }
+    }
+
+    /**
+     * @param lang
+     * @param country
+     * @param word
+     * @param leftMin
+     * @param rightMin
+     * @return a hyphenation object
+     */
+    public static Hyphenation hyphenate(String lang, String country,
+                                        String word, int leftMin,
+                                        int rightMin) {
+        HyphenationTree hTree = getHyphenationTree(lang, country);
+        if (hTree == null) {
+            //log.error("Error building hyphenation tree for language "
+            //                       + lang);
+            return null;
+        }
+        return hTree.hyphenate(word, leftMin, rightMin);
+    }
+
+    /**
+     * @param lang
+     * @param country
+     * @param word
+     * @param offset
+     * @param len
+     * @param leftMin
+     * @param rightMin
+     * @return a hyphenation object
+     */
+    public static Hyphenation hyphenate(String lang, String country,
+                                        char[] word, int offset, int len,
+                                        int leftMin, int rightMin) {
+        HyphenationTree hTree = getHyphenationTree(lang, country);
+        if (hTree == null) {
+            //log.error("Error building hyphenation tree for language "
+            //                       + lang);
+            return null;
+        }
+        return hTree.hyphenate(word, offset, len, leftMin, rightMin);
+    }
+
+    /**
+     * @param min
+     */
+    public void setMinRemainCharCount(int min) {
+        remainCharCount = min;
+    }
+
+    /**
+     * @param min
+     */
+    public void setMinPushCharCount(int min) {
+        pushCharCount = min;
+    }
+
+    /**
+     * @param lang
+     * @param country
+     */
+    public void setLanguage(String lang, String country) {
+        hyphenTree = getHyphenationTree(lang, country);
+    }
+
+    /**
+     * @param word
+     * @param offset
+     * @param len
+     * @return a hyphenation object
+     */
+    public Hyphenation hyphenate(char[] word, int offset, int len) {
+        if (hyphenTree == null) {
+            return null;
+        }
+        return hyphenTree.hyphenate(word, offset, len, remainCharCount,
+                                    pushCharCount);
+    }
+
+    /**
+     * @param word
+     * @return a hyphenation object
+     */
+    public Hyphenation hyphenate(String word) {
+        if (hyphenTree == null) {
+            return null;
+        }
+        return hyphenTree.hyphenate(word, remainCharCount, pushCharCount);
+    }
+
+    /** Getter for property hyphenDir.
+     * @return Value of property hyphenDir.
+     */
+    public static String getHyphenDir() {
+        return hyphenDir;
+    }
+    
+    /** Setter for property hyphenDir.
+     * @param _hyphenDir New value of property hyphenDir.
+     */
+    public static void setHyphenDir(String _hyphenDir) {
+        hyphenDir = _hyphenDir;
+    }
+    
+}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/PatternConsumer.java b/src/main/java/com/lowagie/text/pdf/hyphenation/PatternConsumer.java
new file mode 100644
index 0000000..d7c6b63
--- /dev/null
+++ b/src/main/java/com/lowagie/text/pdf/hyphenation/PatternConsumer.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.lowagie.text.pdf.hyphenation;
+
+import java.util.ArrayList;
+
+/**
+ * This interface is used to connect the XML pattern file parser to
+ * the hyphenation tree.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public interface PatternConsumer {
+
+    /**
+     * Add a character class.
+     * A character class defines characters that are considered
+     * equivalent for the purpose of hyphenation (e.g. "aA"). It
+     * usually means to ignore case.
+     * @param chargroup character group
+     */
+    void addClass(String chargroup);
+
+    /**
+     * Add a hyphenation exception. An exception replaces the
+     * result obtained by the algorithm for cases for which this
+     * fails or the user wants to provide his own hyphenation.
+     * A hyphenatedword is a vector of alternating String's and
+     * {@link Hyphen Hyphen} instances
+     */
+    void addException(String word, ArrayList hyphenatedword);
+
+    /**
+     * Add hyphenation patterns.
+     * @param pattern the pattern
+     * @param values interletter values expressed as a string of
+     * digit characters.
+     */
+    void addPattern(String pattern, String values);
+
+}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/SimplePatternParser.java b/src/main/java/com/lowagie/text/pdf/hyphenation/SimplePatternParser.java
new file mode 100644
index 0000000..ade8885
--- /dev/null
+++ b/src/main/java/com/lowagie/text/pdf/hyphenation/SimplePatternParser.java
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2005 by Paulo Soares.
+ *
+ * The contents of this file are subject to the Mozilla Public License Version 1.1
+ * (the "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the License.
+ *
+ * The Original Code is 'iText, a free JAVA-PDF library'.
+ *
+ * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
+ * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
+ * All Rights Reserved.
+ * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
+ * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
+ *
+ * Contributor(s): all the names of the contributors are added in the source code
+ * where applicable.
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
+ * provisions of LGPL are applicable instead of those above.  If you wish to
+ * allow use of your version of this file only under the terms of the LGPL
+ * License and not to allow others to use your version of this file under
+ * the MPL, indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by the LGPL.
+ * If you do not delete the provisions above, a recipient may use your version
+ * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the MPL as stated above or under the terms of the GNU
+ * Library General Public License as published by the Free Software Foundation;
+ * either version 2 of the License, or any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
+ * details.
+ *
+ * If you didn't download this code from the following link, you should check if
+ * you aren't using an obsolete version:
+ * http://www.lowagie.com/iText/
+ */
+
+package com.lowagie.text.pdf.hyphenation;
+
+import com.lowagie.text.pdf.*;
+import com.lowagie.text.ExceptionConverter;
+import java.util.ArrayList;
+import java.util.StringTokenizer;
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.FileInputStream;
+
+/** Parses the xml hyphenation pattern.
+ *
+ * @author Paulo Soares (psoares@consiste.pt)
+ */
+public class SimplePatternParser implements SimpleXMLDocHandler, PatternConsumer {
+    int currElement;
+    PatternConsumer consumer;
+    StringBuffer token;
+    ArrayList exception;
+    char hyphenChar;
+    SimpleXMLParser parser;
+    
+    static final int ELEM_CLASSES = 1;
+    static final int ELEM_EXCEPTIONS = 2;
+    static final int ELEM_PATTERNS = 3;
+    static final int ELEM_HYPHEN = 4;
+
+    /** Creates a new instance of PatternParser2 */
+    public SimplePatternParser() {
+        token = new StringBuffer();
+        hyphenChar = '-';    // default
+    }
+    
+    public void parse(InputStream stream, PatternConsumer consumer) {
+        this.consumer = consumer;
+        try {
+            SimpleXMLParser.parse(this, stream);
+        }
+        catch (IOException e) {
+            throw new ExceptionConverter(e);
+        }
+        finally {
+            try{stream.close();}catch(Exception e){}
+        }
+    }
+    
+    protected static String getPattern(String word) {
+        StringBuffer pat = new StringBuffer();
+        int len = word.length();
+        for (int i = 0; i < len; i++) {
+            if (!Character.isDigit(word.charAt(i))) {
+                pat.append(word.charAt(i));
+            }
+        }
+        return pat.toString();
+    }
+
+    protected ArrayList normalizeException(ArrayList ex) {
+        ArrayList res = new ArrayList();
+        for (int i = 0; i < ex.size(); i++) {
+            Object item = ex.get(i);
+            if (item instanceof String) {
+                String str = (String)item;
+                StringBuffer buf = new StringBuffer();
+                for (int j = 0; j < str.length(); j++) {
+                    char c = str.charAt(j);
+                    if (c != hyphenChar) {
+                        buf.append(c);
+                    } else {
+                        res.add(buf.toString());
+                        buf.setLength(0);
+                        char[] h = new char[1];
+                        h[0] = hyphenChar;
+                        // we use here hyphenChar which is not necessarily
+                        // the one to be printed
+                        res.add(new Hyphen(new String(h), null, null));
+                    }
+                }
+                if (buf.length() > 0) {
+                    res.add(buf.toString());
+                }
+            } else {
+                res.add(item);
+            }
+        }
+        return res;
+    }
+
+    protected String getExceptionWord(ArrayList ex) {
+        StringBuffer res = new StringBuffer();
+        for (int i = 0; i < ex.size(); i++) {
+            Object item = ex.get(i);
+            if (item instanceof String) {
+                res.append((String)item);
+            } else {
+                if (((Hyphen)item).noBreak != null) {
+                    res.append(((Hyphen)item).noBreak);
+                }
+            }
+        }
+        return res.toString();
+    }
+
+    protected static String getInterletterValues(String pat) {
+        StringBuffer il = new StringBuffer();
+        String word = pat + "a";    // add dummy letter to serve as sentinel
+        int len = word.length();
+        for (int i = 0; i < len; i++) {
+            char c = word.charAt(i);
+            if (Character.isDigit(c)) {
+                il.append(c);
+                i++;
+            } else {
+                il.append('0');
+            }
+        }
+        return il.toString();
+    }
+
+    public void endDocument() {
+    }
+    
+    public void endElement(String tag) {
+        if (token.length() > 0) {
+            String word = token.toString();
+            switch (currElement) {
+            case ELEM_CLASSES:
+                consumer.addClass(word);
+                break;
+            case ELEM_EXCEPTIONS:
+                exception.add(word);
+                exception = normalizeException(exception);
+                consumer.addException(getExceptionWord(exception),
+                                      (ArrayList)exception.clone());
+                break;
+            case ELEM_PATTERNS:
+                consumer.addPattern(getPattern(word),
+                                    getInterletterValues(word));
+                break;
+            case ELEM_HYPHEN:
+                // nothing to do
+                break;
+            }
+            if (currElement != ELEM_HYPHEN) {
+                token.setLength(0);
+            }
+        }
+        if (currElement == ELEM_HYPHEN) {
+            currElement = ELEM_EXCEPTIONS;
+        } else {
+            currElement = 0;
+        }
+    }
+    
+    public void startDocument() {
+    }
+    
+    public void startElement(String tag, java.util.HashMap h) {
+        if (tag.equals("hyphen-char")) {
+            String hh = (String)h.get("value");
+            if (hh != null && hh.length() == 1) {
+                hyphenChar = hh.charAt(0);
+            }
+        } else if (tag.equals("classes")) {
+            currElement = ELEM_CLASSES;
+        } else if (tag.equals("patterns")) {
+            currElement = ELEM_PATTERNS;
+        } else if (tag.equals("exceptions")) {
+            currElement = ELEM_EXCEPTIONS;
+            exception = new ArrayList();
+        } else if (tag.equals("hyphen")) {
+            if (token.length() > 0) {
+                exception.add(token.toString());
+            }
+            exception.add(new Hyphen((String)h.get("pre"),
+                                            (String)h.get("no"),
+                                            (String)h.get("post")));
+            currElement = ELEM_HYPHEN;
+        }
+        token.setLength(0);
+    }
+    
+    public void text(String str) {
+        StringTokenizer tk = new StringTokenizer(str);
+        while (tk.hasMoreTokens()) {
+            String word = tk.nextToken();
+            // System.out.println("\"" + word + "\"");
+            switch (currElement) {
+            case ELEM_CLASSES:
+                consumer.addClass(word);
+                break;
+            case ELEM_EXCEPTIONS:
+                exception.add(word);
+                exception = normalizeException(exception);
+                consumer.addException(getExceptionWord(exception),
+                                      (ArrayList)exception.clone());
+                exception.clear();
+                break;
+            case ELEM_PATTERNS:
+                consumer.addPattern(getPattern(word),
+                                    getInterletterValues(word));
+                break;
+            }
+        }
+    }
+    
+    // PatternConsumer implementation for testing purposes
+    public void addClass(String c) {
+        System.out.println("class: " + c);
+    }
+
+    public void addException(String w, ArrayList e) {
+        System.out.println("exception: " + w + " : " + e.toString());
+    }
+
+    public void addPattern(String p, String v) {
+        System.out.println("pattern: " + p + " : " + v);
+    }
+
+    public static void main(String[] args) throws Exception {
+        try {
+            if (args.length > 0) {
+                SimplePatternParser pp = new SimplePatternParser();
+                pp.parse(new FileInputStream(args[0]), pp);
+            }
+        }
+        catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+}
diff --git a/src/main/java/com/lowagie/text/pdf/hyphenation/TernaryTree.java b/src/main/java/com/lowagie/text/pdf/hyphenation/TernaryTree.java
new file mode 100644
index 0000000..0dc16e5
--- /dev/null
+++ b/src/main/java/com/lowagie/text/pdf/hyphenation/TernaryTree.java
@@ -0,0 +1,667 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.lowagie.text.pdf.hyphenation;
+
+import java.util.Enumeration;
+import java.util.Stack;
+import java.io.Serializable;
+
+/**
+ * <h2>Ternary Search Tree.</h2>
+ *
+ * <p>A ternary search tree is a hibrid between a binary tree and
+ * a digital search tree (trie). Keys are limited to strings.
+ * A data value of type char is stored in each leaf node.
+ * It can be used as an index (or pointer) to the data.
+ * Branches that only contain one key are compressed to one node
+ * by storing a pointer to the trailer substring of the key.
+ * This class is intended to serve as base class or helper class
+ * to implement Dictionary collections or the like. Ternary trees
+ * have some nice properties as the following: the tree can be
+ * traversed in sorted order, partial matches (wildcard) can be
+ * implemented, retrieval of all keys within a given distance
+ * from the target, etc. The storage requirements are higher than
+ * a binary tree but a lot less than a trie. Performance is
+ * comparable with a hash table, sometimes it outperforms a hash
+ * function (most of the time can determine a miss faster than a hash).</p>
+ *
+ * <p>The main purpose of this java port is to serve as a base for
+ * implementing TeX's hyphenation algorithm (see The TeXBook,
+ * appendix H). Each language requires from 5000 to 15000 hyphenation
+ * patterns which will be keys in this tree. The strings patterns
+ * are usually small (from 2 to 5 characters), but each char in the
+ * tree is stored in a node. Thus memory usage is the main concern.
+ * We will sacrify 'elegance' to keep memory requirenments to the
+ * minimum. Using java's char type as pointer (yes, I know pointer
+ * it is a forbidden word in java) we can keep the size of the node
+ * to be just 8 bytes (3 pointers and the data char). This gives
+ * room for about 65000 nodes. In my tests the english patterns
+ * took 7694 nodes and the german patterns 10055 nodes,
+ * so I think we are safe.</p>
+ *
+ * <p>All said, this is a map with strings as keys and char as value.
+ * Pretty limited!. It can be extended to a general map by
+ * using the string representation of an object and using the
+ * char value as an index to an array that contains the object
+ * values.</p>
+ *
+ * @author cav@uniscope.co.jp
+ */
+
+public class TernaryTree implements Cloneable, Serializable {
+
+    /**
+     * We use 4 arrays to represent a node. I guess I should have created
+     * a proper node class, but somehow Knuth's pascal code made me forget
+     * we now have a portable language with virtual memory management and
+     * automatic garbage collection! And now is kind of late, furthermore,
+     * if it ain't broken, don't fix it.
+     */
+
+    /**
+     * Pointer to low branch and to rest of the key when it is
+     * stored directly in this node, we don't have unions in java!
+     */
+    protected char[] lo;
+
+    /**
+     * Pointer to high branch.
+     */
+    protected char[] hi;
+
+    /**
+     * Pointer to equal branch and to data when this node is a string terminator.
+     */
+    protected char[] eq;
+
+    /**
+     * <P>The character stored in this node: splitchar.
+     * Two special values are reserved:</P>
+     * <ul><li>0x0000 as string terminator</li>
+     * <li>0xFFFF to indicate that the branch starting at
+     * this node is compressed</li></ul>
+     * <p>This shouldn't be a problem if we give the usual semantics to
+     * strings since 0xFFFF is garanteed not to be an Unicode character.</p>
+     */
+    protected char[] sc;
+
+    /**
+     * This vector holds the trailing of the keys when the branch is compressed.
+     */
+    protected CharVector kv;
+
+    protected char root;
+    protected char freenode;
+    protected int length;    // number of items in tree
+
+    protected static final int BLOCK_SIZE = 2048;    // allocation size for arrays
+
+    TernaryTree() {
+        init();
+    }
+
+    protected void init() {
+        root = 0;
+        freenode = 1;
+        length = 0;
+        lo = new char[BLOCK_SIZE];
+        hi = new char[BLOCK_SIZE];
+        eq = new char[BLOCK_SIZE];
+        sc = new char[BLOCK_SIZE];
+        kv = new CharVector();
+    }
+
+    /**
+     * Branches are initially compressed, needing
+     * one node per key plus the size of the string
+     * key. They are decompressed as needed when
+     * another key with same prefix
+     * is inserted. This saves a lot of space,
+     * specially for long keys.
+     */
+    public void insert(String key, char val) {
+        // make sure we have enough room in the arrays
+        int len = key.length()
+                  + 1;    // maximum number of nodes that may be generated
+        if (freenode + len > eq.length) {
+            redimNodeArrays(eq.length + BLOCK_SIZE);
+        }
+        char strkey[] = new char[len--];
+        key.getChars(0, len, strkey, 0);
+        strkey[len] = 0;
+        root = insert(root, strkey, 0, val);
+    }
+
+    public void insert(char[] key, int start, char val) {
+        int len = strlen(key) + 1;
+        if (freenode + len > eq.length) {
+            redimNodeArrays(eq.length + BLOCK_SIZE);
+        }
+        root = insert(root, key, start, val);
+    }
+
+    /**
+     * The actual insertion function, recursive version.
+     */
+    private char insert(char p, char[] key, int start, char val) {
+        int len = strlen(key, start);
+        if (p == 0) {
+            // this means there is no branch, this node will start a new branch.
+            // Instead of doing that, we store the key somewhere else and create
+            // only one node with a pointer to the key
+            p = freenode++;
+            eq[p] = val;           // holds data
+            length++;
+            hi[p] = 0;
+            if (len > 0) {
+                sc[p] = 0xFFFF;    // indicates branch is compressed
+                lo[p] = (char)kv.alloc(len
+                                       + 1);    // use 'lo' to hold pointer to key
+                strcpy(kv.getArray(), lo[p], key, start);
+            } else {
+                sc[p] = 0;
+                lo[p] = 0;
+            }
+            return p;
+        }
+
+        if (sc[p] == 0xFFFF) {
+            // branch is compressed: need to decompress
+            // this will generate garbage in the external key array
+            // but we can do some garbage collection later
+            char pp = freenode++;
+            lo[pp] = lo[p];    // previous pointer to key
+            eq[pp] = eq[p];    // previous pointer to data
+            lo[p] = 0;
+            if (len > 0) {
+                sc[p] = kv.get(lo[pp]);
+                eq[p] = pp;
+                lo[pp]++;
+                if (kv.get(lo[pp]) == 0) {
+                    // key completly decompressed leaving garbage in key array
+                    lo[pp] = 0;
+                    sc[pp] = 0;
+                    hi[pp] = 0;
+                } else {
+                    // we only got first char of key, rest is still there
+                    sc[pp] = 0xFFFF;
+                }
+            } else {
+                // In this case we can save a node by swapping the new node
+                // with the compressed node
+                sc[pp] = 0xFFFF;
+                hi[p] = pp;
+                sc[p] = 0;
+                eq[p] = val;
+                length++;
+                return p;
+            }
+        }
+        char s = key[start];
+        if (s < sc[p]) {
+            lo[p] = insert(lo[p], key, start, val);
+        } else if (s == sc[p]) {
+            if (s != 0) {
+                eq[p] = insert(eq[p], key, start + 1, val);
+            } else {
+                // key already in tree, overwrite data
+                eq[p] = val;
+            }
+        } else {
+            hi[p] = insert(hi[p], key, start, val);
+        }
+        return p;
+    }
+
+    /**
+     * Compares 2 null terminated char arrays
+     */
+    public static int strcmp(char[] a, int startA, char[] b, int startB) {
+        for (; a[startA] == b[startB]; startA++, startB++) {
+            if (a[startA] == 0) {
+                return 0;
+            }
+        }
+        return a[startA] - b[startB];
+    }
+
+    /**
+     * Compares a string with null terminated char array
+     */
+    public static int strcmp(String str, char[] a, int start) {
+        int i, d, len = str.length();
+        for (i = 0; i < len; i++) {
+            d = (int)str.charAt(i) - a[start + i];
+            if (d != 0) {
+                return d;
+            }
+            if (a[start + i] == 0) {
+                return d;
+        }
+        }
+        if (a[start + i] != 0) {
+            return (int)-a[start + i];
+        }
+        return 0;
+
+    }
+
+    public static void strcpy(char[] dst, int di, char[] src, int si) {
+        while (src[si] != 0) {
+            dst[di++] = src[si++];
+        }
+        dst[di] = 0;
+    }
+
+    public static int strlen(char[] a, int start) {
+        int len = 0;
+        for (int i = start; i < a.length && a[i] != 0; i++) {
+            len++;
+        }
+        return len;
+    }
+
+    public static int strlen(char[] a) {
+        return strlen(a, 0);
+    }
+
+    public int find(String key) {
+        int len = key.length();
+        char strkey[] = new char[len + 1];
+        key.getChars(0, len, strkey, 0);
+        strkey[len] = 0;
+
+        return find(strkey, 0);
+    }
+
+    public int find(char[] key, int start) {
+        int d;
+        char p = root;
+        int i = start;
+        char c;
+
+        while (p != 0) {
+            if (sc[p] == 0xFFFF) {
+                if (strcmp(key, i, kv.getArray(), lo[p]) == 0) {
+                    return eq[p];
+                } else {
+                    return -1;
+            }
+            }
+            c = key[i];
+            d = c - sc[p];
+            if (d == 0) {
+                if (c == 0) {
+                    return eq[p];
+                }
+                i++;
+                p = eq[p];
+            } else if (d < 0) {
+                p = lo[p];
+            } else {
+                p = hi[p];
+        }
+        }
+        return -1;
+    }
+
+    public boolean knows(String key) {
+        return (find(key) >= 0);
+    }
+
+    // redimension the arrays
+    private void redimNodeArrays(int newsize) {
+        int len = newsize < lo.length ? newsize : lo.length;
+        char[] na = new char[newsize];
+        System.arraycopy(lo, 0, na, 0, len);
+        lo = na;
+        na = new char[newsize];
+        System.arraycopy(hi, 0, na, 0, len);
+        hi = na;
+        na = new char[newsize];
+        System.arraycopy(eq, 0, na, 0, len);
+        eq = na;
+        na = new char[newsize];
+        System.arraycopy(sc, 0, na, 0, len);
+        sc = na;
+    }
+
+    public int size() {
+        return length;
+    }
+
+    public Object clone() {
+        TernaryTree t = new TernaryTree();
+        t.lo = (char[])this.lo.clone();
+        t.hi = (char[])this.hi.clone();
+        t.eq = (char[])this.eq.clone();
+        t.sc = (char[])this.sc.clone();
+        t.kv = (CharVector)this.kv.clone();
+        t.root = this.root;
+        t.freenode = this.freenode;
+        t.length = this.length;
+
+        return t;
+    }
+
+    /**
+     * Recursively insert the median first and then the median of the
+     * lower and upper halves, and so on in order to get a balanced
+     * tree. The array of keys is assumed to be sorted in ascending
+     * order.
+     */
+    protected void insertBalanced(String[] k, char[] v, int offset, int n) {
+        int m;
+        if (n < 1) {
+            return;
+        }
+        m = n >> 1;
+
+        insert(k[m + offset], v[m + offset]);
+        insertBalanced(k, v, offset, m);
+
+        insertBalanced(k, v, offset + m + 1, n - m - 1);
+    }
+
+
+    /**
+     * Balance the tree for best search performance
+     */
+    public void balance() {
+        // System.out.print("Before root splitchar = "); System.out.println(sc[root]);
+
+        int i = 0, n = length;
+        String[] k = new String[n];
+        char[] v = new char[n];
+        Iterator iter = new Iterator();
+        while (iter.hasMoreElements()) {
+            v[i] = iter.getValue();
+            k[i++] = (String)iter.nextElement();
+        }
+        init();
+        insertBalanced(k, v, 0, n);
+
+        // With uniform letter distribution sc[root] should be around 'm'
+        // System.out.print("After root splitchar = "); System.out.println(sc[root]);
+    }
+
+    /**
+     * Each node stores a character (splitchar) which is part of
+     * some key(s). In a compressed branch (one that only contain
+     * a single string key) the trailer of the key which is not
+     * already in nodes is stored  externally in the kv array.
+     * As items are inserted, key substrings decrease.
+     * Some substrings may completely  disappear when the whole
+     * branch is totally decompressed.
+     * The tree is traversed to find the key substrings actually
+     * used. In addition, duplicate substrings are removed using
+     * a map (implemented with a TernaryTree!).
+     *
+     */
+    public void trimToSize() {
+        // first balance the tree for best performance
+        balance();
+
+        // redimension the node arrays
+        redimNodeArrays(freenode);
+
+        // ok, compact kv array
+        CharVector kx = new CharVector();
+        kx.alloc(1);
+        TernaryTree map = new TernaryTree();
+        compact(kx, map, root);
+        kv = kx;
+        kv.trimToSize();
+    }
+
+    private void compact(CharVector kx, TernaryTree map, char p) {
+        int k;
+        if (p == 0) {
+            return;
+        }
+        if (sc[p] == 0xFFFF) {
+            k = map.find(kv.getArray(), lo[p]);
+            if (k < 0) {
+                k = kx.alloc(strlen(kv.getArray(), lo[p]) + 1);
+                strcpy(kx.getArray(), k, kv.getArray(), lo[p]);
+                map.insert(kx.getArray(), k, (char)k);
+            }
+            lo[p] = (char)k;
+        } else {
+            compact(kx, map, lo[p]);
+            if (sc[p] != 0) {
+                compact(kx, map, eq[p]);
+            }
+            compact(kx, map, hi[p]);
+        }
+    }
+
+
+    public Enumeration keys() {
+        return new Iterator();
+    }
+
+    public class Iterator implements Enumeration {
+
+        /**
+         * current node index
+         */
+        int cur;
+
+        /**
+         * current key
+         */
+        String curkey;
+
+        private class Item implements Cloneable {
+            char parent;
+            char child;
+
+            public Item() {
+                parent = 0;
+                child = 0;
+            }
+
+            public Item(char p, char c) {
+                parent = p;
+                child = c;
+            }
+
+            public Object clone() {
+                return new Item(parent, child);
+            }
+
+        }
+
+        /**
+         * Node stack
+         */
+        Stack ns;
+
+        /**
+         * key stack implemented with a StringBuffer
+         */
+        StringBuffer ks;
+
+        public Iterator() {
+            cur = -1;
+            ns = new Stack();
+            ks = new StringBuffer();
+            rewind();
+        }
+
+        public void rewind() {
+            ns.removeAllElements();
+            ks.setLength(0);
+            cur = root;
+            run();
+        }
+
+        public Object nextElement() {
+            String res = new String(curkey);
+            cur = up();
+            run();
+            return res;
+        }
+
+        public char getValue() {
+            if (cur >= 0) {
+                return eq[cur];
+            }
+            return 0;
+        }
+
+        public boolean hasMoreElements() {
+            return (cur != -1);
+        }
+
+        /**
+         * traverse upwards
+         */
+        private int up() {
+            Item i = new Item();
+            int res = 0;
+
+            if (ns.empty()) {
+                return -1;
+            }
+
+            if (cur != 0 && sc[cur] == 0) {
+                return lo[cur];
+            }
+
+            boolean climb = true;
+
+            while (climb) {
+                i = (Item)ns.pop();
+                i.child++;
+                switch (i.child) {
+                case 1:
+                    if (sc[i.parent] != 0) {
+                        res = eq[i.parent];
+                        ns.push(i.clone());
+                        ks.append(sc[i.parent]);
+                    } else {
+                        i.child++;
+                        ns.push(i.clone());
+                        res = hi[i.parent];
+                    }
+                    climb = false;
+                    break;
+
+                case 2:
+                    res = hi[i.parent];
+                    ns.push(i.clone());
+                    if (ks.length() > 0) {
+                        ks.setLength(ks.length() - 1);    // pop
+                    }
+                    climb = false;
+                    break;
+
+                default:
+                    if (ns.empty()) {
+                        return -1;
+                    }
+                    climb = true;
+                    break;
+                }
+            }
+            return res;
+        }
+
+        /**
+         * traverse the tree to find next key
+         */
+        private int run() {
+            if (cur == -1) {
+                return -1;
+            }
+
+            boolean leaf = false;
+            while (true) {
+                // first go down on low branch until leaf or compressed branch
+                while (cur != 0) {
+                    if (sc[cur] == 0xFFFF) {
+                        leaf = true;
+                        break;
+                    }
+                    ns.push(new Item((char)cur, '\u0000'));
+                    if (sc[cur] == 0) {
+                        leaf = true;
+                        break;
+                    }
+                    cur = lo[cur];
+                }
+                if (leaf) {
+                    break;
+                }
+                    // nothing found, go up one node and try again
+                cur = up();
+                if (cur == -1) {
+                    return -1;
+                }
+            }
+            // The current node should be a data node and
+            // the key should be in the key stack (at least partially)
+            StringBuffer buf = new StringBuffer(ks.toString());
+            if (sc[cur] == 0xFFFF) {
+                int p = lo[cur];
+                while (kv.get(p) != 0) {
+                    buf.append(kv.get(p++));
+            }
+            }
+            curkey = buf.toString();
+            return 0;
+        }
+
+    }
+
+    public void printStats() {
+        System.out.println("Number of keys = " + Integer.toString(length));
+        System.out.println("Node count = " + Integer.toString(freenode));
+        // System.out.println("Array length = " + Integer.toString(eq.length));
+        System.out.println("Key Array length = "
+                           + Integer.toString(kv.length()));
+
+        /*
+         * for(int i=0; i<kv.length(); i++)
+         * if ( kv.get(i) != 0 )
+         * System.out.print(kv.get(i));
+         * else
+         * System.out.println("");
+         * System.out.println("Keys:");
+         * for(Enumeration enum = keys(); enum.hasMoreElements(); )
+         * System.out.println(enum.nextElement());
+         */
+
+    }
+
+/*    public static void main(String[] args) throws Exception {
+        TernaryTree tt = new TernaryTree();
+        tt.insert("Carlos", 'C');
+        tt.insert("Car", 'r');
+        tt.insert("palos", 'l');
+        tt.insert("pa", 'p');
+        tt.trimToSize();
+        System.out.println((char)tt.find("Car"));
+        System.out.println((char)tt.find("Carlos"));
+        System.out.println((char)tt.find("alto"));
+        tt.printStats();
+    }*/
+
+}
+