diff options
| author | mcentner <mcentner@8a26b1a7-26f0-462f-b9ef-d0e30c41f5a4> | 2008-09-12 13:50:31 +0000 | 
|---|---|---|
| committer | mcentner <mcentner@8a26b1a7-26f0-462f-b9ef-d0e30c41f5a4> | 2008-09-12 13:50:31 +0000 | 
| commit | 9f441a0aaf0e55b50014e814410c61117f7330c4 (patch) | |
| tree | e3662a76dfd40d5d8f0c31503dd0c7ec6f148c3d | |
| parent | 3794536434fdbb06067eddcfd248898ce85f85a1 (diff) | |
| download | mocca-9f441a0aaf0e55b50014e814410c61117f7330c4.tar.gz mocca-9f441a0aaf0e55b50014e814410c61117f7330c4.tar.bz2 mocca-9f441a0aaf0e55b50014e814410c61117f7330c4.zip | |
Add text validation.
git-svn-id: https://joinup.ec.europa.eu/svn/mocca/trunk@35 8a26b1a7-26f0-462f-b9ef-d0e30c41f5a4
| -rw-r--r-- | BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java | 77 | ||||
| -rw-r--r-- | BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java | 155 | 
2 files changed, 231 insertions, 1 deletions
diff --git a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java
index 5108140d..485aa727 100644
--- a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java
+++ b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java
@@ -16,17 +16,92 @@
 */
 package at.gv.egiz.bku.text;
 
+import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 
 import at.gv.egiz.bku.viewer.ValidationException;
 import at.gv.egiz.bku.viewer.Validator;
 
 public class TextValidator implements Validator {
 
+  /** Logging facility. */
+  protected static Log log = LogFactory.getLog(TextValidator.class);
+
+  /** Logs the offending character and aborts the validation. */
+  private void invalid(char c) throws ValidationException {
+    log.info("Invalid character (0x" + Integer.toHexString(c) + ") found.");
+    // TODO: localize
+    throw new ValidationException();
+  }
+
+  /**
+   * Decodes <code>is</code> (UTF-8 if <code>charset</code> is null); throws a
+   * ValidationException on a bad charset, read error or forbidden character.
+   */
   @Override
   public void validate(InputStream is, String charset)
       throws ValidationException {
-    // TODO: implement character validation
+    InputStreamReader reader;
+    if (charset != null) {
+      try {
+        reader = new InputStreamReader(is, charset);
+      } catch (UnsupportedEncodingException e) {
+        log.info("Charset '" + charset + "' not supported.", e);
+        // TODO: localize
+        throw new ValidationException(e);
+      }
+    } else {
+      reader = new InputStreamReader(is, Charset.forName("UTF-8"));
+    }
+
+    try {
+      CharBuffer cb = CharBuffer.allocate(256);
+      while (reader.read(cb) != -1) {
+        cb.flip();
+        while (cb.hasRemaining()) {
+          char c = cb.get();
+          if (c < '\u0020') {
+            // C0 Controls and Basic Latin (0x000C-0x000D)
+            if (c > '\r') invalid(c); if (c >= '\u000C') continue;
+            // C0 Controls and Basic Latin (0x0009-0x000A)
+            if (c > '\n') invalid(c); if (c >= '\t') continue;
+            invalid(c);
+          } else {
+            // C0 Controls and Basic Latin (0x0020-0x007E)
+            if (c <= '\u007E') continue;
+            // C1 Controls and Latin-1 Supplement (0x00A1-0x00FF)
+            if (c < '\u00A1') invalid(c); if (c <= '\u00FF') continue;
+            // Latin Extended-A (0x0100-0x017F)
+            if (c < '\u0100') invalid(c); if (c <= '\u017F') continue;
+            // EURO Sign
+            if (c == '\u20AC') continue;
+            // Spacing Modifier Letters
+            if (c == '\u02C7') continue;
+            if (c == '\u02D8') continue;
+            if (c == '\u02D9') continue;
+            if (c == '\u02DB') continue;
+            if (c == '\u02DD') continue;
+            // General Punctuation (horizontal bar)
+            if (c == '\u2015') continue;
+            invalid(c);
+          }
+        }
+        // Make the whole buffer writable again before the next read.
+        cb.clear();
+      }
+    } catch (IOException e) {
+      // TODO: localize
+      throw new ValidationException(e);
+    }
   }
 
 }
diff --git a/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java
new file mode 100644
index 00000000..7137911d
--- /dev/null
+++ b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java
@@ -0,0 +1,155 @@
+/*
+* Copyright 2008 Federal Chancellery Austria and
+* Graz University of Technology
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package at.gv.egiz.bku.text;
+
+import static org.junit.Assert.*;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
+
+import org.junit.Ignore;
+import org.junit.Test;
+
+import at.gv.egiz.bku.viewer.ValidationException;
+import at.gv.egiz.bku.viewer.Validator;
+import at.gv.egiz.bku.viewer.ValidatorFactory;
+
+public class TestTextValidator {
+
+  public static byte[] generateText(String encoding) throws UnsupportedEncodingException {
+    
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    PrintWriter writer = new PrintWriter(new OutputStreamWriter(bos, encoding));
+
+    writer.write("C0 Controls and Basic Latin  0x0009-0x000A");
+    writer.write("\n");
+    for (char c = '\t'; c <= '\n'; c++) {
+      writer.write(c);
+    }
+    writer.write("\n");
+    writer.write("C0 Controls and Basic Latin  0x000C-0x000D");
+    writer.write("\n");
+    for (char c = '\f'; c <= '\r'; c++) {
+      writer.write(c);
+    }
+    writer.write("\n");
+    writer.write("C0 Controls and Basic Latin  0x0020-0x007E");
+    writer.write("\n");
+    for (char c = '\u0020'; c <= '\u007E'; c++) {
+      writer.write(c);
+    }
+    writer.write("\n");
+    writer.write("C1 Controls and Latin-1 Supplement  0x00A1-0x00FF");
+    writer.write("\n");
+    for (char c = '\u00A1'; c <= '\u00FF'; c++) {
+      writer.write(c);
+    }
+    writer.write("\n");
+    writer.write("Latin Extended-A  0x0100-0x017F");
+    writer.write("\n");
+    for (char c = '\u0100'; c <= '\u017F'; c++) {
+      writer.write(c);
+    }
+    writer.write("\n");
+    writer.write("Spacing Modifier Letters  0x02C7");
+    writer.write("\n");
+    writer.write("\u02C7");
+    writer.write("\n");
+    writer.write("Spacing Modifier Letters  0x02D8");
+    writer.write("\n");
+    writer.write("\u02D8");
+    writer.write("\n");
+    writer.write("Spacing Modifier Letters  0x02D9");
+    writer.write("\n");
+    writer.write("\u02D9");
+    writer.write("\n");
+    writer.write("Spacing Modifier Letters  0x02DB");
+    writer.write("\n");
+    writer.write("\u02DB");
+    writer.write("\n");
+    writer.write("Spacing Modifier Letters  0x02DD");
+    writer.write("\n");
+    writer.write("\u02DD");
+    writer.write("\n");
+    writer.write("General Punctuation   0x2015");
+    writer.write("\n");
+    writer.write("\u2015");
+    writer.write("\n");
+    writer.write("Currency Symbols 0x20AC");
+    writer.write("\n");
+    writer.write("\u20AC");
+    writer.flush();
+  
+    return bos.toByteArray();
+    
+  }
+  
+  public void testTextValidation(String encoding) throws ValidationException, UnsupportedEncodingException {
+    
+    Validator validator = ValidatorFactory.newValidator("text/plain");
+    
+    assertNotNull(validator);
+
+    InputStream is = new ByteArrayInputStream(generateText(encoding));
+    
+    assertNotNull(is);
+    
+    validator.validate(is, encoding);
+    
+  }
+
+  @Test
+  public void testUTF8() throws ValidationException, UnsupportedEncodingException {
+    testTextValidation("UTF-8");
+  }
+
+  @Test
+  public void testISO8859_1() throws ValidationException, UnsupportedEncodingException {
+    testTextValidation("ISO-8859-1");
+  }
+
+  @Test
+  public void testISO8859_2() throws ValidationException, UnsupportedEncodingException {
+    testTextValidation("ISO-8859-2");
+  }
+
+  @Test
+  public void testISO8859_3() throws ValidationException, UnsupportedEncodingException {
+    testTextValidation("ISO-8859-3");
+  }
+
+  @Test
+  public void testISO8859_9() throws ValidationException, UnsupportedEncodingException {
+    testTextValidation("ISO-8859-9");
+  }
+
+  @Ignore
+  @Test
+  public void testISO8859_10() throws ValidationException, UnsupportedEncodingException {
+    testTextValidation("ISO-8859-10");
+  }
+
+  @Test
+  public void testISO8859_15() throws ValidationException, UnsupportedEncodingException {
+    testTextValidation("ISO-8859-15");
+  }
+  
+}
