diff options
Diffstat (limited to 'BKUViewer/src')
| -rw-r--r-- | BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java | 38 | ||||
| -rw-r--r-- | BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java | 121 | 
2 files changed, 103 insertions, 56 deletions
| diff --git a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java index 485aa727..dee8ff2e 100644 --- a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java +++ b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java @@ -16,20 +16,21 @@  */  package at.gv.egiz.bku.text; +import at.gv.egiz.bku.gui.viewer.FontProviderException; +import at.gv.egiz.bku.viewer.ResourceFontLoader;  import java.io.IOException;  import java.io.InputStream;  import java.io.InputStreamReader;  import java.io.UnsupportedEncodingException;  import java.nio.CharBuffer;  import java.nio.charset.Charset; -import java.nio.charset.IllegalCharsetNameException; -import java.nio.charset.UnsupportedCharsetException;  import org.apache.commons.logging.Log;  import org.apache.commons.logging.LogFactory;  import at.gv.egiz.bku.viewer.ValidationException;  import at.gv.egiz.bku.viewer.Validator; +import java.awt.Font;  public class TextValidator implements Validator { @@ -37,7 +38,13 @@ public class TextValidator implements Validator {     * Logging facility.     */    protected static Log log = LogFactory.getLog(TextValidator.class); -   + +  protected Font viewerFont; + +  public TextValidator() throws FontProviderException { +    viewerFont = new ResourceFontLoader().getFont(); +  } +    private void invalid(char c) throws ValidationException {      log.info("Invalid character (0x" + Integer.toHexString(c) + ") found.");      // TODO: localize @@ -68,30 +75,7 @@ public class TextValidator implements Validator {          cb.flip();          for (int i = 0; i < l; i++) {            c = cb.get(); -          if (c < '\u0020') { -            // C0 Controls and Basic Latin (0x000C-0x000D) -            if (c > '\r') invalid(c); if (c >= '\u000C') continue; -            // C0 Controls and Basic Latin (0x0009-0x000A) -            if (c > '\n') invalid(c); if (c >= '\t') continue; -            invalid(c); -          } else { -            // C0 Controls and Basic Latin (0x0020-0x007E) -            if (c <= '\u007E') continue; -            // C1 Controls and Latin-1 Supplement (0x00A1-0x00FF) -            if (c < '\u00A1') invalid(c); if (c <= '\u00FF') continue; -            // Latin Extended-A (0x0100-0x017F) -            if (c < '\u0100') invalid(c); if (c <= '\u017F') continue; -            // EURO Sign -            if (c == '\u20AC') continue; -            // Spacing Modifier Letters -            if (c == '\u02C7') continue; -            if (c == '\u02D8') continue; -            if (c == '\u02D9') continue; -            if (c == '\u02DB') continue; -            if (c == '\u02DD') continue; -            if (c == '\u2015') continue; -            invalid(c); -          } +          if (!viewerFont.canDisplay(c)) invalid(c);          }        }        cb.clear(); diff --git a/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java index 7137911d..8db459e7 100644 --- a/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java +++ b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java @@ -1,19 +1,19 @@  /* -* Copyright 2008 Federal Chancellery Austria and -* Graz University of Technology -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -*     http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2008 Federal Chancellery Austria and + * Graz University of Technology + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */  package at.gv.egiz.bku.text;  import static org.junit.Assert.*; @@ -35,7 +35,7 @@ import at.gv.egiz.bku.viewer.ValidatorFactory;  public class TestTextValidator {    public static byte[] generateText(String encoding) throws UnsupportedEncodingException { -     +      ByteArrayOutputStream bos = new ByteArrayOutputStream();      PrintWriter writer = new PrintWriter(new OutputStreamWriter(bos, encoding)); @@ -45,11 +45,15 @@ public class TestTextValidator {        writer.write(c);      }      writer.write("\n"); -    writer.write("C0 Controls and Basic Latin  0x000C-0x000D"); +    // errata: don't include FORM FEED (0x000C) +    writer.write("C0 Controls and Basic Latin 0x000D");      writer.write("\n"); -    for (char c = '\f'; c <= '\r'; c++) { -      writer.write(c); -    } + +//    for (char c = '\f'; c <= '\r'; c++) { +//      writer.write(c); +//    } + +    writer.write("\r");      writer.write("\n");      writer.write("C0 Controls and Basic Latin  0x0020-0x007E");      writer.write("\n"); @@ -97,23 +101,23 @@ public class TestTextValidator {      writer.write("\n");      writer.write("\u20AC");      writer.flush(); -   +      return bos.toByteArray(); -     +    } -   +    public void testTextValidation(String encoding) throws ValidationException, UnsupportedEncodingException { -     +      Validator validator = ValidatorFactory.newValidator("text/plain"); -     +      assertNotNull(validator);      InputStream is = new ByteArrayInputStream(generateText(encoding)); -     +      assertNotNull(is); -     +      validator.validate(is, encoding); -     +    }    @Test @@ -151,5 +155,64 @@ public class TestTextValidator {    public void testISO8859_15() throws ValidationException, UnsupportedEncodingException {      testTextValidation("ISO-8859-15");    } -   + +  @Test +  public void testPerformance() throws UnsupportedEncodingException, ValidationException { +    Validator validator = ValidatorFactory.newValidator("text/plain"); + +    assertNotNull(validator); + +    //!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϐϑϒϓϔϕϖϗϘϙϚϛϜϝϞϟϠϡЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџҐґҒғҔҕҖҗҘҙҚқҰұҲҳӀӁӂӃӄӐӑӒӓӔӕӖӗӘәӚӛӜӝӞӟӠӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹ + +    StringBuilder data = new StringBuilder(); +    //LATIN +    for (int i = 0x0021; i <= 0x007e; i++) { +      data.append((char) i); +    } +    //LATIN supplement +    for (int i = 0x00A1; i <= 0x00FF; i++) { +      data.append((char) i); +    } +    //GREEK +    for (int i = 0x038e; i <= 0x03a1; i++) { +      data.append((char) i); +    } +    for (int i = 0x03a3; i <= 0x03ce; i++) { +      data.append((char) i); +    } +    for (int i = 0x03d0; i <= 0x03e1; i++) { +      data.append((char) i); +    } +    //CYRILLIC +    for (int i = 0x0400; i <= 0x045f; i++) { +      data.append((char) i); +    } +    for (int i = 0x0490; i <= 0x049b; i++) { +      data.append((char) i); +    } +    for (int i = 0x04b0; i <= 0x04b3; i++) { +      data.append((char) i); +    } +    for (int i = 0x04c0; i <= 0x04c4; i++) { +      data.append((char) i); +    } +    for (int i = 0x04d0; i <= 0x04f9; i++) { +      data.append((char) i); +    } + +    StringBuilder aLotOfData = new StringBuilder(); +    for (int i = 0; i < 1000; i++) { +      aLotOfData.append('\n'); +      aLotOfData.append(data); +    } +    String aLotOfText = aLotOfData.toString(); +    System.out.println("validating " + aLotOfText.length() + " weird characters: " + aLotOfText); + +    InputStream is = new ByteArrayInputStream(aLotOfText.getBytes("UTF-8")); + +    assertNotNull(is); + +    validator.validate(is, "UTF-8"); + +  }  } | 
