From 83e8c95ea7d257166d350a59bfd81e9833ec14fd Mon Sep 17 00:00:00 2001 From: clemenso Date: Thu, 5 Nov 2009 19:05:14 +0000 Subject: [#484] European Language support git-svn-id: https://joinup.ec.europa.eu/svn/mocca/trunk@535 8a26b1a7-26f0-462f-b9ef-d0e30c41f5a4 --- BKUViewer/pom.xml | 15 +-- .../java/at/gv/egiz/bku/text/TextValidator.java | 38 ++----- .../at/gv/egiz/bku/text/TestTextValidator.java | 121 ++++++++++++++++----- 3 files changed, 109 insertions(+), 65 deletions(-) (limited to 'BKUViewer') diff --git a/BKUViewer/pom.xml b/BKUViewer/pom.xml index 24605880..579d12f0 100644 --- a/BKUViewer/pom.xml +++ b/BKUViewer/pom.xml @@ -39,13 +39,10 @@ + + at.gv.egiz + BKUFonts + 1.0 + - - \ No newline at end of file + diff --git a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java index 485aa727..dee8ff2e 100644 --- a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java +++ b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java @@ -16,20 +16,21 @@ */ package at.gv.egiz.bku.text; +import at.gv.egiz.bku.gui.viewer.FontProviderException; +import at.gv.egiz.bku.viewer.ResourceFontLoader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.nio.CharBuffer; import java.nio.charset.Charset; -import java.nio.charset.IllegalCharsetNameException; -import java.nio.charset.UnsupportedCharsetException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import at.gv.egiz.bku.viewer.ValidationException; import at.gv.egiz.bku.viewer.Validator; +import java.awt.Font; public class TextValidator implements Validator { @@ -37,7 +38,13 @@ public class TextValidator implements Validator { * Logging facility. */ protected static Log log = LogFactory.getLog(TextValidator.class); - + + protected Font viewerFont; + + public TextValidator() throws FontProviderException { + viewerFont = new ResourceFontLoader().getFont(); + } + private void invalid(char c) throws ValidationException { log.info("Invalid character (0x" + Integer.toHexString(c) + ") found."); // TODO: localize @@ -68,30 +75,7 @@ public class TextValidator implements Validator { cb.flip(); for (int i = 0; i < l; i++) { c = cb.get(); - if (c < '\u0020') { - // C0 Controls and Basic Latin (0x000C-0x000D) - if (c > '\r') invalid(c); if (c >= '\u000C') continue; - // C0 Controls and Basic Latin (0x0009-0x000A) - if (c > '\n') invalid(c); if (c >= '\t') continue; - invalid(c); - } else { - // C0 Controls and Basic Latin (0x0020-0x007E) - if (c <= '\u007E') continue; - // C1 Controls and Latin-1 Supplement (0x00A1-0x00FF) - if (c < '\u00A1') invalid(c); if (c <= '\u00FF') continue; - // Latin Extended-A (0x0100-0x017F) - if (c < '\u0100') invalid(c); if (c <= '\u017F') continue; - // EURO Sign - if (c == '\u20AC') continue; - // Spacing Modifier Letters - if (c == '\u02C7') continue; - if (c == '\u02D8') continue; - if (c == '\u02D9') continue; - if (c == '\u02DB') continue; - if (c == '\u02DD') continue; - if (c == '\u2015') continue; - invalid(c); - } + if (!viewerFont.canDisplay(c)) invalid(c); } } cb.clear(); diff --git a/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java index 7137911d..8db459e7 100644 --- a/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java +++ b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java @@ -1,19 +1,19 @@ /* -* Copyright 2008 Federal Chancellery Austria and -* Graz University of Technology -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2008 Federal Chancellery Austria and + * Graz University of Technology + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package at.gv.egiz.bku.text; import static org.junit.Assert.*; @@ -35,7 +35,7 @@ import at.gv.egiz.bku.viewer.ValidatorFactory; public class TestTextValidator { public static byte[] generateText(String encoding) throws UnsupportedEncodingException { - + ByteArrayOutputStream bos = new ByteArrayOutputStream(); PrintWriter writer = new PrintWriter(new OutputStreamWriter(bos, encoding)); @@ -45,11 +45,15 @@ public class TestTextValidator { writer.write(c); } writer.write("\n"); - writer.write("C0 Controls and Basic Latin 0x000C-0x000D"); + // errata: don't include FORM FEED (0x000C) + writer.write("C0 Controls and Basic Latin 0x000D"); writer.write("\n"); - for (char c = '\f'; c <= '\r'; c++) { - writer.write(c); - } + +// for (char c = '\f'; c <= '\r'; c++) { +// writer.write(c); +// } + + writer.write("\r"); writer.write("\n"); writer.write("C0 Controls and Basic Latin 0x0020-0x007E"); writer.write("\n"); @@ -97,23 +101,23 @@ public class TestTextValidator { writer.write("\n"); writer.write("\u20AC"); writer.flush(); - + return bos.toByteArray(); - + } - + public void testTextValidation(String encoding) throws ValidationException, UnsupportedEncodingException { - + Validator validator = ValidatorFactory.newValidator("text/plain"); - + assertNotNull(validator); InputStream is = new ByteArrayInputStream(generateText(encoding)); - + assertNotNull(is); - + validator.validate(is, encoding); - + } @Test @@ -151,5 +155,64 @@ public class TestTextValidator { public void testISO8859_15() throws ValidationException, UnsupportedEncodingException { testTextValidation("ISO-8859-15"); } - + + @Test + public void testPerformance() throws UnsupportedEncodingException, ValidationException { + Validator validator = ValidatorFactory.newValidator("text/plain"); + + assertNotNull(validator); + + //!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϐϑϒϓϔϕϖϗϘϙϚϛϜϝϞϟϠϡЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџҐґҒғҔҕҖҗҘҙҚқҰұҲҳӀӁӂӃӄӐӑӒӓӔӕӖӗӘәӚӛӜӝӞӟӠӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹ + + StringBuilder data = new StringBuilder(); + //LATIN + for (int i = 0x0021; i <= 0x007e; i++) { + data.append((char) i); + } + //LATIN supplement + for (int i = 0x00A1; i <= 0x00FF; i++) { + data.append((char) i); + } + //GREEK + for (int i = 0x038e; i <= 0x03a1; i++) { + data.append((char) i); + } + for (int i = 0x03a3; i <= 0x03ce; i++) { + data.append((char) i); + } + for (int i = 0x03d0; i <= 0x03e1; i++) { + data.append((char) i); + } + //CYRILLIC + for (int i = 0x0400; i <= 0x045f; i++) { + data.append((char) i); + } + for (int i = 0x0490; i <= 0x049b; i++) { + data.append((char) i); + } + for (int i = 0x04b0; i <= 0x04b3; i++) { + data.append((char) i); + } + for (int i = 0x04c0; i <= 0x04c4; i++) { + data.append((char) i); + } + for (int i = 0x04d0; i <= 0x04f9; i++) { + data.append((char) i); + } + + StringBuilder aLotOfData = new StringBuilder(); + for (int i = 0; i < 1000; i++) { + aLotOfData.append('\n'); + aLotOfData.append(data); + } + String aLotOfText = aLotOfData.toString(); + System.out.println("validating " + aLotOfText.length() + " weird characters: " + aLotOfText); + + InputStream is = new ByteArrayInputStream(aLotOfText.getBytes("UTF-8")); + + assertNotNull(is); + + validator.validate(is, "UTF-8"); + + } } -- cgit v1.2.3