From 9f441a0aaf0e55b50014e814410c61117f7330c4 Mon Sep 17 00:00:00 2001 From: mcentner Date: Fri, 12 Sep 2008 13:50:31 +0000 Subject: Add text validation. git-svn-id: https://joinup.ec.europa.eu/svn/mocca/trunk@35 8a26b1a7-26f0-462f-b9ef-d0e30c41f5a4 --- .../at/gv/egiz/bku/text/TestTextValidator.java | 155 +++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java (limited to 'BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java') diff --git a/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java new file mode 100644 index 00000000..7137911d --- /dev/null +++ b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java @@ -0,0 +1,155 @@ +/* +* Copyright 2008 Federal Chancellery Austria and +* Graz University of Technology +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package at.gv.egiz.bku.text; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; + +import org.junit.Ignore; +import org.junit.Test; + +import at.gv.egiz.bku.viewer.ValidationException; +import at.gv.egiz.bku.viewer.Validator; +import at.gv.egiz.bku.viewer.ValidatorFactory; + +public class TestTextValidator { + + public static byte[] generateText(String encoding) throws UnsupportedEncodingException { + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + PrintWriter writer = new PrintWriter(new OutputStreamWriter(bos, encoding)); + + writer.write("C0 Controls and Basic Latin 0x0009-0x000A"); + writer.write("\n"); + for (char c = '\t'; c <= '\n'; c++) { + writer.write(c); + } + writer.write("\n"); + writer.write("C0 Controls and Basic Latin 0x000C-0x000D"); + writer.write("\n"); + for (char c = '\f'; c <= '\r'; c++) { + writer.write(c); + } + writer.write("\n"); + writer.write("C0 Controls and Basic Latin 0x0020-0x007E"); + writer.write("\n"); + for (char c = '\u0020'; c <= '\u007E'; c++) { + writer.write(c); + } + writer.write("\n"); + writer.write("C1 Controls and Latin-1 Supplement 0x00A1-0x00FF"); + writer.write("\n"); + for (char c = '\u00A1'; c <= '\u00FF'; c++) { + writer.write(c); + } + writer.write("\n"); + writer.write("Latin Extended-A 0x0100-0x017F"); + writer.write("\n"); + for (char c = '\u0100'; c <= '\u017F'; c++) { + writer.write(c); + } + writer.write("\n"); + writer.write("Spacing Modifier Letters 0x02C7"); + writer.write("\n"); + writer.write("\u02C7"); + writer.write("\n"); + writer.write("Spacing Modifier Letters 0x02D8"); + writer.write("\n"); + writer.write("\u02D8"); + writer.write("\n"); + writer.write("Spacing Modifier Letters 0x02D9"); + writer.write("\n"); + writer.write("\u02D9"); + writer.write("\n"); + writer.write("Spacing Modifier Letters 0x02DB"); + writer.write("\n"); + writer.write("\u02DB"); + writer.write("\n"); + writer.write("Spacing Modifier Letters 0x02DD"); + writer.write("\n"); + writer.write("\u02DD"); + writer.write("\n"); + writer.write("General Punctuation 0x2015"); + writer.write("\n"); + writer.write("\u2015"); + writer.write("\n"); + writer.write("Currency Symbols 0x20AC"); + writer.write("\n"); + writer.write("\u20AC"); + writer.flush(); + + return bos.toByteArray(); + + } + + public void testTextValidation(String encoding) throws ValidationException, UnsupportedEncodingException { + + Validator validator = ValidatorFactory.newValidator("text/plain"); + + assertNotNull(validator); + + InputStream is = new ByteArrayInputStream(generateText(encoding)); + + assertNotNull(is); + + validator.validate(is, encoding); + + } + + @Test + public void testUTF8() throws ValidationException, UnsupportedEncodingException { + testTextValidation("UTF-8"); + } + + @Test + public void testISO8859_1() throws ValidationException, UnsupportedEncodingException { + testTextValidation("ISO-8859-1"); + } + + @Test + public void testISO8859_2() throws ValidationException, UnsupportedEncodingException { + testTextValidation("ISO-8859-2"); + } + + @Test + public void testISO8859_3() throws ValidationException, UnsupportedEncodingException { + testTextValidation("ISO-8859-3"); + } + + @Test + public void testISO8859_9() throws ValidationException, UnsupportedEncodingException { + testTextValidation("ISO-8859-9"); + } + + @Ignore + @Test + public void testISO8859_10() throws ValidationException, UnsupportedEncodingException { + testTextValidation("ISO-8859-10"); + } + + @Test + public void testISO8859_15() throws ValidationException, UnsupportedEncodingException { + testTextValidation("ISO-8859-15"); + } + +} -- cgit v1.2.3 From 83e8c95ea7d257166d350a59bfd81e9833ec14fd Mon Sep 17 00:00:00 2001 From: clemenso Date: Thu, 5 Nov 2009 19:05:14 +0000 Subject: [#484] European Language support git-svn-id: https://joinup.ec.europa.eu/svn/mocca/trunk@535 8a26b1a7-26f0-462f-b9ef-d0e30c41f5a4 --- .../at/gv/egiz/bku/text/TestTextValidator.java | 121 ++++++++++++++++----- 1 file changed, 92 insertions(+), 29 deletions(-) (limited to 'BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java') diff --git a/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java index 7137911d..8db459e7 100644 --- a/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java +++ b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java @@ -1,19 +1,19 @@ /* -* Copyright 2008 Federal Chancellery Austria and -* Graz University of Technology -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2008 Federal Chancellery Austria and + * Graz University of Technology + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package at.gv.egiz.bku.text; import static org.junit.Assert.*; @@ -35,7 +35,7 @@ import at.gv.egiz.bku.viewer.ValidatorFactory; public class TestTextValidator { public static byte[] generateText(String encoding) throws UnsupportedEncodingException { - + ByteArrayOutputStream bos = new ByteArrayOutputStream(); PrintWriter writer = new PrintWriter(new OutputStreamWriter(bos, encoding)); @@ -45,11 +45,15 @@ public class TestTextValidator { writer.write(c); } writer.write("\n"); - writer.write("C0 Controls and Basic Latin 0x000C-0x000D"); + // errata: don't include FORM FEED (0x000C) + writer.write("C0 Controls and Basic Latin 0x000D"); writer.write("\n"); - for (char c = '\f'; c <= '\r'; c++) { - writer.write(c); - } + +// for (char c = '\f'; c <= '\r'; c++) { +// writer.write(c); +// } + + writer.write("\r"); writer.write("\n"); writer.write("C0 Controls and Basic Latin 0x0020-0x007E"); writer.write("\n"); @@ -97,23 +101,23 @@ public class TestTextValidator { writer.write("\n"); writer.write("\u20AC"); writer.flush(); - + return bos.toByteArray(); - + } - + public void testTextValidation(String encoding) throws ValidationException, UnsupportedEncodingException { - + Validator validator = ValidatorFactory.newValidator("text/plain"); - + assertNotNull(validator); InputStream is = new ByteArrayInputStream(generateText(encoding)); - + assertNotNull(is); - + validator.validate(is, encoding); - + } @Test @@ -151,5 +155,64 @@ public class TestTextValidator { public void testISO8859_15() throws ValidationException, UnsupportedEncodingException { testTextValidation("ISO-8859-15"); } - + + @Test + public void testPerformance() throws UnsupportedEncodingException, ValidationException { + Validator validator = ValidatorFactory.newValidator("text/plain"); + + assertNotNull(validator); + + //!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϐϑϒϓϔϕϖϗϘϙϚϛϜϝϞϟϠϡЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџҐґҒғҔҕҖҗҘҙҚқҰұҲҳӀӁӂӃӄӐӑӒӓӔӕӖӗӘәӚӛӜӝӞӟӠӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹ + + StringBuilder data = new StringBuilder(); + //LATIN + for (int i = 0x0021; i <= 0x007e; i++) { + data.append((char) i); + } + //LATIN supplement + for (int i = 0x00A1; i <= 0x00FF; i++) { + data.append((char) i); + } + //GREEK + for (int i = 0x038e; i <= 0x03a1; i++) { + data.append((char) i); + } + for (int i = 0x03a3; i <= 0x03ce; i++) { + data.append((char) i); + } + for (int i = 0x03d0; i <= 0x03e1; i++) { + data.append((char) i); + } + //CYRILLIC + for (int i = 0x0400; i <= 0x045f; i++) { + data.append((char) i); + } + for (int i = 0x0490; i <= 0x049b; i++) { + data.append((char) i); + } + for (int i = 0x04b0; i <= 0x04b3; i++) { + data.append((char) i); + } + for (int i = 0x04c0; i <= 0x04c4; i++) { + data.append((char) i); + } + for (int i = 0x04d0; i <= 0x04f9; i++) { + data.append((char) i); + } + + StringBuilder aLotOfData = new StringBuilder(); + for (int i = 0; i < 1000; i++) { + aLotOfData.append('\n'); + aLotOfData.append(data); + } + String aLotOfText = aLotOfData.toString(); + System.out.println("validating " + aLotOfText.length() + " weird characters: " + aLotOfText); + + InputStream is = new ByteArrayInputStream(aLotOfText.getBytes("UTF-8")); + + assertNotNull(is); + + validator.validate(is, "UTF-8"); + + } } -- cgit v1.2.3