summary refs log tree commit diff
path: root/BKUViewer
diff options
context:
space:
mode:
Diffstat (limited to 'BKUViewer')
-rw-r--r-- BKUViewer/pom.xml 15
-rw-r--r-- BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java 38
-rw-r--r-- BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java 121
3 files changed, 109 insertions, 65 deletions
diff --git a/BKUViewer/pom.xml b/BKUViewer/pom.xml
index 24605880..579d12f0 100644
--- a/BKUViewer/pom.xml
+++ b/BKUViewer/pom.xml
@@ -39,13 +39,10 @@
</exclusion>
</exclusions>
</dependency>
+ <dependency>
+ <groupId>at.gv.egiz</groupId>
+ <artifactId>BKUFonts</artifactId>
+ <version>1.0</version>
+ </dependency>
</dependencies>
- <!--build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId> <configuration>
- <skip>true</skip> </configuration> </plugin>
- </plugins>
- </build-->
-</project>
\ No newline at end of file
+</project>
diff --git a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java
index 485aa727..dee8ff2e 100644
--- a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java
+++ b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java
@@ -16,20 +16,21 @@
*/
package at.gv.egiz.bku.text;
+import at.gv.egiz.bku.gui.viewer.FontProviderException;
+import at.gv.egiz.bku.viewer.ResourceFontLoader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
-import java.nio.charset.IllegalCharsetNameException;
-import java.nio.charset.UnsupportedCharsetException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import at.gv.egiz.bku.viewer.ValidationException;
import at.gv.egiz.bku.viewer.Validator;
+import java.awt.Font;
public class TextValidator implements Validator {
@@ -37,7 +38,13 @@ public class TextValidator implements Validator {
* Logging facility.
*/
protected static Log log = LogFactory.getLog(TextValidator.class);
-
+
+ protected Font viewerFont;
+
+ public TextValidator() throws FontProviderException {
+ viewerFont = new ResourceFontLoader().getFont();
+ }
+
private void invalid(char c) throws ValidationException {
log.info("Invalid character (0x" + Integer.toHexString(c) + ") found.");
// TODO: localize
@@ -68,30 +75,7 @@ public class TextValidator implements Validator {
cb.flip();
for (int i = 0; i < l; i++) {
c = cb.get();
- if (c < '\u0020') {
- // C0 Controls and Basic Latin (0x000C-0x000D)
- if (c > '\r') invalid(c); if (c >= '\u000C') continue;
- // C0 Controls and Basic Latin (0x0009-0x000A)
- if (c > '\n') invalid(c); if (c >= '\t') continue;
- invalid(c);
- } else {
- // C0 Controls and Basic Latin (0x0020-0x007E)
- if (c <= '\u007E') continue;
- // C1 Controls and Latin-1 Supplement (0x00A1-0x00FF)
- if (c < '\u00A1') invalid(c); if (c <= '\u00FF') continue;
- // Latin Extended-A (0x0100-0x017F)
- if (c < '\u0100') invalid(c); if (c <= '\u017F') continue;
- // EURO Sign
- if (c == '\u20AC') continue;
- // Spacing Modifier Letters
- if (c == '\u02C7') continue;
- if (c == '\u02D8') continue;
- if (c == '\u02D9') continue;
- if (c == '\u02DB') continue;
- if (c == '\u02DD') continue;
- if (c == '\u2015') continue;
- invalid(c);
- }
+ if (!viewerFont.canDisplay(c)) invalid(c);
}
}
cb.clear();
diff --git a/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java
index 7137911d..8db459e7 100644
--- a/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java
+++ b/BKUViewer/src/test/java/at/gv/egiz/bku/text/TestTextValidator.java
@@ -1,19 +1,19 @@
/*
-* Copyright 2008 Federal Chancellery Austria and
-* Graz University of Technology
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Copyright 2008 Federal Chancellery Austria and
+ * Graz University of Technology
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package at.gv.egiz.bku.text;
import static org.junit.Assert.*;
@@ -35,7 +35,7 @@ import at.gv.egiz.bku.viewer.ValidatorFactory;
public class TestTextValidator {
public static byte[] generateText(String encoding) throws UnsupportedEncodingException {
-
+
ByteArrayOutputStream bos = new ByteArrayOutputStream();
PrintWriter writer = new PrintWriter(new OutputStreamWriter(bos, encoding));
@@ -45,11 +45,15 @@ public class TestTextValidator {
writer.write(c);
}
writer.write("\n");
- writer.write("C0 Controls and Basic Latin 0x000C-0x000D");
+ // errata: don't include FORM FEED (0x000C)
+ writer.write("C0 Controls and Basic Latin 0x000D");
writer.write("\n");
- for (char c = '\f'; c <= '\r'; c++) {
- writer.write(c);
- }
+
+// for (char c = '\f'; c <= '\r'; c++) {
+// writer.write(c);
+// }
+
+ writer.write("\r");
writer.write("\n");
writer.write("C0 Controls and Basic Latin 0x0020-0x007E");
writer.write("\n");
@@ -97,23 +101,23 @@ public class TestTextValidator {
writer.write("\n");
writer.write("\u20AC");
writer.flush();
-
+
return bos.toByteArray();
-
+
}
-
+
public void testTextValidation(String encoding) throws ValidationException, UnsupportedEncodingException {
-
+
Validator validator = ValidatorFactory.newValidator("text/plain");
-
+
assertNotNull(validator);
InputStream is = new ByteArrayInputStream(generateText(encoding));
-
+
assertNotNull(is);
-
+
validator.validate(is, encoding);
-
+
}
@Test
@@ -151,5 +155,64 @@ public class TestTextValidator {
public void testISO8859_15() throws ValidationException, UnsupportedEncodingException {
testTextValidation("ISO-8859-15");
}
-
+
+ @Test
+ public void testPerformance() throws UnsupportedEncodingException, ValidationException {
+ Validator validator = ValidatorFactory.newValidator("text/plain");
+
+ assertNotNull(validator);
+
+ //!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϐϑϒϓϔϕϖϗϘϙϚϛϜϝϞϟϠϡЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџҐґҒғҔҕҖҗҘҙҚқҰұҲҳӀӁӂӃӄӐӑӒӓӔӕӖӗӘәӚӛӜӝӞӟӠӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹ
+
+ StringBuilder data = new StringBuilder();
+ //LATIN
+ for (int i = 0x0021; i <= 0x007e; i++) {
+ data.append((char) i);
+ }
+ //LATIN supplement
+ for (int i = 0x00A1; i <= 0x00FF; i++) {
+ data.append((char) i);
+ }
+ //GREEK
+ for (int i = 0x038e; i <= 0x03a1; i++) {
+ data.append((char) i);
+ }
+ for (int i = 0x03a3; i <= 0x03ce; i++) {
+ data.append((char) i);
+ }
+ for (int i = 0x03d0; i <= 0x03e1; i++) {
+ data.append((char) i);
+ }
+ //CYRILLIC
+ for (int i = 0x0400; i <= 0x045f; i++) {
+ data.append((char) i);
+ }
+ for (int i = 0x0490; i <= 0x049b; i++) {
+ data.append((char) i);
+ }
+ for (int i = 0x04b0; i <= 0x04b3; i++) {
+ data.append((char) i);
+ }
+ for (int i = 0x04c0; i <= 0x04c4; i++) {
+ data.append((char) i);
+ }
+ for (int i = 0x04d0; i <= 0x04f9; i++) {
+ data.append((char) i);
+ }
+
+ StringBuilder aLotOfData = new StringBuilder();
+ for (int i = 0; i < 1000; i++) {
+ aLotOfData.append('\n');
+ aLotOfData.append(data);
+ }
+ String aLotOfText = aLotOfData.toString();
+ System.out.println("validating " + aLotOfText.length() + " weird characters: " + aLotOfText);
+
+ InputStream is = new ByteArrayInputStream(aLotOfText.getBytes("UTF-8"));
+
+ assertNotNull(is);
+
+ validator.validate(is, "UTF-8");
+
+ }
}