From 9f441a0aaf0e55b50014e814410c61117f7330c4 Mon Sep 17 00:00:00 2001
From: mcentner <mcentner@8a26b1a7-26f0-462f-b9ef-d0e30c41f5a4>
Date: Fri, 12 Sep 2008 13:50:31 +0000
Subject: Add text validation.

git-svn-id: https://joinup.ec.europa.eu/svn/mocca/trunk@35 8a26b1a7-26f0-462f-b9ef-d0e30c41f5a4
---
 .../java/at/gv/egiz/bku/text/TextValidator.java    | 77 +++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

(limited to 'BKUViewer/src/main')

diff --git a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java
index 5108140d..485aa727 100644
--- a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java
+++ b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java
@@ -16,17 +16,92 @@
 */
 package at.gv.egiz.bku.text;
 
+import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 
 import at.gv.egiz.bku.viewer.ValidationException;
 import at.gv.egiz.bku.viewer.Validator;
 
 public class TextValidator implements Validator {
 
+  /** Logging facility. */
+  protected static Log log = LogFactory.getLog(TextValidator.class);
+
+  /**
+   * Logs the rejected character and aborts validation.
+   *
+   * @param c the character that is not permitted
+   * @throws ValidationException always
+   */
+  private void invalid(char c) throws ValidationException {
+    log.info("Invalid character (0x" + Integer.toHexString(c) + ") found.");
+    // TODO: localize
+    throw new ValidationException();
+  }
+
+  /**
+   * Decodes the stream and rejects it if any character is not permitted.
+   * Permitted are TAB, LF, FF, CR, U+0020-U+007E, U+00A1-U+017F, U+02C7,
+   * U+02D8, U+02D9, U+02DB, U+02DD, U+2015 and U+20AC.
+   *
+   * @param is the text to validate; the stream is not closed here
+   * @param charset name of the encoding, or null to decode as UTF-8
+   * @throws ValidationException if the charset is unsupported, reading
+   *           fails or a character outside the permitted set is found
+   */
   @Override
   public void validate(InputStream is, String charset)
       throws ValidationException {
-    // TODO: implement character validation
+    InputStreamReader reader;
+    if (charset != null) {
+      try {
+        reader = new InputStreamReader(is, charset);
+      } catch (UnsupportedEncodingException e) {
+        log.info("Charset '" + charset + "' not supported.", e);
+        // TODO: localize
+        throw new ValidationException(e);
+      }
+    } else {
+      reader = new InputStreamReader(is, Charset.forName("UTF-8"));
+    }
+
+    try {
+      CharBuffer cb = CharBuffer.allocate(256);
+      while (reader.read(cb) != -1) {
+        cb.flip();
+        while (cb.hasRemaining()) {
+          char c = cb.get();
+          if (c < '\u0020') {
+            // C0 controls: only TAB, LF, FF and CR are permitted
+            if (c != '\t' && c != '\n' && c != '\f' && c != '\r') {
+              invalid(c);
+            }
+          } else if (c > '\u007E' && (c < '\u00A1' || c > '\u017F')) {
+            // outside Basic Latin, Latin-1 Supplement and Latin Extended-A:
+            // only selected spacing modifier letters, U+2015 and U+20AC
+            if (c != '\u02C7' && c != '\u02D8' && c != '\u02D9' && c != '\u02DB'
+                && c != '\u02DD' && c != '\u2015' && c != '\u20AC') {
+              invalid(c);
+            }
+          }
+        }
+        // Make the whole buffer writable again; without this the next read
+        // sees a buffer with no remaining space.
+        cb.clear();
+      }
+    } catch (IOException e) {
+      // TODO: localize
+      throw new ValidationException(e);
+    }
   }
 
 }
-- 
cgit v1.2.3