diff options
Diffstat (limited to 'BKUViewer/src/main/java/at/gv')
| -rw-r--r-- | BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java | 77 | 
1 files changed, 76 insertions, 1 deletions
| diff --git a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java index 5108140d..485aa727 100644 --- a/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java +++ b/BKUViewer/src/main/java/at/gv/egiz/bku/text/TextValidator.java @@ -16,17 +16,92 @@  */  package at.gv.egiz.bku.text; +import java.io.IOException;  import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.UnsupportedCharsetException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory;  import at.gv.egiz.bku.viewer.ValidationException;  import at.gv.egiz.bku.viewer.Validator;  public class TextValidator implements Validator { +  /** +   * Logging facility. +   */ +  protected static Log log = LogFactory.getLog(TextValidator.class); +   +  private void invalid(char c) throws ValidationException { +    log.info("Invalid character (0x" + Integer.toHexString(c) + ") found."); +    // TODO: localize +    throw new ValidationException(); +  } +      @Override    public void validate(InputStream is, String charset)        throws ValidationException { -    // TODO: implement character validation +     +    InputStreamReader reader; +    if (charset != null) { +      try { +        reader = new InputStreamReader(is, charset); +      } catch (UnsupportedEncodingException e) { +        log.info("Charset '" + charset + "' not supported.", e); +        // TODO: localize +        throw new ValidationException(e); +      } +    } else { +      reader = new InputStreamReader(is, Charset.forName("UTF-8")); +    } +     +    try { +      char c; +      CharBuffer cb = CharBuffer.allocate(256); +      for (int l; (l = reader.read(cb)) != -1;) { +        cb.flip(); +        for (int i = 0; i < l; i++) { +          c = cb.get(); +          if (c < '\u0020') { +            // C0 Controls and Basic Latin (0x000C-0x000D) +            if (c > '\r') invalid(c); if (c >= '\u000C') continue; +            // C0 Controls and Basic Latin (0x0009-0x000A) +            if (c > '\n') invalid(c); if (c >= '\t') continue; +            invalid(c); +          } else { +            // C0 Controls and Basic Latin (0x0020-0x007E) +            if (c <= '\u007E') continue; +            // C1 Controls and Latin-1 Supplement (0x00A1-0x00FF) +            if (c < '\u00A1') invalid(c); if (c <= '\u00FF') continue; +            // Latin Extended-A (0x0100-0x017F) +            if (c < '\u0100') invalid(c); if (c <= '\u017F') continue; +            // EURO Sign +            if (c == '\u20AC') continue; +            // Spacing Modifier Letters +            if (c == '\u02C7') continue; +            if (c == '\u02D8') continue; +            if (c == '\u02D9') continue; +            if (c == '\u02DB') continue; +            if (c == '\u02DD') continue; +            if (c == '\u2015') continue; +            invalid(c); +          } +        } +      } +      cb.clear(); +    } catch (IOException e) { +      // TODO: localize +      throw new ValidationException(e); +    } +     +     +        }  } | 
