rewrite garbage char to scala, improve form validation
parent
3e46807eea
commit
eee632ee48
|
@ -66,17 +66,20 @@ object Form {
|
|||
// Wraps a string mapping so the bound value is trimmed; unbinding passes the value through untouched.
def trim(m: Mapping[String]) = m.transform[String](bound => bound.trim, unbound => unbound)
|
||||
|
||||
// Normalizes (NFKC) and trims the submitted text before validation.
// Garbage chars are not stripped here; the `cleanText` mapping rejects them afterwards.
private val cleanTextFormatter: Formatter[String] = new Formatter[String] {

  // Normalize first, trim second: NFKC folds compatibility spaces (e.g. U+00A0, U+2003)
  // into plain spaces, and `trim` can only strip those once they have been folded.
  private def clean(raw: String): String = String.normalize(raw).trim

  // Missing key => "error.required"; otherwise the cleaned value.
  def bind(key: String, data: Map[String, String]) =
    data
      .get(key)
      .map(clean)
      .toRight(Seq(FormError(key, "error.required", Nil)))

  // Applies the same cleaning on the way out, so bind and unbind agree.
  def unbind(key: String, value: String) = Map(key -> clean(value))
}
|
||||
|
||||
val cleanText: Mapping[String] = of(cleanTextFormatter)
|
||||
// String mapping built on `cleanTextFormatter` that additionally fails validation
// when the resulting text still contains a garbage char.
val cleanText: Mapping[String] =
  of(cleanTextFormatter)
    .verifying("The text contains invalid chars", text => !String.hasGarbageChars(text))
|
||||
def cleanText(minLength: Int = 0, maxLength: Int = Int.MaxValue): Mapping[String] =
|
||||
(minLength, maxLength) match {
|
||||
case (min, Int.MaxValue) => cleanText.verifying(Constraints.minLength(min))
|
||||
|
|
|
@ -24,6 +24,21 @@ object String {
|
|||
|
||||
// URL-encodes `str` with java.net.URLEncoder, using the US-ASCII charset.
// NOTE(review): non-ASCII characters cannot be represented in US-ASCII; confirm that is intended.
def urlencode(str: String): String = {
  val charsetName = "US-ASCII"
  java.net.URLEncoder.encode(str, charsetName)
}
|
||||
|
||||
// True when at least one UTF-16 code unit of `str` is classified as garbage by `isGarbageChar`.
def hasGarbageChars(str: String) = str.exists(unit => isGarbageChar(unit.toInt))
|
||||
|
||||
// Classifies a single char value as "garbage" (unwanted in user-submitted text).
def isGarbageChar(c: Int) = {
  def within(lo: Int, hi: Int): Boolean = lo <= c && c <= hi

  // invisible chars https://www.compart.com/en/unicode/block/U+2000
  within(0x2000, 0x200f) ||
  // line/paragraph separators, directional formatting, narrow no-break space (same block)
  within(0x2028, 0x202f) ||
  // bunch of probably useless blocks https://www.compart.com/en/unicode/block/U+2100
  within(0x2100, 0x2c5f) ||
  // decorative chars ꧁ ꧂
  c == 0xa9c1 || c == 0xa9c2
}
|
||||
|
||||
// Applies Unicode NFKC normalization, which replaces compatibility characters
// (ligatures, circled digits, …) with their plain equivalents where one exists.
def normalize(str: String): String = {
  val form = Normalizer.Form.NFKC
  Normalizer.normalize(str, form)
}
|
||||
|
||||
def decodeUriPath(input: String): Option[String] = {
|
||||
try {
|
||||
play.utils.UriEncoding.decodePath(input, "UTF-8").some
|
||||
|
|
|
@ -78,26 +78,6 @@ public class StringUtils {
|
|||
sb.append(sArr, start, end - start);
|
||||
}
|
||||
|
||||
public static String removeGarbageChars(String s) {
|
||||
s = Normalizer.normalize(s, Normalizer.Form.NFKC);
|
||||
final char[] sArr = s.toCharArray();
|
||||
final int size = sArr.length;
|
||||
final StringBuilder sb = new StringBuilder(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
final char c = sArr[i];
|
||||
// invisible chars https://www.compart.com/en/unicode/block/U+2000
|
||||
if (c >= '\u2000' && c <= '\u200F') continue;
|
||||
// weird stuff https://www.compart.com/en/unicode/block/U+2000
|
||||
if (c >= '\u2028' && c <= '\u202F') continue;
|
||||
// bunch of probably useless blocks https://www.compart.com/en/unicode/block/U+2100
|
||||
if (c >= '\u2100' && c <= '\u2C5F') continue;
|
||||
// decorative chars ꧁ ꧂
|
||||
if (c == '\ua9c1' || c == '\ua9c2') continue;
|
||||
sb.append(c);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* https://github.com/tdebatty/java-string-similarity/blob/master/src/main/java/info/debatty/java/stringsimilarity/Levenshtein.java
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue