rewrite garbage char to scala, improve form validation

pull/9644/head
Thibault Duplessis 2021-08-23 23:18:45 +02:00
parent 3e46807eea
commit eee632ee48
3 changed files with 22 additions and 24 deletions

View File

@ -66,17 +66,20 @@ object Form {
// Returns `m` transformed with String.trim in one direction and the identity function
// in the other (see Mapping.transform for which direction is bind and which is unbind).
def trim(m: Mapping[String]): Mapping[String] =
  m.transform[String](value => value.trim, value => value)
// Formatter for free-text form fields: the submitted value is trimmed and cleaned up
// before any validation constraint runs.
// NOTE(review): this is a diff view, so each changed line appears twice below
// (the removed version first, then its replacement).
val cleanTextFormatter: Formatter[String] = new Formatter[String] {
private val cleanTextFormatter: Formatter[String] = new Formatter[String] {
// bind: looks up `key` in the submitted data and cleans the value;
// a missing key produces a single "error.required" FormError.
def bind(key: String, data: Map[String, String]) =
data
.get(key)
// NOTE(review): trimming runs before the clean-up step. If that step can turn a
// character into plain whitespace, the whitespace would survive at the edges of
// the bound value - confirm the order is intended.
.map(_.trim)
.map(StringUtils.removeGarbageChars)
.map(String.normalize)
.toRight(Seq(FormError(key, "error.required", Nil)))
// unbind: applies the same trim + clean-up to the value written back under `key`.
def unbind(key: String, value: String) = Map(key -> StringUtils.removeGarbageChars(value.trim))
def unbind(key: String, value: String) = Map(key -> String.normalize(value.trim))
}
// String mapping backed by cleanTextFormatter.
// The replacement version (second definition) additionally attaches a constraint:
// the value is rejected when String.hasGarbageChars returns true for it.
val cleanText: Mapping[String] = of(cleanTextFormatter)
val cleanText: Mapping[String] = of(cleanTextFormatter).verifying(
// error shown when the constraint below fails
"The text contains invalid chars",
// valid only if no char of the (already formatted) text is flagged as garbage
s => !String.hasGarbageChars(s)
)
def cleanText(minLength: Int = 0, maxLength: Int = Int.MaxValue): Mapping[String] =
(minLength, maxLength) match {
case (min, Int.MaxValue) => cleanText.verifying(Constraints.minLength(min))

View File

@ -24,6 +24,21 @@ object String {
// Encodes `str` with java.net.URLEncoder, passing "US-ASCII" as the charset name.
// NOTE(review): with this charset, characters outside ASCII presumably cannot be
// represented faithfully - confirm that callers only pass ASCII text.
def urlencode(str: String): String = {
  val charsetName = "US-ASCII"
  java.net.URLEncoder.encode(str, charsetName)
}
// True when at least one char (UTF-16 code unit) of `str` is flagged by isGarbageChar.
def hasGarbageChars(str: String): Boolean =
  str.exists(ch => isGarbageChar(ch.toInt))
// Tells whether the char value `c` falls in one of the ranges rejected as garbage.
def isGarbageChar(c: Int): Boolean = {
  // inclusive range check on `c`
  def between(low: Int, high: Int): Boolean = low <= c && c <= high

  // U+2000..U+200F: invisible chars https://www.compart.com/en/unicode/block/U+2000
  between(0x2000, 0x200f) ||
  // U+2028..U+202F: weird stuff from the same block https://www.compart.com/en/unicode/block/U+2000
  between(0x2028, 0x202f) ||
  // U+2100..U+2C5F: bunch of probably useless blocks, starting at https://www.compart.com/en/unicode/block/U+2100
  between(0x2100, 0x2c5f) ||
  // U+A9C1 and U+A9C2: decorative chars
  c == 0xa9c1 || c == 0xa9c2
}
// Applies Unicode NFKC normalization to `str`: compatibility variants of a character
// (ligatures, full-width letters, ...) are converted into plain letters when possible.
def normalize(str: String): String = {
  val form = Normalizer.Form.NFKC
  Normalizer.normalize(str, form)
}
def decodeUriPath(input: String): Option[String] = {
try {
play.utils.UriEncoding.decodePath(input, "UTF-8").some

View File

@ -78,26 +78,6 @@ public class StringUtils {
sb.append(sArr, start, end - start);
}
/**
 * Normalizes {@code s} to NFKC, then drops every char that falls in one of the
 * filtered ranges below.
 *
 * @param s the text to clean
 * @return the normalized text without the filtered chars
 */
public static String removeGarbageChars(String s) {
  final String normalized = Normalizer.normalize(s, Normalizer.Form.NFKC);
  final StringBuilder kept = new StringBuilder(normalized.length());
  for (final char c : normalized.toCharArray()) {
    // invisible chars https://www.compart.com/en/unicode/block/U+2000
    final boolean invisible = c >= 0x2000 && c <= 0x200F;
    // weird stuff from the same block https://www.compart.com/en/unicode/block/U+2000
    final boolean weird = c >= 0x2028 && c <= 0x202F;
    // bunch of probably useless blocks https://www.compart.com/en/unicode/block/U+2100
    final boolean uselessBlocks = c >= 0x2100 && c <= 0x2C5F;
    // decorative chars U+A9C1 and U+A9C2
    final boolean decorative = c == 0xA9C1 || c == 0xA9C2;
    if (!(invisible || weird || uselessBlocks || decorative)) {
      kept.append(c);
    }
  }
  return kept.toString();
}
/**
* https://github.com/tdebatty/java-string-similarity/blob/master/src/main/java/info/debatty/java/stringsimilarity/Levenshtein.java
*