* fixes for false positives and missed matches

* prevented 'i am' from matching

* noob is sometimes spelt with many o's

* fix for 'shit' + Turkish/Latin 'i'

* changed tests so that 'shit' not used as insult

* removed 'shit' filtering - needs more testing + improved Turkish highlighting + very harsh English insults

* update test due to changed regex

* forgot to re-add something

* combined duplicated words

* small modification
pull/8673/head
DanWaLes 2021-04-14 19:02:35 +01:00 committed by GitHub
parent 7c91223395
commit 96e3930373
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 11 deletions

View File

@ -8,8 +8,7 @@ private object Dictionary {
def en = dict("""
(f+|ph)(u{1,}|a{1,}|e{1,})c?k(er|r|u|k|ed|d|t|ing?|ign|en|tard?|face|off?|)
(f|ph)ag
(f|ph)agg?ot
(f|ph)agg?(ot|)
cock(suc?ker|)
[ck]um(shot|)
[ck]unt(ing|)
@ -63,13 +62,14 @@ hitler+
homm?o(sexual|)
honkey
hooker
(ho?pe (yo)?u(r family)? die|dies? irl)
horny
humping
idiot
incest
jerk
jizz?(um|)
kill (yo)?urself
(kill|hang) (yo)?ur(self| family)
kys
labia
lamer?
@ -86,8 +86,7 @@ nigg?(er|a|ah)
nonce
noo+b
nutsac?k
pa?edo
pa?edo(f|ph)ile
pa?edo((f|ph)ile|)
paki
pathetic
pa?ederast
@ -120,7 +119,7 @@ semen
sex
shag
shemale
(((you'? ?((is|a?re) )?)shit)|(shit(?!\b)))(z|e|y|ty|bag|)
shit(z|e|y|ty|bag|)
sissy
slag
slave
@ -241,17 +240,17 @@ wichser
""")
def tr = dict("""
am[iı]na (koyay[iı]m|koydum)
am[iı]na (koyay[iı]m|koy?dum)
amc[iı]k
anan[iı]n am[iı]
ananizi s[ii̇]k[ii̇]y[ii̇]m
ann?an[iı](zi)? s[ii̇]k[eii̇]y[ii̇]m
aptal
beyinsiz
bok yedin
gerizekal[iı]
ibne
ka[]ar
orospu
orospu( ([çc]o[çc]u[ğg]?u|evlad[ıi]))?
piç(lik)?
pu[]t
salak

View File

@ -14,7 +14,7 @@ class AnalyserTest extends Specification {
find("well fuck me") must_== List("fuck")
}
"find many bad words" in {
find("fucked that shit") must_== List("fucked")
find("fucked that shit") must_== List("fucked", "shit")
find("Beat them cunting nigger faggots with a communist dick") must_==
List("cunting", "nigger", "faggots", "dick")
}
@ -52,7 +52,7 @@ class AnalyserTest extends Specification {
find("fuck fffuuk fektard feak fak phuk") must_== List("fuck", "fffuuk", "fektard", "fak", "phuk")
}
"compute ratio" in {
ratio("fuck that shit") must_== 1d / 3
ratio("fuck that shit") must_== 2d / 3
ratio("Beat them cunting nigger faggots with a communist dick") must_== 4d / 9
ratio("hello there") must_== 0d
ratio("") must_== 0d