Better normalization hopefully.
This commit is contained in:
parent
96b403ff87
commit
7c5c94b45a
@ -16,7 +16,13 @@ sub normalize($self, $text) {
|
|||||||
return undef if !defined $text;
|
return undef if !defined $text;
|
||||||
my $decomposed = NFKD( $text );
|
my $decomposed = NFKD( $text );
|
||||||
$decomposed =~ s/\p{NonspacingMark}//g;
|
$decomposed =~ s/\p{NonspacingMark}//g;
|
||||||
$decomposed =~ s/s\b//g;
|
$decomposed =~ s/(?:
|
||||||
|
ada|ado|aje|cion|diccion|duccion|dura|ección|epcion|ido|ion|miento|
|
||||||
|
ncia|on|scripcion|sicion|sion|dad|tad|bilidad|edad|era|eria|ez|eza|ia|idad|ismo|
|
||||||
|
ncia|ante|ente|ura|dor|dero|ero|ista|ado|ario|ia|ero|eria|able|aceo|aco|al|aneo|
|
||||||
|
ante|ario|ente|rgir|ento|errimo|ible|ico|ífico|il|ino|ísimo|ivo|izo|oso|ear|ecer
|
||||||
|
ificar|izar|es|as|os|e|o|a
|
||||||
|
)\b//xg;
|
||||||
$decomposed =~ s/a\b/o/g;
|
$decomposed =~ s/a\b/o/g;
|
||||||
return $decomposed;
|
return $decomposed;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user