From 7c5c94b45aa276b92e26afb3a45dd1221d97359e Mon Sep 17 00:00:00 2001
From: Sergiotarxz
Date: Tue, 5 Sep 2023 17:08:37 +0200
Subject: [PATCH] Better normalization hopefully.

---
Reviewer notes (this section is ignored by `git am`):

- The added substitution removes every listed Spanish suffix that is
  immediately followed by a word boundary. It runs after NFKD
  decomposition and removal of nonspacing marks, so every alternative is
  spelled without accents (eccion, ifico, isimo); an accented alternative
  can never match mark-stripped text.
- Under /x, whitespace and newlines inside the pattern are ignored, so
  every line of alternatives except the last must end with '|'.
  Otherwise the last alternative of one line fuses with the first of the
  next (ecer + ificar would become the single alternative "ecerificar").
- Indentation in this patch is reconstructed; use
  `git am --ignore-whitespace` if the context lines do not match.

 lib/BurguillosInfo/IndexUtils.pm | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/BurguillosInfo/IndexUtils.pm b/lib/BurguillosInfo/IndexUtils.pm
index c6e7e9d..c955319 100644
--- a/lib/BurguillosInfo/IndexUtils.pm
+++ b/lib/BurguillosInfo/IndexUtils.pm
@@ -16,7 +16,13 @@ sub normalize($self, $text) {
     return undef if !defined $text;
     my $decomposed = NFKD( $text );
     $decomposed =~ s/\p{NonspacingMark}//g;
-    $decomposed =~ s/s\b//g;
+    $decomposed =~ s/(?:
+        ada|ado|aje|cion|diccion|duccion|dura|eccion|epcion|ido|ion|miento|
+        ncia|on|scripcion|sicion|sion|dad|tad|bilidad|edad|era|eria|ez|eza|ia|idad|ismo|
+        ncia|ante|ente|ura|dor|dero|ero|ista|ado|ario|ia|ero|eria|able|aceo|aco|al|aneo|
+        ante|ario|ente|rgir|ento|errimo|ible|ico|ifico|il|ino|isimo|ivo|izo|oso|ear|ecer|
+        ificar|izar|es|as|os|e|o|a
+    )\b//xg;
     $decomposed =~ s/a\b/o/g;
     return $decomposed;
 }