burguillos.info/lib/BurguillosInfo/IndexUtils.pm

33 lines
769 B
Perl
Raw Normal View History

2023-09-04 17:48:50 +02:00
package BurguillosInfo::IndexUtils;
use v5.36.0;
use strict;
use warnings;
use utf8;
use feature 'signatures';
use Unicode::Normalize qw/NFKD/;
use Moo;
# Reduce a piece of text to an accent-free, suffix-stripped form.
# (Presumably used to build or query search-index keys, judging by the
# package name -- confirm against callers.)
#
# Processing steps:
#   1. Compatibility-decompose the text with NFKD, so that accented letters
#      become a base letter followed by combining marks.
#   2. Delete every nonspacing mark, leaving only the base letters.
#   3. For each word, delete one of the listed endings when it is preceded
#      by at least four word characters and followed by a word boundary.
#      The endings look like common Spanish inflectional and derivational
#      suffixes.
#
# Parameters:
#   $self - invocant; accepted so the sub can be called as a method, unused.
#   $text - string to normalize, or undef.
#
# Returns the transformed string, or undef when $text is undef.
sub normalize($self, $text) {
    # Deliberately an explicit undef rather than a bare return: existing
    # list-context callers keep receiving exactly one element.
    return undef if !defined $text;

    my $decomposed = NFKD($text);
    $decomposed =~ s/\p{NonspacingMark}//g;

    # The trailing \b pins the end of the match to the end of a word and the
    # engine takes the leftmost possible start, so the longest ending that
    # still leaves four word characters in front of it is the one removed.
    # The order of the alternatives (and the repeated entries) therefore
    # does not affect the result.
    $decomposed =~ s/(?<=\w{4})(?:
        ada|ado|aje|cion|diccion|duccion|dura|eccion|epcion|ido|miento|
        ncia|scripcion|sicion|sion|dad|tad|bilidad|edad|era|eria|ez|eza|ia|idad|ismo|
        ncia|ante|ente|ura|dor|dero|ero|ista|ado|ario|ia|ero|eria|able|aceo|aco|al|aneo|
        ante|ario|ente|rgir|ento|errimo|ible|ico|ifico|il|ino|isimo|ivo|izo|oso|ecer|
        ificar|izar|es|as|os|e|o|a
    )\b//xg;

    return $decomposed;
}
# Short alias for normalize(): forwards every argument unchanged (including
# the invocant when called as a method) and hands back normalize()'s result.
sub n(@args) {
    return normalize(@args);
}
1;