burguillos.info/lib/BurguillosInfo/IndexUtils.pm

35 lines
627 B
Perl

package BurguillosInfo::IndexUtils;
use v5.36.0;
use strict;
use warnings;
use utf8;
use feature 'signatures';
use Unicode::Normalize qw/NFKD/;
use Moo;
use Lingua::Stem::Snowball;
# Normalizes free text into a single string of space-separated word stems.
#
# Processing steps, in order:
#   1. NFKD-decompose the text so accented letters become base letter + mark.
#   2. Lower-case the result.
#   3. Remove every nonspacing mark (accents, tildes, diaereses, ...).
#   4. Collect each run of \w characters as a word; everything else
#      (punctuation, whitespace) acts only as a separator and is dropped.
#   5. Stem the words in place with the Spanish ('es') Snowball stemmer.
#
# Parameters:
#   $self - invocant; not used by the body.
#   $text - the text to normalize, or undef.
#
# Returns undef when $text is undef; otherwise the stemmed words joined by
# single spaces (the empty string when $text contains no word characters).
sub normalize($self, $text) {
    # Deliberately an explicit undef rather than a bare return, so callers
    # evaluating this in list context keep receiving exactly one element.
    return undef if !defined $text;

    # Lower-casing happens after decomposition (so letters produced by
    # compatibility mappings are covered too) and before mark removal (so
    # any mark introduced by a case mapping is stripped as well).
    # stem_in_place() expects its input to be lower-cased already; without
    # this step capitalised words would not be stemmed consistently.
    my $decomposed = lc( NFKD($text) );
    $decomposed =~ s/\p{NonspacingMark}//g;

    # A /g match in list context returns every captured word at once.
    my @words = $decomposed =~ /\b(\w+)\b/g;

    # The stemmer is configured identically on every call, so a single
    # instance is created on first use and reused afterwards.
    state $stemmer = Lingua::Stem::Snowball->new( lang => 'es' );
    $stemmer->stem_in_place(\@words);

    return join " ", @words;
}
# Short alias for normalize(): forwards every argument unchanged and hands
# back whatever normalize() returns, evaluated in the caller's context.
sub n (@args) {
    return normalize(@args);
}
1;