burguillos.info/lib/BurguillosInfo/IndexUtils.pm

36 lines
669 B
Perl
Raw Normal View History

2023-09-04 17:48:50 +02:00
package BurguillosInfo::IndexUtils;
use v5.36.0;
use strict;
use warnings;
use utf8;
use feature 'signatures';
use Unicode::Normalize qw/NFKD/;
use Moo;
2023-09-05 19:29:30 +02:00
use Lingua::Stem::Snowball;
# Normalize free text into a space-separated string of lower-case Spanish
# word stems, suitable for index/search comparison.
#
# Parameters:
#   $self - invocant; not used by this method.
#   $text - text to normalize; may be undef.
#
# Returns undef when $text is undef; otherwise the stems joined by single
# spaces (the empty string when $text contains no word characters).
sub normalize($self, $text) {
    # Kept as an explicit undef (not a bare return) so that list-context
    # callers still receive exactly one element.
    return undef if !defined $text;

    # Compatibility-decompose so accents become separate combining marks,
    # then drop those marks to leave only the base letters.
    my $decomposed = NFKD($text);
    $decomposed =~ s/\p{NonspacingMark}//g;

    # Snowball's stem_in_place() requires words that are already lower case,
    # so fold each word while extracting it.
    my @words = map { lc $_ } $decomposed =~ /\b(\w+)\b/g;

    # Nothing to stem: same result as joining an empty list.
    return '' if !@words;

    # Build the Spanish stemmer once and reuse it on later calls.
    state $stemmer = Lingua::Stem::Snowball->new( lang => 'es' );
    $stemmer->stem_in_place(\@words);

    $decomposed = join " ", @words;

    # Special case: rewrite the stem "pizzeri" to "pizz".
    # NOTE(review): presumably so "pizzeria" and "pizza" share one stem -- confirm.
    $decomposed =~ s/\bpizzeri\b/pizz/gi;

    return $decomposed;
}
2023-09-05 17:54:23 +02:00
# Short alias for normalize(): passes every argument through unchanged and
# hands back whatever normalize() returns.
sub n (@forwarded) {
    return normalize(@forwarded);
}
1;