2023-09-04 17:48:50 +02:00
|
|
|
package BurguillosInfo::IndexUtils;
|
|
|
|
|
|
|
|
use v5.36.0;
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
use utf8;
|
|
|
|
|
|
|
|
use feature 'signatures';
|
|
|
|
|
|
|
|
use Unicode::Normalize qw/NFKD/;
|
|
|
|
|
|
|
|
use Moo;
|
|
|
|
|
2023-09-05 19:29:30 +02:00
|
|
|
use Lingua::Stem::Snowball;
|
|
|
|
|
|
|
|
# Reduces $text to a single-space-separated string of accent-stripped,
# Snowball-stemmed (Spanish) words.
#
# Parameters:
#   $self - invocant; not used by the body.
#   $text - string to normalize; may be undef.
#
# Returns undef when $text is undef, otherwise the normalized string
# (the empty string when $text contains no word characters).
sub normalize($self, $text) {
    # Explicit undef rather than a bare return, so callers in list context
    # keep receiving exactly one element.
    return undef if !defined $text;

    # Compatibility-decompose the text, then drop the nonspacing marks so
    # accented letters collapse to their base letters.
    my $decomposed = NFKD($text);
    $decomposed =~ s/\p{NonspacingMark}//g;

    # Each run of word characters is one token; everything between tokens
    # (punctuation, whitespace) is discarded.
    my @words = $decomposed =~ /\b(\w+)\b/g;

    # The stemmer configuration never changes, so build it once and reuse
    # it on later calls instead of constructing a new object every time.
    state $stemmer = Lingua::Stem::Snowball->new( lang => 'es' );
    $stemmer->stem_in_place(\@words);

    my $normalized = join " ", @words;

    # Special case: rewrite the standalone token "pizzeri" to "pizz",
    # case-insensitively.
    # NOTE(review): presumably these are the stems of "pizzeria" and
    # "pizza", merged so both match each other -- confirm.
    $normalized =~ s/\bpizzeri\b/pizz/gi;

    return $normalized;
}
|
|
|
|
|
2023-09-05 17:54:23 +02:00
|
|
|
# Short alias for normalize(): forwards every argument it receives, in
# order, to normalize() as a plain function call and hands back whatever
# normalize() returns. When invoked as a method, the invocant is therefore
# passed along as normalize()'s first argument.
sub n (@args) {
    return normalize(@args);
}
|
|
|
|
1;
|