burguillos.info/lib/BurguillosInfo/IndexUtils.pm

65 lines
1.6 KiB
Perl
Raw Normal View History

2023-09-04 17:48:50 +02:00
package BurguillosInfo::IndexUtils;
use v5.36.0;
use strict;
use warnings;
use utf8;
use feature 'signatures';
use Unicode::Normalize qw/NFKD/;
use Moo;
2023-09-05 17:54:23 +02:00
# Reduce $text to a crude, accent-free stem so that differently inflected
# spellings of a word normalise to the same string.
#
# Steps, in order:
#   1. NFKD-decompose the text and delete every nonspacing (combining) mark,
#      which removes accents from the base letters.
#   2. Run each entry of @suffix_passes as s/<suffix>\b//g, i.e. delete that
#      suffix wherever it is followed by a word boundary in the text.
#
# Parameters:
#   $self - invocant; not used by this method.
#   $text - string to normalise; may be undef.
#
# Returns the normalised string, or undef when $text is undef.
sub normalize ( $self, $text ) {

    # Suffix passes, applied strictly in this order. A longer suffix MUST run
    # before any shorter suffix it ends with: previously "cion" ran before
    # "diccion"/"duccion"/"eccion"/"epcion"/"scripcion"/"sicion" and "dad"
    # ran before "bilidad"/"edad"/"idad", so the longer rules could never
    # match ("traduccion" became "traduc", "posibilidad" became "posibili").
    # The only change from the historical order is that "cion" and "dad" now
    # follow their longer variants.
    #
    # Repeated entries ("ant", "ent", "ific") are kept on purpose: a later
    # pass can strip a suffix that an intervening rule has just exposed,
    # e.g. "dentist" -> (ist) "dent" -> (ent) "d".
    state @suffix_passes = map { qr/$_\b/ } qw(
        es as os e o a i
        diccion duccion dur eccion epcion mient scripcion sicion cion sion
        tad bilidad edad idad dad
        ism ant ent dor der ist abl ant ent
        rgir ent errim ibl ific isim ecer ific
    );

    # Explicit undef (rather than a bare return) is kept so existing callers
    # in list context still receive exactly one element.
    return undef if !defined $text;

    my $stem = NFKD($text);
    $stem =~ s/\p{NonspacingMark}//g;

    for my $pass (@suffix_passes) {
        $stem =~ s/$pass//g;
    }

    return $stem;
}
2023-09-05 17:54:23 +02:00
# Short alias for normalize(): forwards every argument unchanged and hands
# back whatever normalize() returns.
sub n (@args) {
    return normalize(@args);
}
1;