Adding a Stemmer instead of suffixes.
This commit is contained in:
parent
470f0facaa
commit
9f309be709
1
Build.PL
1
Build.PL
@ -25,6 +25,7 @@ my $build = Module::Build->new(
|
||||
'Moo::Role' => 0,
|
||||
'Module::Pluggable' => 0,
|
||||
'List::AllUtils' => 0,
|
||||
'Lingua::Stem::Snowball' => 0,
|
||||
},
|
||||
);
|
||||
$build->create_build_script;
|
||||
|
@ -12,49 +12,19 @@ use Unicode::Normalize qw/NFKD/;
|
||||
|
||||
use Moo;
|
||||
|
||||
sub normalize ( $self, $text ) {
|
||||
use Lingua::Stem::Snowball;
|
||||
|
||||
sub normalize($self, $text) {
|
||||
return undef if !defined $text;
|
||||
my $decomposed = NFKD($text);
|
||||
$decomposed =~ s/\p{NonspacingMark}//g;
|
||||
$decomposed =~ s/es\b//g;
|
||||
$decomposed =~ s/as\b//g;
|
||||
$decomposed =~ s/os\b//g;
|
||||
$decomposed =~ s/e\b//g;
|
||||
$decomposed =~ s/o\b//g;
|
||||
$decomposed =~ s/a\b//g;
|
||||
$decomposed =~ s/i\b//g;
|
||||
$decomposed =~ s/cion\b//g;
|
||||
$decomposed =~ s/diccion\b//g;
|
||||
$decomposed =~ s/duccion\b//g;
|
||||
$decomposed =~ s/dur\b//g;
|
||||
$decomposed =~ s/eccion\b//g;
|
||||
$decomposed =~ s/epcion\b//g;
|
||||
$decomposed =~ s/mient\b//g;
|
||||
$decomposed =~ s/scripcion\b//g;
|
||||
$decomposed =~ s/sicion\b//g;
|
||||
$decomposed =~ s/sion\b//g;
|
||||
$decomposed =~ s/dad\b//g;
|
||||
$decomposed =~ s/tad\b//g;
|
||||
$decomposed =~ s/bilidad\b//g;
|
||||
$decomposed =~ s/edad\b//g;
|
||||
$decomposed =~ s/idad\b//g;
|
||||
$decomposed =~ s/ism\b//g;
|
||||
$decomposed =~ s/ant\b//g;
|
||||
$decomposed =~ s/ent\b//g;
|
||||
$decomposed =~ s/dor\b//g;
|
||||
$decomposed =~ s/der\b//g;
|
||||
$decomposed =~ s/ist\b//g;
|
||||
$decomposed =~ s/abl\b//g;
|
||||
$decomposed =~ s/ant\b//g;
|
||||
$decomposed =~ s/ent\b//g;
|
||||
$decomposed =~ s/rgir\b//g;
|
||||
$decomposed =~ s/ent\b//g;
|
||||
$decomposed =~ s/errim\b//g;
|
||||
$decomposed =~ s/ibl\b//g;
|
||||
$decomposed =~ s/ific\b//g;
|
||||
$decomposed =~ s/isim\b//g;
|
||||
$decomposed =~ s/ecer\b//g;
|
||||
$decomposed =~ s/ific\b//g;
|
||||
my @words;
|
||||
while ($decomposed =~ /\b(\w+)\b/g) {
|
||||
push @words, $1;
|
||||
}
|
||||
my $stemmer = Lingua::Stem::Snowball->new( lang => 'es' );
|
||||
$stemmer->stem_in_place(\@words);
|
||||
$decomposed = join " ", @words;
|
||||
return $decomposed;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user