From 077ae5afd2cbff1bb937d48d73dcddb083052457 Mon Sep 17 00:00:00 2001
From: Sergiotarxz
Date: Mon, 4 Sep 2023 17:48:50 +0200
Subject: [PATCH] Missing normalization.

---
Reviewer note: line breaks were restored and inline comments plus an
explicit return in n() were added by hand. The blob id on the "index"
line below still refers to the original, uncommented file contents.

 lib/BurguillosInfo/IndexUtils.pm | 44 ++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 lib/BurguillosInfo/IndexUtils.pm

diff --git a/lib/BurguillosInfo/IndexUtils.pm b/lib/BurguillosInfo/IndexUtils.pm
new file mode 100644
index 0000000..c6e7e9d
--- /dev/null
+++ b/lib/BurguillosInfo/IndexUtils.pm
@@ -0,0 +1,44 @@
+package BurguillosInfo::IndexUtils;
+
+use v5.36.0;
+
+use strict;
+use warnings;
+use utf8;
+
+use feature 'signatures';
+
+use Unicode::Normalize qw/NFKD/;
+
+use Moo;
+
+# normalize($self, $text)
+#
+# Returns a simplified copy of $text, or undef when $text is undef:
+#   1. Apply Unicode compatibility decomposition (NFKD).
+#   2. Remove every nonspacing mark, e.g. the combining accents that
+#      step 1 splits off their base letters.
+#   3. Delete every "s" that is directly followed by a word boundary.
+#   4. Turn every "a" directly followed by a word boundary into "o".
+# Step 3 runs before step 4, so "casas" comes out as "caso".
+# NOTE(review): steps 3-4 look like rough Spanish plural/gender folding
+# for index matching -- confirm with the callers.
+sub normalize($self, $text) {
+    # Explicit undef: the caller gets one undef even in list context.
+    return undef if !defined $text;
+    my $decomposed = NFKD( $text );
+    $decomposed =~ s/\p{NonspacingMark}//g;
+    $decomposed =~ s/s\b//g;
+    $decomposed =~ s/a\b/o/g;
+    return $decomposed;
+}
+
+# n(@args)
+#
+# Short alias: forwards @args unchanged to normalize() and returns its
+# result.  normalize() accepts exactly ($self, $text), so any other
+# number of arguments makes that call die.
+sub n(@args) {
+    return normalize(@args);
+}
+1;