burguillos.info/lib/BurguillosInfo/Tracking.pm

243 lines
6.6 KiB
Perl

package BurguillosInfo::Tracking;
use v5.34.1;
use strict;
use warnings;
use feature 'signatures';
use JSON;
use Const::Fast;
use BurguillosInfo::DB;
# Application object handed to new(). It is a file-scoped lexical, so every
# sub in this file that calls BurguillosInfo::DB->connect($app) shares it.
my $app;
# Base query counting the rows of the requests table. It ends without a WHERE
# clause so that the statements interpolating it can append their own filter.
const my $SELECT_GLOBAL => <<'EOF';
SELECT COUNT(*)
FROM requests
EOF
# Constructor.
#
# Stores the application object passed as first argument in the file-scoped
# $app, calls BurguillosInfo::DB->connect with it and returns an empty hash
# blessed into $class. No state is kept on the instance itself.
sub new {
    my ( $class, $application ) = @_;
    $app = $application;

    # The handle is not used here; the call is kept for whatever side effects
    # BurguillosInfo::DB->connect has (not visible from this file).
    my $dbh = BurguillosInfo::DB->connect($app);

    my $self = {};
    return bless $self, $class;
}
# Records $url in the paths table: inserts a new row, or refreshes last_seen
# with NOW() when a row for that path already exists.
#
# Returns whatever $dbh->do returns.
sub _add_path ( $self, $url ) {
    my $upsert_sql = <<'EOF';
INSERT INTO paths (path) VALUES($1)
ON CONFLICT (path) DO
UPDATE SET last_seen = NOW() where paths.path = $1;
EOF
    my $dbh = BurguillosInfo::DB->connect($app);
    return $dbh->do( $upsert_sql, undef, $url );
}
# Back-fills paths.last_seen for rows where it is still NULL, using the most
# recent requests.date recorded for the same path.
#
# Returns whatever $dbh->do returns.
sub _update_null_last_seen_paths_if_any ($self) {
    my $backfill_sql = <<'EOF';
UPDATE paths
SET last_seen = requests_for_path.last_date
FROM (
SELECT requests.path, max(requests.date) as last_date
FROM requests
GROUP BY requests.path
) requests_for_path
WHERE paths.last_seen IS NULL AND requests_for_path.path = paths.path;
EOF
    my $dbh = BurguillosInfo::DB->connect($app);
    return $dbh->do( $backfill_sql, undef );
}
# Inserts one row into the requests table.
#
# The country and subdivision columns are resolved from $remote_address via
# _get_country and _get_subdivision; the remaining columns are stored exactly
# as received. Returns whatever $dbh->do returns.
sub _register_request_query (
    $self, $remote_address, $user_agent,
    $params_json, $path, $referer
  )
{
    my $dbh         = BurguillosInfo::DB->connect($app);
    my $country     = $self->_get_country($remote_address);
    my $subdivision = $self->_get_subdivision($remote_address);

    my $insert_sql = <<'EOF';
INSERT INTO requests(remote_address,
user_agent, params, path,
referer, country, subdivision)
VALUES (?, ?, ?, ?, ?, ?, ?);
EOF

    # Bind values, in the same order as the column list above.
    my @bind_values = (
        $remote_address, $user_agent, $params_json, $path,
        $referer,        $country,    $subdivision,
    );
    return $dbh->do( $insert_sql, undef, @bind_values );
}
# Rewrites the country and subdivision columns of the request identified by
# $uuid, resolving both values from $remote_address.
#
# $dbh is supplied by the caller; this sub does not open its own connection.
# Returns whatever $dbh->do returns.
sub update_country_and_subdivision ( $self, $dbh, $uuid, $remote_address ) {
    my $update_sql = <<'EOF';
UPDATE requests
SET country=?,
subdivision=?
WHERE uuid=?;
EOF
    my $country     = $self->_get_country($remote_address);
    my $subdivision = $self->_get_subdivision($remote_address);
    return $dbh->do( $update_sql, undef, $country, $subdivision, $uuid );
}
# Looks up $remote_address in the GeoIP reader and returns the Spanish ("es")
# country name from the record.
#
# Returns nothing when no reader is available, and undef when the record has
# no such entry.
sub _get_country ( $self, $remote_address ) {
    my $reader = $self->_geoip;
    return unless defined $reader;

    my $record = $reader->record_for_address($remote_address);
    return $record->{country}{names}{es};
}
# Looks up $remote_address in the GeoIP reader and returns the Spanish ("es")
# name of the first subdivision listed in the record.
#
# Returns nothing when no reader is available, and undef when the record has
# no such entry.
sub _get_subdivision ( $self, $remote_address ) {
    my $reader = $self->_geoip;
    return unless defined $reader;

    my $record = $reader->record_for_address($remote_address);
    return $record->{subdivisions}[0]{names}{es};
}
# Builds a GeoIP reader for the database file returned by _geoip_path.
#
# Returns nothing when no database path is configured; otherwise returns a new
# IP::Geolocation::MMDB object opened on that file.
sub _geoip ($self) {
    my $path = $self->_geoip_path;
    return if !defined $path;

    # Load the reader only once we know it is needed, so that an installation
    # without a configured database does not also need the module installed.
    require IP::Geolocation::MMDB;
    return IP::Geolocation::MMDB->new( file => $path );
}
# Returns the geoip_database entry of the application configuration (undef
# when the key is absent).
#
# Note that this builds a fresh BurguillosInfo object on every call to read
# the configuration; it does not use the file-scoped $app.
sub _geoip_path ($self) {
    require BurguillosInfo;
    my $application = BurguillosInfo->new;
    return $application->config->{geoip_database};
}
# Records the request carried by $c and prints a one-line summary to the
# currently selected output handle.
#
# $c must provide req (url->path, headers, params) and tx (remote_address);
# presumably a Mojolicious controller - confirm against the caller.
#
# Requests whose path ends in ".json" are ignored. For every other request the
# path is upserted, NULL last_seen values are back-filled and one row is added
# to the requests table.
sub register_request {
    my ( $self, $c ) = @_;
    my $path = $c->req->url->path;

    # Avoiding overloading the /stats endpoint.
    return if $path =~ /\.json$/;

    # No connection is opened here: each helper below connects on its own.
    $self->_add_path($path);
    $self->_update_null_last_seen_paths_if_any();

    my $remote_address = $c->tx->remote_address;
    my $user_agent     = $c->req->headers->user_agent;
    my $referer        = $c->req->headers->referer // '';
    my $params_json    = encode_json( $c->req->params->to_hash );

    # The values are stored exactly as received (a missing User-Agent stays
    # undef); only the log line below gets undef-safe copies.
    $self->_register_request_query( $remote_address, $user_agent, $params_json,
        $path, $referer );

    # Interpolating undef would raise an "uninitialized value" warning on
    # every request that lacks a User-Agent header.
    my $logged_address = $remote_address // '';
    my $logged_agent   = $user_agent     // '';
    say
"Registered $logged_address with user agent $logged_agent visited $path with $params_json";
}
# Returns a hash reference with site-wide request counts for the last day,
# week and month, keyed unique_ips_last_24_hours, unique_ips_last_week and
# unique_ips_last_month.
#
# NOTE(review): despite the "unique_ips" aliases, $SELECT_GLOBAL is a plain
# COUNT(*) over requests, so each value is a row count.
sub get_global_data {
    my ( $self, $c ) = @_;
    my $dbh = BurguillosInfo::DB->connect( $c->app );

    my $sql = <<"EOF";
SELECT
(
$SELECT_GLOBAL
where date > NOW() - interval '1 day'
) as unique_ips_last_24_hours,
(
$SELECT_GLOBAL
where date > NOW() - interval '1 week'
) as unique_ips_last_week,
(
$SELECT_GLOBAL
where date > NOW() - interval '1 month'
) as unique_ips_last_month;
EOF

    my $totals = $dbh->selectrow_hashref( $sql, undef );
    return $totals;
}
# SQL string literal (quotes included) holding the pattern that recognises a
# Google referer; it is matched with the case-insensitive ~* operator.
my $GOOGLE_REFERER_REGEX = q{'^https?://(?:www\.)?google\.\w'};

# Per-path count of requests that arrived with a Google referer. The string
# deliberately ends at "date > NOW()" so that each use can append its own
# "- interval '...'" suffix.
my $GOOGLE_SELECT = join "\n",
  $SELECT_GLOBAL,
  'where requests.path = paths.path',
  'and requests.referer IS NOT NULL',
  "and requests.referer ~* $GOOGLE_REFERER_REGEX",
  'and date > NOW()';
# Returns an array reference of hash references, one per path, with the number
# of requests that arrived with a Google referer over seven time windows (from
# the last hour up to the last month).
#
# Only paths seen during the last month that have at least one Google-referred
# request in that month are listed.
sub get_google_data {
    my ( $self, $c ) = @_;
    my $dbh = BurguillosInfo::DB->connect( $c->app );

    my $sql = <<"EOF";
SELECT paths.path,
(
$GOOGLE_SELECT - interval '1 hour'
) as unique_ips_last_1_hour,
(
$GOOGLE_SELECT - interval '3 hour'
) as unique_ips_last_3_hours,
(
$GOOGLE_SELECT - interval '6 hour'
) as unique_ips_last_6_hours,
(
$GOOGLE_SELECT - interval '12 hour'
) as unique_ips_last_12_hours,
(
$GOOGLE_SELECT - interval '1 day'
) as unique_ips_last_24_hours,
(
$GOOGLE_SELECT - interval '1 week'
) as unique_ips_last_week,
(
$GOOGLE_SELECT - interval '1 month'
) as unique_ips_last_month
FROM paths right join requests on paths.path = requests.path
WHERE paths.last_seen > NOW() - INTERVAL '1 month'
and requests.referer ~* $GOOGLE_REFERER_REGEX
and requests.date > NOW() - INTERVAL '1 month'
GROUP BY
paths.path;
EOF

    # Slice => {} makes DBI return each row as a hash reference.
    my $rows = $dbh->selectall_arrayref( $sql, { Slice => {} } );
    return $rows;
}
# Returns an array reference of hash references, one per path seen during the
# last month, with the number of requests for that path over seven time
# windows (from the last hour up to the last month).
sub get_data_for_urls {
    my ( $self, $c ) = @_;
    my $dbh = BurguillosInfo::DB->connect( $c->app );

    my $sql = <<"EOF";
SELECT paths.path,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 hour'
) as unique_ips_last_1_hour,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '3 hour'
) as unique_ips_last_3_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '6 hour'
) as unique_ips_last_6_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '12 hour'
) as unique_ips_last_12_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 day'
) as unique_ips_last_24_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 week'
) as unique_ips_last_week,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 month'
) as unique_ips_last_month
FROM paths
WHERE paths.last_seen > NOW() - INTERVAL '1 month';
EOF

    # Slice => {} makes DBI return each row as a hash reference.
    my $rows = $dbh->selectall_arrayref( $sql, { Slice => {} } );
    return $rows;
}
1;