2022-11-15 23:30:37 +01:00
|
|
|
package BurguillosInfo::Tracking;
|
|
|
|
|
|
|
|
use v5.34.1;
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
|
2023-05-02 20:52:39 +02:00
|
|
|
use feature 'signatures';
|
|
|
|
|
2022-11-15 23:30:37 +01:00
|
|
|
use JSON;
|
2022-11-17 02:17:44 +01:00
|
|
|
use Const::Fast;
|
2022-11-15 23:30:37 +01:00
|
|
|
|
|
|
|
use BurguillosInfo::DB;
|
|
|
|
|
|
|
|
# Application object shared by every method in this file. It is assigned in
# new() and passed to BurguillosInfo::DB->connect by the private query helpers.
my $app;
|
|
|
|
|
2022-11-17 02:17:44 +01:00
|
|
|
# Base aggregate reused by the statistics queries in this module. It carries no
# WHERE clause on purpose: each caller appends its own filter right after it.
# Note that it counts rows of "requests", not distinct addresses.
const my $SELECT_GLOBAL => "SELECT COUNT(*)\nFROM requests\n";
|
|
|
|
|
2022-11-15 23:30:37 +01:00
|
|
|
# Constructor.
#
# Arguments: the class name and the application object. The application is
# stored in the file-level $app so that the query helpers can reach it later.
# Returns a new, empty object blessed into the given class.
sub new {
    my ( $class, $application ) = @_;
    $app = $application;

    # The handle itself is not needed here, so it is no longer kept in an
    # unused local. The call is preserved because this file does not show
    # whether connect() has side effects worth triggering at construction time.
    BurguillosInfo::DB->connect($app);

    return bless {}, $class;
}
|
|
|
|
|
2023-05-07 18:10:55 +02:00
|
|
|
# Records that $url was requested: inserts it into "paths", or refreshes
# last_seen when the path is already present.
#
# The statement uses the numbered placeholder $1 twice while binding a single
# value (assumes the database driver supports numbered placeholders).
# Returns whatever $dbh->do returns.
sub _add_path ( $self, $url ) {
    my $upsert_sql = <<'EOF';
INSERT INTO paths (path) VALUES($1)
ON CONFLICT (path) DO
UPDATE SET last_seen = NOW() where paths.path = $1;
EOF
    return BurguillosInfo::DB->connect($app)->do( $upsert_sql, undef, $url );
}
|
|
|
|
|
2023-05-07 18:10:55 +02:00
|
|
|
# Back-fills paths.last_seen for rows where it is still NULL, using the most
# recent request date recorded for the same path in "requests".
# Takes no arguments besides the invocant; returns whatever $dbh->do returns.
sub _update_null_last_seen_paths_if_any ($self) {
    my $backfill_sql = <<'EOF';
UPDATE paths
SET last_seen = requests_for_path.last_date
FROM (
SELECT requests.path, max(requests.date) as last_date
FROM requests
GROUP BY requests.path
) requests_for_path
WHERE paths.last_seen IS NULL AND requests_for_path.path = paths.path;
EOF
    return BurguillosInfo::DB->connect($app)->do( $backfill_sql, undef );
}
|
|
|
|
|
2023-05-07 18:10:55 +02:00
|
|
|
# Inserts one row into "requests" describing a single visit.
#
# Parameters are the visitor address, the User-Agent header, the request
# parameters already encoded as JSON, the requested path and the referer.
# Country and subdivision are looked up from the address before inserting.
# Returns whatever $dbh->do returns.
sub _register_request_query (
    $self, $remote_address, $user_agent,
    $params_json, $path, $referer
  )
{
    my $dbh = BurguillosInfo::DB->connect($app);

    # Scalar assignments on purpose: the lookups use a bare "return" when no
    # data is available, which would vanish from a list and shift the binds.
    my $country     = $self->_get_country($remote_address);
    my $subdivision = $self->_get_subdivision($remote_address);

    my $insert_sql = <<'EOF';
INSERT INTO requests(remote_address,
user_agent, params, path,
referer, country, subdivision)
VALUES (?, ?, ?, ?, ?, ?, ?);
EOF
    my @bind_values = (
        $remote_address, $user_agent, $params_json, $path,
        $referer,        $country,    $subdivision,
    );
    return $dbh->do( $insert_sql, undef, @bind_values );
}
|
|
|
|
|
|
|
|
# Recomputes the geolocation of an already stored request.
#
# $dbh is the database handle to use, $uuid identifies the row in "requests"
# and $remote_address is the address to geolocate. Both columns are written
# even when a lookup yields nothing (the value bound is then undef).
# Returns whatever $dbh->do returns.
sub update_country_and_subdivision($self, $dbh, $uuid, $remote_address) {

    # "scalar" keeps each lookup to exactly one bind value, even when the
    # helper returns an empty list.
    my @bind_values = (
        scalar $self->_get_country($remote_address),
        scalar $self->_get_subdivision($remote_address),
        $uuid,
    );
    my $update_sql = <<'EOF';
UPDATE requests
SET country=?,
subdivision=?
WHERE uuid=?;
EOF
    return $dbh->do( $update_sql, undef, @bind_values );
}
|
|
|
|
|
2023-08-08 19:39:13 +02:00
|
|
|
# Looks up the Spanish ("es") country name for $remote_address.
# Returns nothing when no GeoIP reader is available, and undef when the
# record has no such name.
sub _get_country($self, $remote_address) {
    my $reader = $self->_geoip;
    return unless defined $reader;

    # Keep the record in a variable: dereferencing the method's return value
    # directly would die if the reader hands back undef for this address.
    my $record = $reader->record_for_address($remote_address);
    return $record->{country}{names}{es};
}
|
|
|
|
|
|
|
|
# Looks up the Spanish ("es") name of the first subdivision listed for
# $remote_address. Returns nothing when no GeoIP reader is available, and
# undef when the record has no such name.
sub _get_subdivision($self, $remote_address) {
    my $reader = $self->_geoip;
    return unless defined $reader;

    # Keep the record in a variable: dereferencing the method's return value
    # directly would die if the reader hands back undef for this address.
    my $record = $reader->record_for_address($remote_address);
    return $record->{subdivisions}[0]{names}{es};
}
|
|
|
|
|
|
|
|
# Builds a reader for the configured GeoIP database.
#
# Returns nothing when no database path is configured; otherwise a new
# IP::Geolocation::MMDB object opened on that file. A fresh reader is built
# on every call.
sub _geoip($self) {
    my $path = $self->_geoip_path;
    return if !defined $path;

    # Loaded lazily, and only once a database is actually configured, so an
    # installation that does not use geolocation does not need the module.
    require IP::Geolocation::MMDB;
    return IP::Geolocation::MMDB->new( file => $path );
}
|
|
|
|
|
|
|
|
# Returns the "geoip_database" entry of the application configuration, or
# undef when it is not set.
#
# The application stored by new() is reused when there is one, instead of
# constructing a brand-new BurguillosInfo object on every lookup. Building a
# new application is kept only as a fallback for callers that never went
# through new().
# NOTE(review): assumes the stored application exposes the same configuration
# a freshly constructed one would - confirm.
sub _geoip_path($self) {
    my $application = $app // do {
        require BurguillosInfo;
        BurguillosInfo->new;
    };
    return $application->config->{geoip_database};
}
|
|
|
|
|
2022-11-15 23:30:37 +01:00
|
|
|
# Records the current request for the statistics.
#
# $c is the controller handling the request. The requested path is registered
# in "paths", NULL last_seen values are back-filled, a row is inserted into
# "requests" and a summary line is printed to standard output.
# Requests whose path ends in ".json" are ignored entirely.
sub register_request {
    my ( $self, $c ) = @_;
    my $path = $c->req->url->path;

    # Avoiding overloading the /stats endpoint.
    return if $path =~ /\.json$/;

    $self->_add_path($path);
    $self->_update_null_last_seen_paths_if_any();

    my $remote_address = $c->tx->remote_address;
    my $user_agent     = $c->req->headers->user_agent;
    my $referer        = $c->req->headers->referer // '';
    my $params_json    = encode_json( $c->req->params->to_hash );

    # The values are stored exactly as received (possibly undef).
    $self->_register_request_query( $remote_address, $user_agent,
        $params_json, $path, $referer );

    # A client may send no User-Agent header, and the address may be missing;
    # log through defined-or copies so the message is the same text as before
    # but no "uninitialized value" warning is raised.
    my $logged_address = $remote_address // '';
    my $logged_agent   = $user_agent     // '';
    say
"Registered $logged_address with user agent $logged_agent visited $path with $params_json";
}
|
|
|
|
|
2023-05-07 18:10:55 +02:00
|
|
|
# Site-wide request totals for the last day, week and month.
#
# $c is the controller; its application is used to obtain the database
# handle. Returns one hash reference with the keys unique_ips_last_24_hours,
# unique_ips_last_week and unique_ips_last_month. Each value comes from
# $SELECT_GLOBAL restricted to the corresponding period.
sub get_global_data {
    my ( $self, $c ) = @_;
    my $dbh = BurguillosInfo::DB->connect( $c->app );

    my $totals_sql = <<"EOF";
SELECT
(
$SELECT_GLOBAL
where date > NOW() - interval '1 day'
) as unique_ips_last_24_hours,
(
$SELECT_GLOBAL
where date > NOW() - interval '1 week'
) as unique_ips_last_week,
(
$SELECT_GLOBAL
where date > NOW() - interval '1 month'
) as unique_ips_last_month;
EOF
    my $totals = $dbh->selectrow_hashref( $totals_sql, undef );
    return $totals;
}
|
|
|
|
|
2023-05-07 18:20:42 +02:00
|
|
|
# Pattern for referers coming from a Google domain. The value already
# includes the surrounding single quotes because it is spliced verbatim into
# SQL text as the right-hand side of the ~* operator.
my $GOOGLE_REFERER_REGEX = q{'^https?://(?:www\.)?google\.\w'};

# Per-path count of requests that arrived from Google. The fragment
# deliberately ends in "date > NOW()": every user appends the
# " - interval '...'" part that completes the comparison.
my $GOOGLE_SELECT = join "\n",
  $SELECT_GLOBAL,
  'where requests.path = paths.path',
  'and requests.referer IS NOT NULL',
  "and requests.referer ~* $GOOGLE_REFERER_REGEX",
  'and date > NOW()';
|
|
|
|
|
|
|
|
# Per-path request counts restricted to visits whose referer matches
# $GOOGLE_REFERER_REGEX, over seven periods from one hour to one month.
#
# $c is the controller; its application is used to obtain the database
# handle. Returns an array reference with one hash reference per path. Only
# paths seen in the last month that have a matching request in the last month
# are listed.
sub get_google_data {
    my ( $self, $c ) = @_;
    my $dbh = BurguillosInfo::DB->connect( $c->app );

    # $GOOGLE_SELECT ends in "date > NOW()", so each line below completes the
    # comparison with the interval for that column.
    my $report_sql = <<"EOF";
SELECT paths.path,
(
$GOOGLE_SELECT - interval '1 hour'
) as unique_ips_last_1_hour,
(
$GOOGLE_SELECT - interval '3 hour'
) as unique_ips_last_3_hours,
(
$GOOGLE_SELECT - interval '6 hour'
) as unique_ips_last_6_hours,
(
$GOOGLE_SELECT - interval '12 hour'
) as unique_ips_last_12_hours,
(
$GOOGLE_SELECT - interval '1 day'
) as unique_ips_last_24_hours,
(
$GOOGLE_SELECT - interval '1 week'
) as unique_ips_last_week,
(
$GOOGLE_SELECT - interval '1 month'
) as unique_ips_last_month
FROM paths right join requests on paths.path = requests.path
WHERE paths.last_seen > NOW() - INTERVAL '1 month'
and requests.referer ~* $GOOGLE_REFERER_REGEX
and requests.date > NOW() - INTERVAL '1 month'
GROUP BY
paths.path;
EOF
    my $rows = $dbh->selectall_arrayref( $report_sql, { Slice => {} } );
    return $rows;
}
|
|
|
|
|
|
|
|
# Per-path request counts over seven periods from one hour to one month.
#
# $c is the controller; its application is used to obtain the database
# handle. Returns an array reference with one hash reference per path whose
# last_seen falls within the last month. Each count comes from $SELECT_GLOBAL
# filtered by path and period.
sub get_data_for_urls {
    my ( $self, $c ) = @_;
    my $dbh = BurguillosInfo::DB->connect( $c->app );

    my $report_sql = <<"EOF";
SELECT paths.path,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 hour'
) as unique_ips_last_1_hour,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '3 hour'
) as unique_ips_last_3_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '6 hour'
) as unique_ips_last_6_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '12 hour'
) as unique_ips_last_12_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 day'
) as unique_ips_last_24_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 week'
) as unique_ips_last_week,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 month'
) as unique_ips_last_month
FROM paths
WHERE paths.last_seen > NOW() - INTERVAL '1 month';
EOF
    my $rows = $dbh->selectall_arrayref( $report_sql, { Slice => {} } );
    return $rows;
}
|
|
|
|
|
2022-11-15 23:30:37 +01:00
|
|
|
1;
|