# File: burguillos.info/lib/BurguillosInfo/Tracking.pm (Perl)

package BurguillosInfo::Tracking;

# Request tracking: records visited paths and individual requests in the
# database and exposes aggregated distinct-visitor statistics.

use v5.34.1;

use strict;
use warnings;

use feature 'signatures';

# Signatures are still flagged experimental on the declared minimum perl
# (5.34); silence that category so signature subs do not warn.
no warnings 'experimental::signatures';

use JSON;
use Const::Fast;

use BurguillosInfo::DB;

# Value handed to new() (presumably the application object - confirm); the
# private helpers in this file pass it to BurguillosInfo::DB->connect.
my $app;
# Base query counting distinct visitors, where a visitor is one
# (remote_address, user_agent) pair. It has no WHERE clause and ends with a
# newline; the queries in this file append their own filter after it.
const my $SELECT_GLOBAL => join(
    q{},
    "SELECT COUNT(DISTINCT (remote_address, user_agent))\n",
    "FROM requests\n",
);
# Constructor.
#
# Stores the second argument in the file-level $app so the private helpers
# can open their database handles later, connects once, and returns an
# empty blessed hash (the object carries no state of its own).
sub new {
    my ( $class, $application ) = @_;
    $app = $application;

    # The handle is not used here; the call is kept for whatever connect()
    # does as a side effect (NOTE(review): confirm it is still needed).
    my $dbh = BurguillosInfo::DB->connect($app);

    return bless {}, $class;
}
# Records that $url was visited.
#
# Inserts $url into the paths table; when a row with that path already
# exists, its last_seen column is set to NOW() instead.
#
# Params:  $url - the path to record.
# Returns: whatever $dbh->do returns for the statement.
sub _add_path ( $self, $url ) {
    my $dbh = BurguillosInfo::DB->connect($app);

    # $1 is a numbered placeholder: the single bound value $url is used in
    # both the INSERT and the conflict branch.
    return $dbh->do( <<'EOF', undef, $url );
INSERT INTO paths (path) VALUES($1)
ON CONFLICT (path) DO
UPDATE SET last_seen = NOW() where paths.path = $1;
EOF
}
# Backfills paths.last_seen for rows where it is still NULL.
#
# Each such row gets the most recent requests.date recorded for the same
# path; rows that already have a last_seen value are left untouched.
#
# Returns: whatever $dbh->do returns for the statement.
sub _update_null_last_seen_paths_if_any ($self) {
    my $dbh = BurguillosInfo::DB->connect($app);

    return $dbh->do( <<'EOF', undef );
UPDATE paths
SET last_seen = requests_for_path.last_date
FROM (
SELECT requests.path, max(requests.date) as last_date
FROM requests
GROUP BY requests.path
) requests_for_path
WHERE paths.last_seen IS NULL AND requests_for_path.path = paths.path;
EOF
}
# Inserts one row into the requests table.
#
# Params:
#   $remote_address - client address of the request.
#   $user_agent     - User-Agent header value.
#   $params_json    - request parameters, already encoded as JSON text.
#   $path           - requested path.
#   $referer        - Referer header value.
# Returns: whatever $dbh->do returns for the statement.
sub _register_request_query ( $self, $remote_address, $user_agent,
    $params_json, $path, $referer )
{
    my $dbh = BurguillosInfo::DB->connect($app);

    # All values are passed as bind parameters, never interpolated.
    return $dbh->do(
        <<'EOF', undef, $remote_address, $user_agent, $params_json, $path, $referer );
INSERT INTO requests(remote_address, user_agent, params, path, referer)
VALUES (?, ?, ?, ?, ?);
EOF
}
# Tracks one incoming request.
#
# Records the requested path, backfills missing last_seen values, stores the
# request itself (remote address, user agent, JSON-encoded parameters, path
# and referer) and prints a one-line summary to standard output.
#
# Params:  $c - object giving access to the current request through ->req
#               and ->tx (presumably a Mojolicious controller - confirm).
# Returns: the result of the final say().
sub register_request {
    my ( $self, $c ) = @_;

    my $path = $c->req->url->path;

    # No handle is opened here: each helper below connects on its own.
    $self->_add_path($path);
    $self->_update_null_last_seen_paths_if_any();

    my $remote_address = $c->tx->remote_address;
    my $user_agent     = $c->req->headers->user_agent;
    my $referer        = $c->req->headers->referer // '';
    my $params_json    = encode_json( $c->req->params->to_hash );

    # $user_agent is stored as-is, so a missing header stays undefined for
    # the database layer (presumably stored as NULL - confirm).
    $self->_register_request_query( $remote_address, $user_agent, $params_json,
        $path, $referer );

    # Only the log line falls back to '' so a request without a User-Agent
    # does not trigger an "uninitialized value" warning.
    my $logged_user_agent = $user_agent // '';
    say
"Registered $remote_address with user agent $logged_user_agent visited $path with $params_json";
}
# Returns site-wide distinct-visitor counts.
#
# Params:  $c - object whose ->app is passed to the database connector.
# Returns: a hash reference with the keys unique_ips_last_24_hours,
#          unique_ips_last_week and unique_ips_last_month. Despite the
#          names, each value counts distinct (remote_address, user_agent)
#          pairs, as defined by $SELECT_GLOBAL.
sub get_global_data {
    my ( $self, $c ) = @_;

    # Deliberately lexical: this shadows the file-level $app inside the sub.
    my $app = $c->app;
    my $dbh = BurguillosInfo::DB->connect($app);

    # $SELECT_GLOBAL has no WHERE clause, so each subquery appends its own
    # time window right after it.
    my $data = $dbh->selectrow_hashref( <<"EOF", undef );
SELECT
(
$SELECT_GLOBAL
where date > NOW() - interval '1 day'
) as unique_ips_last_24_hours,
(
$SELECT_GLOBAL
where date > NOW() - interval '1 week'
) as unique_ips_last_week,
(
$SELECT_GLOBAL
where date > NOW() - interval '1 month'
) as unique_ips_last_month;
EOF
    return $data;
}
# SQL string literal (single quotes included) holding the regular expression
# used to recognise referers that start with a Google URL: http or https,
# with an optional "www." prefix.
my $GOOGLE_REFERER_REGEX = q{'^https?://(?:www\.)?google\.\w'};

# Distinct-visitor subquery restricted to the outer query's current path and
# to Google referers. It intentionally stops at "date > NOW()": callers
# append " - interval '...'" to finish the comparison.
my $GOOGLE_SELECT = join "\n",
  $SELECT_GLOBAL,
  'where requests.path = paths.path',
  'and requests.referer IS NOT NULL',
  "and requests.referer ~* $GOOGLE_REFERER_REGEX",
  'and date > NOW()';
# Returns per-path distinct-visitor counts for requests whose referer
# matches $GOOGLE_REFERER_REGEX.
#
# Only paths with last_seen within the last month that also have a matching
# request within the last month are listed.
#
# Params:  $c - object whose ->app is passed to the database connector.
# Returns: an array reference of hash references (one per path) with the
#          keys path, unique_ips_last_1_hour, unique_ips_last_3_hours,
#          unique_ips_last_6_hours, unique_ips_last_12_hours,
#          unique_ips_last_24_hours, unique_ips_last_week and
#          unique_ips_last_month.
sub get_google_data {
    my ( $self, $c ) = @_;

    # Deliberately lexical: this shadows the file-level $app inside the sub.
    my $app = $c->app;
    my $dbh = BurguillosInfo::DB->connect($app);

    # $GOOGLE_SELECT ends in "date > NOW()", so the "- interval '...'" that
    # follows each interpolation completes that comparison.
    my $data = $dbh->selectall_arrayref( <<"EOF", { Slice => {} } );
SELECT paths.path,
(
$GOOGLE_SELECT - interval '1 hour'
) as unique_ips_last_1_hour,
(
$GOOGLE_SELECT - interval '3 hour'
) as unique_ips_last_3_hours,
(
$GOOGLE_SELECT - interval '6 hour'
) as unique_ips_last_6_hours,
(
$GOOGLE_SELECT - interval '12 hour'
) as unique_ips_last_12_hours,
(
$GOOGLE_SELECT - interval '1 day'
) as unique_ips_last_24_hours,
(
$GOOGLE_SELECT - interval '1 week'
) as unique_ips_last_week,
(
$GOOGLE_SELECT - interval '1 month'
) as unique_ips_last_month
FROM paths right join requests on paths.path = requests.path
WHERE paths.last_seen > NOW() - INTERVAL '1 month'
and requests.referer ~* $GOOGLE_REFERER_REGEX
and requests.date > NOW() - INTERVAL '1 month'
GROUP BY
paths.path;
EOF
    return $data;
}
# Returns per-path distinct-visitor counts for every path whose last_seen
# falls within the last month.
#
# Params:  $c - object whose ->app is passed to the database connector.
# Returns: an array reference of hash references (one per path) with the
#          keys path, unique_ips_last_1_hour, unique_ips_last_3_hours,
#          unique_ips_last_6_hours, unique_ips_last_12_hours,
#          unique_ips_last_24_hours, unique_ips_last_week and
#          unique_ips_last_month. Each count is of distinct
#          (remote_address, user_agent) pairs, as defined by $SELECT_GLOBAL.
sub get_data_for_urls {
    my ( $self, $c ) = @_;

    # Deliberately lexical: this shadows the file-level $app inside the sub.
    my $app = $c->app;
    my $dbh = BurguillosInfo::DB->connect($app);

    # Each subquery is correlated with the outer row through
    # "requests.path = paths.path" and differs only in its time window.
    my $data = $dbh->selectall_arrayref( <<"EOF", { Slice => {} } );
SELECT paths.path,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 hour'
) as unique_ips_last_1_hour,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '3 hour'
) as unique_ips_last_3_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '6 hour'
) as unique_ips_last_6_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '12 hour'
) as unique_ips_last_12_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 day'
) as unique_ips_last_24_hours,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 week'
) as unique_ips_last_week,
(
$SELECT_GLOBAL
where requests.path = paths.path and date > NOW() - interval '1 month'
) as unique_ips_last_month
FROM paths
WHERE paths.last_seen > NOW() - INTERVAL '1 month';
EOF
    return $data;
}
1;