[#3213] Added HashtagsCleanupWorker periodic job.

This commit is contained in:
Ivan Tashkinov 2021-01-31 18:24:19 +03:00
parent 380d0cce6b
commit 9948ff3356
5 changed files with 62 additions and 0 deletions

View File

@ -553,10 +553,12 @@
remote_fetcher: 2,
attachments_cleanup: 1,
new_users_digest: 1,
hashtags_cleanup: 1,
mute_expire: 5
],
plugins: [Oban.Plugins.Pruner],
crontab: [
{"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker},
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
]

View File

@ -1943,6 +1943,7 @@
type: {:list, :tuple},
description: "Settings for cron background jobs",
suggestions: [
{"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker},
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
]

View File

@ -152,6 +152,7 @@ def handle_info(:migrate_hashtags, state) do
defp query do
# Note: most objects have Mention-type AS2 tags and no hashtags (but we can't filter them out)
# Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later
from(
object in Object,
where:

View File

@ -65,6 +65,7 @@ def change(struct, params \\ %{}) do
|> maybe_handle_hashtags_change(struct)
end
# Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later
defp maybe_handle_hashtags_change(changeset, struct) do
with data_hashtags_change = get_change(changeset, :data),
true <- hashtags_changed?(struct, data_hashtags_change),

View File

@ -0,0 +1,57 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Workers.Cron.HashtagsCleanupWorker do
  @moduledoc """
  Oban worker that purges stale hashtag data in two passes.

  The first pass deletes `hashtags_objects` rows whose object has no matching
  `Create` activity. The second pass deletes `hashtags` rows that are no longer
  referenced by any `hashtags_objects` row and were inserted more than 24 hours
  before the job runs.
  """

  use Oban.Worker, queue: "hashtags_cleanup"

  alias Pleroma.Repo

  require Logger

  # Removes hashtag links for objects that have no `Create` activity pointing at them.
  # The activity's object reference is read either from the embedded object's `id`
  # or from the plain `object` field.
  @hashtags_objects_query """
  DELETE FROM hashtags_objects WHERE object_id IN
  (SELECT DISTINCT objects.id FROM objects
  JOIN hashtags_objects ON hashtags_objects.object_id = objects.id LEFT JOIN activities
  ON COALESCE(activities.data->'object'->>'id', activities.data->>'object') =
  (objects.data->>'id')
  AND activities.data->>'type' = 'Create'
  WHERE activities.id IS NULL);
  """

  # Removes hashtags that have no remaining links and were inserted before the
  # timestamp bound to `$1`.
  @hashtags_query """
  DELETE FROM hashtags WHERE id IN
  (SELECT hashtags.id FROM hashtags
  LEFT OUTER JOIN hashtags_objects
  ON hashtags_objects.hashtag_id = hashtags.id
  WHERE hashtags_objects.hashtag_id IS NULL AND hashtags.inserted_at < $1);
  """

  @doc """
  Runs both cleanup statements in order, logging progress and the number of
  deleted rows for each, then returns `:ok`.

  The job argument is ignored. A query result other than `{:ok, _}` fails the
  pattern match and raises.
  """
  @impl Oban.Worker
  def perform(_job) do
    Logger.info("Cleaning up unused `hashtags_objects` records...")
    removed_links = delete_rows(@hashtags_objects_query, [])
    Logger.info("Deleted #{removed_links} unused `hashtags_objects` records.")

    Logger.info("Cleaning up unused `hashtags` records...")

    # Hashtags younger than 24 hours are spared: a hashtag row is created before
    # the rows that reference it, so a fresh one may simply not be linked yet.
    cutoff = NaiveDateTime.utc_now() |> NaiveDateTime.add(-3600 * 24)
    removed_hashtags = delete_rows(@hashtags_query, [cutoff])
    Logger.info("Deleted #{removed_hashtags} unused `hashtags` records.")

    Logger.info("HashtagsCleanupWorker complete.")
    :ok
  end

  # Executes a raw SQL statement without a query timeout and returns the number
  # of affected rows; anything but an `{:ok, _}` result raises a match error.
  defp delete_rows(sql, params) do
    {:ok, %{num_rows: affected}} = Repo.query(sql, params, timeout: :infinity)
    affected
  end
end