From f264d930cc00c463d0f506a94f6f6b494aab7022 Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Sun, 24 Jan 2021 23:27:02 +0300 Subject: [PATCH] [#3213] Speedup of HashtagsTableMigrator (query optimization). State handling fix. --- .../migrators/hashtags_table_migrator.ex | 18 +++++++++++++++--- .../migrators/hashtags_table_migrator/state.ex | 4 ++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/pleroma/migrators/hashtags_table_migrator.ex b/lib/pleroma/migrators/hashtags_table_migrator.ex index 8ad2c8c73..6a1c9592c 100644 --- a/lib/pleroma/migrators/hashtags_table_migrator.ex +++ b/lib/pleroma/migrators/hashtags_table_migrator.ex @@ -72,6 +72,8 @@ def handle_continue(:init_state, _state) do @impl true def handle_info(:migrate_hashtags, state) do + State.clear() + data_migration = data_migration() persistent_data = Map.take(data_migration.data, ["max_processed_id"]) @@ -152,8 +154,6 @@ defp query do # Note: most objects have Mention-type AS2 tags and no hashtags (but we can't filter them out) from( object in Object, - left_join: hashtag in assoc(object, :hashtags), - where: is_nil(hashtag.id), where: fragment("(?)->'tag' IS NOT NULL AND (?)->'tag' != '[]'::jsonb", object.data, object.data), select: %{ @@ -161,12 +161,24 @@ defp query do tag: fragment("(?)->'tag'", object.data) } ) + |> join(:left, [o], hashtags_objects in fragment("SELECT object_id FROM hashtags_objects"), + on: hashtags_objects.object_id == o.id + ) + |> where([_o, hashtags_objects], is_nil(hashtags_objects.object_id)) end defp transfer_object_hashtags(object) do - embedded_tags = (Map.has_key?(object, :tag) && object.tag) || object.data["tag"] + embedded_tags = if Map.has_key?(object, :tag), do: object.tag, else: object.data["tag"] hashtags = Object.object_data_hashtags(%{"tag" => embedded_tags}) + if Enum.any?(hashtags) do + transfer_object_hashtags(object, hashtags) + else + {:ok, object.id} + end + end + + defp transfer_object_hashtags(object, hashtags) do Repo.transaction(fn -> with {:ok, hashtag_records} <- Hashtag.get_or_create_by_names(hashtags) do for hashtag_record <- hashtag_records do diff --git a/lib/pleroma/migrators/hashtags_table_migrator/state.ex b/lib/pleroma/migrators/hashtags_table_migrator/state.ex index 43f7270e2..901563426 100644 --- a/lib/pleroma/migrators/hashtags_table_migrator/state.ex +++ b/lib/pleroma/migrators/hashtags_table_migrator/state.ex @@ -12,6 +12,10 @@ def start_link(_) do Agent.start_link(fn -> @init_state end, name: @reg_name) end + def clear do + Agent.update(@reg_name, fn _state -> @init_state end) + end + def get do Agent.get(@reg_name, & &1) end