From 6531eddf361fa52db3906ab011a4e33c7a5f9552 Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Mon, 22 Feb 2021 23:26:07 +0300 Subject: [PATCH] [#3213] `hashtags`: altered `name` type to `text`. `hashtags_objects`: removed unused index. HashtagsTableMigrator: records_per_second calculation fix. ActivityPub: hashtags-related options normalization. --- lib/pleroma/hashtag.ex | 22 ++++-- .../migrators/hashtags_table_migrator.ex | 4 +- lib/pleroma/web/activity_pub/activity_pub.ex | 75 ++++++++----------- ...20201221203824_create_hashtags_objects.exs | 2 +- ...emove_hashtags_objects_duplicate_index.exs | 11 +++ ...222184616_change_hashtags_name_to_text.exs | 15 ++++ 6 files changed, 76 insertions(+), 53 deletions(-) create mode 100644 priv/repo/migrations/20210222183840_remove_hashtags_objects_duplicate_index.exs create mode 100644 priv/repo/migrations/20210222184616_change_hashtags_name_to_text.exs diff --git a/lib/pleroma/hashtag.ex b/lib/pleroma/hashtag.ex index a6d033816..e9d143fb1 100644 --- a/lib/pleroma/hashtag.ex +++ b/lib/pleroma/hashtag.ex @@ -21,10 +21,14 @@ defmodule Pleroma.Hashtag do timestamps() end + def normalize_name(name) do + name + |> String.downcase() + |> String.trim() + end + def get_by_name(name) do - from(h in Hashtag) - |> where([h], fragment("name = ?::citext", ^String.downcase(name))) - |> Repo.one() + Repo.get_by(Hashtag, name: normalize_name(name)) end def get_or_create_by_name(name) when is_bitstring(name) do @@ -39,7 +43,7 @@ def get_or_create_by_name(name) when is_bitstring(name) do end def get_or_create_by_names(names) when is_list(names) do - names = Enum.map(names, &String.downcase/1) + names = Enum.map(names, &normalize_name/1) timestamp = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second) structs = @@ -53,10 +57,12 @@ def get_or_create_by_names(names) when is_list(names) do try do with {:ok, %{query_op: hashtags}} <- Multi.new() - |> Multi.insert_all(:insert_all_op, Hashtag, structs, on_conflict: :nothing) + |> Multi.insert_all(:insert_all_op, Hashtag, structs, + on_conflict: :nothing, + conflict_target: :name + ) |> Multi.run(:query_op, fn _repo, _changes -> - {:ok, - Repo.all(from(ht in Hashtag, where: ht.name in fragment("?::citext[]", ^names)))} + {:ok, Repo.all(from(ht in Hashtag, where: ht.name in ^names))} end) |> Repo.transaction() do {:ok, hashtags} @@ -71,7 +77,7 @@ def get_or_create_by_names(names) when is_list(names) do def changeset(%Hashtag{} = struct, params) do struct |> cast(params, [:name]) - |> update_change(:name, &String.downcase/1) + |> update_change(:name, &normalize_name/1) |> validate_required([:name]) |> unique_constraint(:name) end diff --git a/lib/pleroma/migrators/hashtags_table_migrator.ex b/lib/pleroma/migrators/hashtags_table_migrator.ex index 45dab8470..07bb9aeb2 100644 --- a/lib/pleroma/migrators/hashtags_table_migrator.ex +++ b/lib/pleroma/migrators/hashtags_table_migrator.ex @@ -82,6 +82,7 @@ def handle_info(:migrate_hashtags, state) do State.reinit() update_status(:running) + put_stat(:iteration_processed_count, 0) put_stat(:started_at, NaiveDateTime.utc_now()) data_migration_id = data_migration_id() @@ -127,6 +128,7 @@ def handle_info(:migrate_hashtags, state) do max_object_id = Enum.at(object_ids, -1) put_stat(:max_processed_id, max_object_id) + increment_stat(:iteration_processed_count, length(object_ids)) increment_stat(:processed_count, length(object_ids)) increment_stat(:failed_count, length(failed_ids)) increment_stat(:affected_count, chunk_affected_count) @@ -176,7 +178,7 @@ def fault_rate do end defp records_per_second do - get_stat(:processed_count, 0) / Enum.max([running_time(), 1]) + get_stat(:iteration_processed_count, 0) / Enum.max([running_time(), 1]) end defp running_time do diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 5392ce7c9..8182bc205 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -10,6 +10,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do alias Pleroma.Conversation alias Pleroma.Conversation.Participation alias Pleroma.Filter + alias Pleroma.Hashtag alias Pleroma.Maps alias Pleroma.Notification alias Pleroma.Object @@ -698,8 +699,6 @@ defp restrict_embedded_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) end defp restrict_embedded_tag_all(query, %{tag_all: [_ | _] = tag_all}) do - tag_all = Enum.map(tag_all, &String.downcase/1) - from( [_activity, object] in query, where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all) @@ -717,8 +716,6 @@ defp restrict_embedded_tag_any(_query, %{tag: _tag, skip_preload: true}) do end defp restrict_embedded_tag_any(query, %{tag: [_ | _] = tag_any}) do - tag_any = Enum.map(tag_any, &String.downcase/1) - from( [_activity, object] in query, where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag_any) @@ -736,8 +733,6 @@ defp restrict_embedded_tag_reject_any(_query, %{tag_reject: _tag_reject, skip_pr end defp restrict_embedded_tag_reject_any(query, %{tag_reject: [_ | _] = tag_reject}) do - tag_reject = Enum.map(tag_reject, &String.downcase/1) - from( [_activity, object] in query, where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject) @@ -766,7 +761,7 @@ defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do fragment( """ (SELECT array_agg(hashtags.name) FROM hashtags JOIN hashtags_objects - ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[]) + ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) AND hashtags_objects.object_id = ?) @> ? """, ^tags, @@ -787,42 +782,19 @@ defp restrict_hashtag_any(_query, %{tag: _tag, skip_preload: true}) do end defp restrict_hashtag_any(query, %{tag: [_ | _] = tags}) do - # TODO: refactor: debug / experimental feature - if Config.get([:database, :improved_hashtag_timeline]) == :preselect_hashtag_ids do - hashtag_ids = - from(ht in Pleroma.Hashtag, - where: fragment("name = ANY(?::citext[])", ^tags), - select: ht.id + from( + [_activity, object] in query, + where: + fragment( + """ + EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects + ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) + AND hashtags_objects.object_id = ? LIMIT 1) + """, + ^tags, + object.id ) - |> Repo.all() - - from( - [_activity, object] in query, - where: - fragment( - """ - EXISTS ( - SELECT 1 FROM hashtags_objects WHERE hashtag_id = ANY(?) AND object_id = ? LIMIT 1) - """, - ^hashtag_ids, - object.id - ) - ) - else - from( - [_activity, object] in query, - where: - fragment( - """ - EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects - ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[]) - AND hashtags_objects.object_id = ? LIMIT 1) - """, - ^tags, - object.id - ) - ) - end + ) end defp restrict_hashtag_any(query, %{tag: tag}) when is_binary(tag) do @@ -842,7 +814,7 @@ defp restrict_hashtag_reject_any(query, %{tag_reject: [_ | _] = tags_reject}) do fragment( """ NOT EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects - ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[]) + ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) AND hashtags_objects.object_id = ? LIMIT 1) """, ^tags_reject, @@ -1220,6 +1192,21 @@ defp maybe_order(query, %{order: :asc}) do defp maybe_order(query, _), do: query + defp normalize_fetch_activities_query_opts(opts) do + Enum.reduce([:tag, :tag_all, :tag_reject], opts, fn key, opts -> + case opts[key] do + value when is_bitstring(value) -> + Map.put(opts, key, Hashtag.normalize_name(value)) + + value when is_list(value) -> + Map.put(opts, key, Enum.map(value, &Hashtag.normalize_name/1)) + + _ -> + opts + end + end) + end + defp fetch_activities_query_ap_ids_ops(opts) do source_user = opts[:muting_user] ap_id_relationships = if source_user, do: [:mute, :reblog_mute], else: [] @@ -1243,6 +1230,8 @@ defp fetch_activities_query_ap_ids_ops(opts) do end def fetch_activities_query(recipients, opts \\ %{}) do + opts = normalize_fetch_activities_query_opts(opts) + {restrict_blocked_opts, restrict_muted_opts, restrict_muted_reblogs_opts} = fetch_activities_query_ap_ids_ops(opts) diff --git a/priv/repo/migrations/20201221203824_create_hashtags_objects.exs b/priv/repo/migrations/20201221203824_create_hashtags_objects.exs index efd60369d..581f32b3c 100644 --- a/priv/repo/migrations/20201221203824_create_hashtags_objects.exs +++ b/priv/repo/migrations/20201221203824_create_hashtags_objects.exs @@ -7,7 +7,7 @@ def change do add(:object_id, references(:objects), null: false, primary_key: true) end - create_if_not_exists(unique_index(:hashtags_objects, [:hashtag_id, :object_id])) + # Note: PK index: "hashtags_objects_pkey" PRIMARY KEY, btree (hashtag_id, object_id) create_if_not_exists(index(:hashtags_objects, [:object_id])) end end diff --git a/priv/repo/migrations/20210222183840_remove_hashtags_objects_duplicate_index.exs b/priv/repo/migrations/20210222183840_remove_hashtags_objects_duplicate_index.exs new file mode 100644 index 000000000..6c4a2dfdc --- /dev/null +++ b/priv/repo/migrations/20210222183840_remove_hashtags_objects_duplicate_index.exs @@ -0,0 +1,11 @@ +defmodule Pleroma.Repo.Migrations.RemoveHashtagsObjectsDuplicateIndex do + use Ecto.Migration + + @moduledoc "Removes `hashtags_objects_hashtag_id_object_id_index` index (duplicate of PK index)." + + def up do + drop_if_exists(unique_index(:hashtags_objects, [:hashtag_id, :object_id])) + end + + def down, do: nil +end diff --git a/priv/repo/migrations/20210222184616_change_hashtags_name_to_text.exs b/priv/repo/migrations/20210222184616_change_hashtags_name_to_text.exs new file mode 100644 index 000000000..8940b6ca3 --- /dev/null +++ b/priv/repo/migrations/20210222184616_change_hashtags_name_to_text.exs @@ -0,0 +1,15 @@ +defmodule Pleroma.Repo.Migrations.ChangeHashtagsNameToText do + use Ecto.Migration + + def up do + alter table(:hashtags) do + modify(:name, :text) + end + end + + def down do + alter table(:hashtags) do + modify(:name, :citext) + end + end +end