[#3213] Fixed hashtags.name lookup (the compared value must be cast to citext so that the query can use an index scan). Fixed embedded hashtags lookup (requested tags are now lowercased before matching) and adjusted tests.

This commit is contained in:
Ivan Tashkinov 2021-02-15 21:13:14 +03:00
parent 349b8b0f4f
commit 1dac7d1462
3 changed files with 31 additions and 17 deletions

View File

@ -22,7 +22,9 @@ defmodule Pleroma.Hashtag do
end end
def get_by_name(name) do def get_by_name(name) do
Repo.get_by(Hashtag, name: name) from(h in Hashtag)
|> where([h], fragment("name = ?::citext", ^String.downcase(name)))
|> Repo.one()
end end
def get_or_create_by_name(name) when is_bitstring(name) do def get_or_create_by_name(name) when is_bitstring(name) do
@ -37,6 +39,7 @@ def get_or_create_by_name(name) when is_bitstring(name) do
end end
def get_or_create_by_names(names) when is_list(names) do def get_or_create_by_names(names) when is_list(names) do
names = Enum.map(names, &String.downcase/1)
timestamp = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second) timestamp = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
structs = structs =
@ -52,7 +55,8 @@ def get_or_create_by_names(names) when is_list(names) do
Multi.new() Multi.new()
|> Multi.insert_all(:insert_all_op, Hashtag, structs, on_conflict: :nothing) |> Multi.insert_all(:insert_all_op, Hashtag, structs, on_conflict: :nothing)
|> Multi.run(:query_op, fn _repo, _changes -> |> Multi.run(:query_op, fn _repo, _changes ->
{:ok, Repo.all(from(ht in Hashtag, where: ht.name in ^names))} {:ok,
Repo.all(from(ht in Hashtag, where: ht.name in fragment("?::citext[]", ^names)))}
end) end)
|> Repo.transaction() do |> Repo.transaction() do
{:ok, hashtags} {:ok, hashtags}

View File

@ -698,6 +698,8 @@ defp restrict_embedded_tag_all(_query, %{tag_all: _tag_all, skip_preload: true})
end end
defp restrict_embedded_tag_all(query, %{tag_all: [_ | _] = tag_all}) do defp restrict_embedded_tag_all(query, %{tag_all: [_ | _] = tag_all}) do
tag_all = Enum.map(tag_all, &String.downcase/1)
from( from(
[_activity, object] in query, [_activity, object] in query,
where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all) where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all)
@ -714,10 +716,12 @@ defp restrict_embedded_tag_any(_query, %{tag: _tag, skip_preload: true}) do
raise_on_missing_preload() raise_on_missing_preload()
end end
defp restrict_embedded_tag_any(query, %{tag: [_ | _] = tag}) do defp restrict_embedded_tag_any(query, %{tag: [_ | _] = tag_any}) do
tag_any = Enum.map(tag_any, &String.downcase/1)
from( from(
[_activity, object] in query, [_activity, object] in query,
where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag) where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag_any)
) )
end end
@ -732,6 +736,8 @@ defp restrict_embedded_tag_reject_any(_query, %{tag_reject: _tag_reject, skip_pr
end end
defp restrict_embedded_tag_reject_any(query, %{tag_reject: [_ | _] = tag_reject}) do defp restrict_embedded_tag_reject_any(query, %{tag_reject: [_ | _] = tag_reject}) do
tag_reject = Enum.map(tag_reject, &String.downcase/1)
from( from(
[_activity, object] in query, [_activity, object] in query,
where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject) where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
@ -749,6 +755,10 @@ defp restrict_hashtag_all(_query, %{tag_all: _tag, skip_preload: true}) do
raise_on_missing_preload() raise_on_missing_preload()
end end
defp restrict_hashtag_all(query, %{tag_all: [single_tag]}) do
restrict_hashtag_any(query, %{tag: single_tag})
end
defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do
from( from(
[_activity, object] in query, [_activity, object] in query,
@ -756,7 +766,7 @@ defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do
fragment( fragment(
""" """
(SELECT array_agg(hashtags.name) FROM hashtags JOIN hashtags_objects (SELECT array_agg(hashtags.name) FROM hashtags JOIN hashtags_objects
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[])
AND hashtags_objects.object_id = ?) @> ? AND hashtags_objects.object_id = ?) @> ?
""", """,
^tags, ^tags,
@ -767,7 +777,7 @@ defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do
end end
defp restrict_hashtag_all(query, %{tag_all: tag}) when is_binary(tag) do defp restrict_hashtag_all(query, %{tag_all: tag}) when is_binary(tag) do
restrict_hashtag_any(query, %{tag: tag}) restrict_hashtag_all(query, %{tag_all: [tag]})
end end
defp restrict_hashtag_all(query, _), do: query defp restrict_hashtag_all(query, _), do: query
@ -783,7 +793,7 @@ defp restrict_hashtag_any(query, %{tag: [_ | _] = tags}) do
fragment( fragment(
""" """
EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[])
AND hashtags_objects.object_id = ? LIMIT 1) AND hashtags_objects.object_id = ? LIMIT 1)
""", """,
^tags, ^tags,
@ -809,7 +819,7 @@ defp restrict_hashtag_reject_any(query, %{tag_reject: [_ | _] = tags_reject}) do
fragment( fragment(
""" """
NOT EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects NOT EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[])
AND hashtags_objects.object_id = ? LIMIT 1) AND hashtags_objects.object_id = ? LIMIT 1)
""", """,
^tags_reject, ^tags_reject,

View File

@ -213,24 +213,24 @@ test "works for guppe actors" do
test "it fetches the appropriate tag-restricted posts" do test "it fetches the appropriate tag-restricted posts" do
user = insert(:user) user = insert(:user)
{:ok, status_one} = CommonAPI.post(user, %{status: ". #test"}) {:ok, status_one} = CommonAPI.post(user, %{status: ". #TEST"})
{:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"}) {:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"})
{:ok, status_three} = CommonAPI.post(user, %{status: ". #test #reject"}) {:ok, status_three} = CommonAPI.post(user, %{status: ". #test #Reject"})
{:ok, status_four} = CommonAPI.post(user, %{status: ". #any1 #any2"}) {:ok, status_four} = CommonAPI.post(user, %{status: ". #Any1 #any2"})
{:ok, status_five} = CommonAPI.post(user, %{status: ". #any2 #any1"}) {:ok, status_five} = CommonAPI.post(user, %{status: ". #Any2 #any1"})
for hashtag_timeline_strategy <- [true, false] do for hashtag_timeline_strategy <- [true, false] do
clear_config([:database, :improved_hashtag_timeline], hashtag_timeline_strategy) clear_config([:database, :improved_hashtag_timeline], hashtag_timeline_strategy)
fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"}) fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})
fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]}) fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["TEST", "essais"]})
fetch_three = fetch_three =
ActivityPub.fetch_activities([], %{ ActivityPub.fetch_activities([], %{
type: "Create", type: "Create",
tag: ["test", "essais"], tag: ["test", "Essais"],
tag_reject: ["reject"] tag_reject: ["reject"]
}) })
@ -238,21 +238,21 @@ test "it fetches the appropriate tag-restricted posts" do
ActivityPub.fetch_activities([], %{ ActivityPub.fetch_activities([], %{
type: "Create", type: "Create",
tag: ["test"], tag: ["test"],
tag_all: ["test", "reject"] tag_all: ["test", "REJECT"]
}) })
# Testing that deduplication (if needed) is done on DB (not Ecto) level; :limit is important # Testing that deduplication (if needed) is done on DB (not Ecto) level; :limit is important
fetch_five = fetch_five =
ActivityPub.fetch_activities([], %{ ActivityPub.fetch_activities([], %{
type: "Create", type: "Create",
tag: ["any1", "any2"], tag: ["ANY1", "any2"],
limit: 2 limit: 2
}) })
fetch_six = fetch_six =
ActivityPub.fetch_activities([], %{ ActivityPub.fetch_activities([], %{
type: "Create", type: "Create",
tag: ["any1", "any2"], tag: ["any1", "Any2"],
tag_all: [], tag_all: [],
tag_reject: [] tag_reject: []
}) })