From ed8f55ab8eb292903cec8f7699aa6775cc304458 Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Fri, 18 Jan 2019 10:35:45 +0300 Subject: [PATCH] [#477] User: FTS and trigram search results mixing (to handle misspelled requests). --- lib/pleroma/user.ex | 138 ++++++++++-------- test/user_test.exs | 6 + .../twitter_api_controller_test.exs | 2 +- 3 files changed, 87 insertions(+), 59 deletions(-) diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 8ae36416a..1d0bf1edf 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -679,13 +679,35 @@ def get_recipients_from_activity(%Activity{recipients: to}) do end def search(query, resolve \\ false, for_user \\ nil) do - # strip the beginning @ off if there is a query + # Strip the beginning @ off if there is a query query = String.trim_leading(query, "@") - if resolve do - User.get_or_fetch_by_nickname(query) - end + if resolve, do: User.get_or_fetch_by_nickname(query) + fts_results = do_search(fts_search_subquery(query), for_user) + + trigram_results = do_search(trigram_search_subquery(query), for_user) + + Enum.uniq_by(fts_results ++ trigram_results, & &1.id) + end + + defp do_search(subquery, for_user, options \\ []) do + q = + from( + s in subquery(subquery), + order_by: [desc: s.search_rank], + limit: ^(options[:limit] || 20) + ) + + results = + q + |> Repo.all() + |> Enum.filter(&(&1.search_rank > 0)) + + boost_search_results(results, for_user) + end + + defp fts_search_subquery(query) do processed_query = query |> String.replace(~r/\W+/, " ") @@ -694,69 +716,69 @@ def search(query, resolve \\ false, for_user \\ nil) do |> Enum.map(&(&1 <> ":*")) |> Enum.join(" | ") - inner = - from( - u in User, - select_merge: %{ - search_rank: - fragment( - """ - ts_rank_cd( - setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || - setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), - to_tsquery('simple', ?), - 32 - ) - """, - u.nickname, - u.name, - ^processed_query + from( + u in User, + select_merge: %{ + search_rank: + fragment( + """ + ts_rank_cd( + setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), + to_tsquery('simple', ?), + 32 ) - }, - where: not is_nil(u.nickname) - ) + """, + u.nickname, + u.name, + ^processed_query + ) + }, + where: not is_nil(u.nickname) + ) + end - q = - from( - s in subquery(inner), - order_by: [desc: s.search_rank], - limit: 20 - ) + defp trigram_search_subquery(query) do + from( + u in User, + select_merge: %{ + search_rank: + fragment( + "similarity(?, ? || ' ' || coalesce(?, ''))", + ^query, + u.nickname, + u.name + ) + }, + where: not is_nil(u.nickname) + ) + end - results = - q - |> Repo.all() - |> Enum.filter(&(&1.search_rank > 0)) + defp boost_search_results(results, nil), do: results - weighted_results = - if for_user do - friends_ids = get_friends_ids(for_user) - followers_ids = get_followers_ids(for_user) + defp boost_search_results(results, for_user) do + friends_ids = get_friends_ids(for_user) + followers_ids = get_followers_ids(for_user) - Enum.map( - results, - fn u -> - search_rank_coef = - cond do - u.id in friends_ids -> - 1.2 + Enum.map( + results, + fn u -> + search_rank_coef = + cond do + u.id in friends_ids -> + 1.2 - u.id in followers_ids -> - 1.1 + u.id in followers_ids -> + 1.1 - true -> - 1 - end - - Map.put(u, :search_rank, u.search_rank * search_rank_coef) + true -> + 1 end - ) - |> Enum.sort_by(&(-&1.search_rank)) - else - results - end - weighted_results + Map.put(u, :search_rank, u.search_rank * search_rank_coef) + end + ) + |> Enum.sort_by(&(-&1.search_rank)) end def blocks_import(%User{} = blocker, blocked_identifiers) when is_list(blocked_identifiers) do diff --git a/test/user_test.exs b/test/user_test.exs index 48b7b72ec..339def217 100644 --- a/test/user_test.exs +++ b/test/user_test.exs @@ -814,6 +814,12 @@ test "finds users, ranking by similarity" do assert [u4.id, u3.id, u1.id] == Enum.map(User.search("lain@ple"), & &1.id) end + test "finds users, handling misspelled requests" do + u1 = insert(:user, %{name: "lain"}) + + assert [u1.id] == Enum.map(User.search("laiin"), & &1.id) + end + test "finds users, boosting ranks of friends and followers" do u1 = insert(:user) u2 = insert(:user, %{name: "Doe"}) diff --git a/test/web/twitter_api/twitter_api_controller_test.exs b/test/web/twitter_api/twitter_api_controller_test.exs index a4baf2b5f..e013d1aca 100644 --- a/test/web/twitter_api/twitter_api_controller_test.exs +++ b/test/web/twitter_api/twitter_api_controller_test.exs @@ -1656,7 +1656,7 @@ test "it denies a friend request" do test "it returns users, ordered by similarity", %{conn: conn} do user = insert(:user, %{name: "eal"}) user_two = insert(:user, %{name: "eal me"}) - _user_three = insert(:user, %{name: "ebn"}) + _user_three = insert(:user, %{name: "zzz"}) resp = conn