rich media: don't crawl bogus URIs

This commit is contained in:
William Pitcock 2019-02-24 19:13:46 +00:00
parent d45dbdce5a
commit d38d537bee

View File

@@ -8,10 +8,17 @@ defmodule Pleroma.Web.RichMedia.Helpers do
alias Pleroma.HTML alias Pleroma.HTML
alias Pleroma.Web.RichMedia.Parser alias Pleroma.Web.RichMedia.Parser
# Decides whether a candidate page URL should be handed to the rich media
# parser. Returns :ok for a URI that carries both a scheme and a non-empty
# authority, and :error for anything else.
#
# Accepts either an already-parsed %URI{} or a binary; a binary is parsed with
# URI.parse/1 and the resulting struct is validated by the clauses below.
defp validate_page_url(%URI{authority: authority}) when authority in [nil, ""], do: :error
defp validate_page_url(%URI{scheme: nil}), do: :error
defp validate_page_url(%URI{}), do: :ok

defp validate_page_url(page_url) when is_binary(page_url),
  do: validate_page_url(URI.parse(page_url))

# nil and every other non-binary term cannot be parsed by URI.parse/1; answer
# :error instead of letting the call raise a FunctionClauseError.
defp validate_page_url(_), do: :error
def fetch_data_for_activity(%Activity{} = activity) do def fetch_data_for_activity(%Activity{} = activity) do
with true <- Pleroma.Config.get([:rich_media, :enabled]), with true <- Pleroma.Config.get([:rich_media, :enabled]),
%Object{} = object <- Object.normalize(activity.data["object"]), %Object{} = object <- Object.normalize(activity.data["object"]),
{:ok, page_url} <- HTML.extract_first_external_url(object, object.data["content"]), {:ok, page_url} <- HTML.extract_first_external_url(object, object.data["content"]),
:ok <- validate_page_url(page_url),
{:ok, rich_media} <- Parser.parse(page_url) do {:ok, rich_media} <- Parser.parse(page_url) do
%{page_url: page_url, rich_media: rich_media} %{page_url: page_url, rich_media: rich_media}
else else