From 8b8b8599e9359ea5c1212144c50ab406025016c5 Mon Sep 17 00:00:00 2001 From: Egor Kislitsyn Date: Tue, 9 Jun 2020 21:49:24 +0400 Subject: [PATCH] Fix atom leak in Rich Media Parser --- .../web/mastodon_api/views/status_view.ex | 14 ++-- lib/pleroma/web/rich_media/helpers.ex | 6 +- lib/pleroma/web/rich_media/parser.ex | 12 +-- .../rich_media/parsers/meta_tags_parser.ex | 8 +- .../web/rich_media/parsers/oembed_parser.ex | 18 ++--- test/web/rich_media/parser_test.exs | 75 ++++++++++--------- .../rich_media/parsers/twitter_card_test.exs | 60 +++++++-------- 7 files changed, 91 insertions(+), 102 deletions(-) diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index a042075f5..f0bdb49c3 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -307,8 +307,8 @@ def render("card.json", %{rich_media: rich_media, page_url: page_url}) do page_url_data = URI.parse(page_url) page_url_data = - if rich_media[:url] != nil do - URI.merge(page_url_data, URI.parse(rich_media[:url])) + if is_binary(rich_media["url"]) do + URI.merge(page_url_data, URI.parse(rich_media["url"])) else page_url_data end @@ -316,11 +316,9 @@ def render("card.json", %{rich_media: rich_media, page_url: page_url}) do page_url = page_url_data |> to_string image_url = - if rich_media[:image] != nil do - URI.merge(page_url_data, URI.parse(rich_media[:image])) + if is_binary(rich_media["image"]) do + URI.merge(page_url_data, URI.parse(rich_media["image"])) |> to_string - else - nil end %{ @@ -329,8 +327,8 @@ def render("card.json", %{rich_media: rich_media, page_url: page_url}) do provider_url: page_url_data.scheme <> "://" <> page_url_data.host, url: page_url, image: image_url |> MediaProxy.url(), - title: rich_media[:title] || "", - description: rich_media[:description] || "", + title: rich_media["title"] || "", + description: rich_media["description"] || "", pleroma: %{ opengraph: rich_media } diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index 9d3d7f978..1729141e9 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -9,7 +9,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do alias Pleroma.Object alias Pleroma.Web.RichMedia.Parser - @spec validate_page_url(any()) :: :ok | :error + @spec validate_page_url(URI.t() | binary()) :: :ok | :error defp validate_page_url(page_url) when is_binary(page_url) do validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld] @@ -18,8 +18,8 @@ defp validate_page_url(page_url) when is_binary(page_url) do |> parse_uri(page_url) end - defp validate_page_url(%URI{host: host, scheme: scheme, authority: authority}) - when scheme == "https" and not is_nil(authority) do + defp validate_page_url(%URI{host: host, scheme: "https", authority: authority}) + when is_binary(authority) do cond do host in Config.get([:rich_media, :ignore_hosts], []) -> :error diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index 0779065ee..7b45ecb9c 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -83,7 +83,7 @@ defp parse_url(url) do html |> parse_html() |> maybe_parse() - |> Map.put(:url, url) + |> Map.put("url", url) |> clean_parsed_data() |> check_parsed_data() rescue @@ -103,8 +103,8 @@ defp maybe_parse(html) do end) end - defp check_parsed_data(%{title: title} = data) - when is_binary(title) and byte_size(title) > 0 do + defp check_parsed_data(%{"title" => title} = data) + when is_binary(title) and title != "" do {:ok, data} end @@ -115,11 +115,7 @@ defp check_parsed_data(data) do defp clean_parsed_data(data) do data |> Enum.reject(fn {key, val} -> - with {:ok, _} <- Jason.encode(%{key => val}) do - false - else - _ -> true - end + not match?({:ok, _}, Jason.encode(%{key => val})) end) |> Map.new() end diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex index ae0f36702..2762b5902 100644 --- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex +++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex @@ -29,19 +29,19 @@ defp normalize_attributes(html_node, prefix, key_name, value_name) do {_tag, attributes, _children} = html_node data = - Enum.into(attributes, %{}, fn {name, value} -> + Map.new(attributes, fn {name, value} -> {name, String.trim_leading(value, "#{prefix}:")} end) - %{String.to_atom(data[key_name]) => data[value_name]} + %{data[key_name] => data[value_name]} end - defp maybe_put_title(%{title: _} = meta, _), do: meta + defp maybe_put_title(%{"title" => _} = meta, _), do: meta defp maybe_put_title(meta, html) when meta != %{} do case get_page_title(html) do "" -> meta - title -> Map.put_new(meta, :title, title) + title -> Map.put_new(meta, "title", title) end end diff --git a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex index 8f32bf91b..db8ccf15d 100644 --- a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex +++ b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex @@ -5,7 +5,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do def parse(html, _data) do with elements = [_ | _] <- get_discovery_data(html), - {:ok, oembed_url} <- get_oembed_url(elements), + oembed_url when is_binary(oembed_url) <- get_oembed_url(elements), {:ok, oembed_data} <- get_oembed_data(oembed_url) do {:ok, oembed_data} else @@ -17,19 +17,13 @@ defp get_discovery_data(html) do html |> Floki.find("link[type='application/json+oembed']") end - defp get_oembed_url(nodes) do - {"link", attributes, _children} = nodes |> hd() - - {:ok, Enum.into(attributes, %{})["href"]} + defp get_oembed_url([{"link", attributes, _children} | _]) do + Enum.find_value(attributes, fn {k, v} -> if k == "href", do: v end) end defp get_oembed_data(url) do - {:ok, %Tesla.Env{body: json}} = Pleroma.HTTP.get(url, [], adapter: [pool: :media]) - - {:ok, data} = Jason.decode(json) - - data = data |> Map.new(fn {k, v} -> {String.to_atom(k), v} end) - - {:ok, data} + with {:ok, %Tesla.Env{body: json}} <- Pleroma.HTTP.get(url, [], adapter: [pool: :media]) do + Jason.decode(json) + end end end diff --git a/test/web/rich_media/parser_test.exs b/test/web/rich_media/parser_test.exs index e54a13bc8..420a612c6 100644 --- a/test/web/rich_media/parser_test.exs +++ b/test/web/rich_media/parser_test.exs @@ -60,19 +60,19 @@ test "returns error when no metadata present" do test "doesn't just add a title" do assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/non-ogp") == {:error, - "Found metadata was invalid or incomplete: %{url: \"http://example.com/non-ogp\"}"} + "Found metadata was invalid or incomplete: %{\"url\" => \"http://example.com/non-ogp\"}"} end test "parses ogp" do assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") == {:ok, %{ - image: "http://ia.media-imdb.com/images/rock.jpg", - title: "The Rock", - description: + "image" => "http://ia.media-imdb.com/images/rock.jpg", + "title" => "The Rock", + "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", - type: "video.movie", - url: "http://example.com/ogp" + "type" => "video.movie", + "url" => "http://example.com/ogp" }} end @@ -80,12 +80,12 @@ test "falls back to when ogp:title is missing" do assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp-missing-title") == {:ok, %{ - image: "http://ia.media-imdb.com/images/rock.jpg", - title: "The Rock (1996)", - description: + "image" => "http://ia.media-imdb.com/images/rock.jpg", + "title" => "The Rock (1996)", + "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", - type: "video.movie", - url: "http://example.com/ogp-missing-title" + "type" => "video.movie", + "url" => "http://example.com/ogp-missing-title" }} end @@ -93,12 +93,12 @@ test "parses twitter card" do assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") == {:ok, %{ - card: "summary", - site: "@flickr", - image: "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg", - title: "Small Island Developing States Photo Submission", - description: "View the album on Flickr.", - url: "http://example.com/twitter-card" + "card" => "summary", + "site" => "@flickr", + "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg", + "title" => "Small Island Developing States Photo Submission", + "description" => "View the album on Flickr.", + "url" => "http://example.com/twitter-card" }} end @@ -106,27 +106,28 @@ test "parses OEmbed" do assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/oembed") == {:ok, %{ - author_name: "‮‭‬bees‬", - author_url: "https://www.flickr.com/photos/bees/", - cache_age: 3600, - flickr_type: "photo", - height: "768", - html: + "author_name" => "‮‭‬bees‬", + "author_url" => "https://www.flickr.com/photos/bees/", + "cache_age" => 3600, + "flickr_type" => "photo", + "height" => "768", + "html" => "<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by ‮‭‬bees‬, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>", - license: "All Rights Reserved", - license_id: 0, - provider_name: "Flickr", - provider_url: "https://www.flickr.com/", - thumbnail_height: 150, - thumbnail_url: "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg", - thumbnail_width: 150, - title: "Bacon Lollys", - type: "photo", - url: "http://example.com/oembed", - version: "1.0", - web_page: "https://www.flickr.com/photos/bees/2362225867/", - web_page_short_url: "https://flic.kr/p/4AK2sc", - width: "1024" + "license" => "All Rights Reserved", + "license_id" => 0, + "provider_name" => "Flickr", + "provider_url" => "https://www.flickr.com/", + "thumbnail_height" => 150, + "thumbnail_url" => + "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg", + "thumbnail_width" => 150, + "title" => "Bacon Lollys", + "type" => "photo", + "url" => "http://example.com/oembed", + "version" => "1.0", + "web_page" => "https://www.flickr.com/photos/bees/2362225867/", + "web_page_short_url" => "https://flic.kr/p/4AK2sc", + "width" => "1024" }} end diff --git a/test/web/rich_media/parsers/twitter_card_test.exs b/test/web/rich_media/parsers/twitter_card_test.exs index 87c767c15..847623535 100644 --- a/test/web/rich_media/parsers/twitter_card_test.exs +++ b/test/web/rich_media/parsers/twitter_card_test.exs @@ -19,11 +19,11 @@ test "parses twitter card with only name attributes" do assert TwitterCard.parse(html, %{}) == {:ok, %{ - "app:id:googleplay": "com.nytimes.android", - "app:name:googleplay": "NYTimes", - "app:url:googleplay": "nytimes://reader/id/100000006583622", - site: nil, - title: + "app:id:googleplay" => "com.nytimes.android", + "app:name:googleplay" => "NYTimes", + "app:url:googleplay" => "nytimes://reader/id/100000006583622", + "site" => nil, + "title" => "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times" }} end @@ -36,15 +36,15 @@ test "parses twitter card with only property attributes" do assert TwitterCard.parse(html, %{}) == {:ok, %{ - card: "summary_large_image", - description: + "card" => "summary_large_image", + "description" => "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", - image: + "image" => "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg", - "image:alt": "", - title: + "image:alt" => "", + "title" => "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", - url: + "url" => "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html" }} end @@ -57,19 +57,19 @@ test "parses twitter card with name & property attributes" do assert TwitterCard.parse(html, %{}) == {:ok, %{ - "app:id:googleplay": "com.nytimes.android", - "app:name:googleplay": "NYTimes", - "app:url:googleplay": "nytimes://reader/id/100000006583622", - card: "summary_large_image", - description: + "app:id:googleplay" => "com.nytimes.android", + "app:name:googleplay" => "NYTimes", + "app:url:googleplay" => "nytimes://reader/id/100000006583622", + "card" => "summary_large_image", + "description" => "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", - image: + "image" => "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg", - "image:alt": "", - site: nil, - title: + "image:alt" => "", + "site" => nil, + "title" => "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", - url: + "url" => "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html" }} end @@ -86,11 +86,11 @@ test "respect only first title tag on the page" do assert TwitterCard.parse(html, %{}) == {:ok, %{ - site: "@atlasobscura", - title: + "site" => "@atlasobscura", + "title" => "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura", - card: "summary_large_image", - image: image_path + "card" => "summary_large_image", + "image" => image_path }} end @@ -102,12 +102,12 @@ test "takes first founded title in html head if there is html markup error" do assert TwitterCard.parse(html, %{}) == {:ok, %{ - site: nil, - title: + "site" => nil, + "title" => "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times", - "app:id:googleplay": "com.nytimes.android", - "app:name:googleplay": "NYTimes", - "app:url:googleplay": "nytimes://reader/id/100000006583622" + "app:id:googleplay" => "com.nytimes.android", + "app:name:googleplay" => "NYTimes", + "app:url:googleplay" => "nytimes://reader/id/100000006583622" }} end end