From 8b8b8599e9359ea5c1212144c50ab406025016c5 Mon Sep 17 00:00:00 2001
From: Egor Kislitsyn <egor@kislitsyn.com>
Date: Tue, 9 Jun 2020 21:49:24 +0400
Subject: [PATCH] Fix atom leak in Rich Media Parser

---
 .../web/mastodon_api/views/status_view.ex     | 14 ++--
 lib/pleroma/web/rich_media/helpers.ex         |  6 +-
 lib/pleroma/web/rich_media/parser.ex          | 12 +--
 .../rich_media/parsers/meta_tags_parser.ex    |  8 +-
 .../web/rich_media/parsers/oembed_parser.ex   | 18 ++---
 test/web/rich_media/parser_test.exs           | 75 ++++++++++---------
 .../rich_media/parsers/twitter_card_test.exs  | 60 +++++++--------
 7 files changed, 91 insertions(+), 102 deletions(-)

diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex
index a042075f5..f0bdb49c3 100644
--- a/lib/pleroma/web/mastodon_api/views/status_view.ex
+++ b/lib/pleroma/web/mastodon_api/views/status_view.ex
@@ -307,8 +307,8 @@ def render("card.json", %{rich_media: rich_media, page_url: page_url}) do
     page_url_data = URI.parse(page_url)
 
     page_url_data =
-      if rich_media[:url] != nil do
-        URI.merge(page_url_data, URI.parse(rich_media[:url]))
+      if is_binary(rich_media["url"]) do
+        URI.merge(page_url_data, URI.parse(rich_media["url"]))
       else
         page_url_data
       end
@@ -316,11 +316,9 @@ def render("card.json", %{rich_media: rich_media, page_url: page_url}) do
     page_url = page_url_data |> to_string
 
     image_url =
-      if rich_media[:image] != nil do
-        URI.merge(page_url_data, URI.parse(rich_media[:image]))
+      if is_binary(rich_media["image"]) do
+        URI.merge(page_url_data, URI.parse(rich_media["image"]))
         |> to_string
-      else
-        nil
       end
 
     %{
@@ -329,8 +327,8 @@ def render("card.json", %{rich_media: rich_media, page_url: page_url}) do
       provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
       url: page_url,
       image: image_url |> MediaProxy.url(),
-      title: rich_media[:title] || "",
-      description: rich_media[:description] || "",
+      title: rich_media["title"] || "",
+      description: rich_media["description"] || "",
       pleroma: %{
         opengraph: rich_media
       }
diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex
index 9d3d7f978..1729141e9 100644
--- a/lib/pleroma/web/rich_media/helpers.ex
+++ b/lib/pleroma/web/rich_media/helpers.ex
@@ -9,7 +9,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
   alias Pleroma.Object
   alias Pleroma.Web.RichMedia.Parser
 
-  @spec validate_page_url(any()) :: :ok | :error
+  @spec validate_page_url(URI.t() | binary()) :: :ok | :error
   defp validate_page_url(page_url) when is_binary(page_url) do
     validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld]
 
@@ -18,8 +18,8 @@ defp validate_page_url(page_url) when is_binary(page_url) do
     |> parse_uri(page_url)
   end
 
-  defp validate_page_url(%URI{host: host, scheme: scheme, authority: authority})
-       when scheme == "https" and not is_nil(authority) do
+  defp validate_page_url(%URI{host: host, scheme: "https", authority: authority})
+       when is_binary(authority) do
     cond do
       host in Config.get([:rich_media, :ignore_hosts], []) ->
         :error
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index 0779065ee..7b45ecb9c 100644
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -83,7 +83,7 @@ defp parse_url(url) do
       html
       |> parse_html()
       |> maybe_parse()
-      |> Map.put(:url, url)
+      |> Map.put("url", url)
       |> clean_parsed_data()
       |> check_parsed_data()
     rescue
@@ -103,8 +103,8 @@ defp maybe_parse(html) do
     end)
   end
 
-  defp check_parsed_data(%{title: title} = data)
-       when is_binary(title) and byte_size(title) > 0 do
+  defp check_parsed_data(%{"title" => title} = data)
+       when is_binary(title) and title != "" do
     {:ok, data}
   end
 
@@ -115,11 +115,7 @@ defp check_parsed_data(data) do
   defp clean_parsed_data(data) do
     data
     |> Enum.reject(fn {key, val} ->
-      with {:ok, _} <- Jason.encode(%{key => val}) do
-        false
-      else
-        _ -> true
-      end
+      not match?({:ok, _}, Jason.encode(%{key => val}))
     end)
     |> Map.new()
   end
diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
index ae0f36702..2762b5902 100644
--- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
@@ -29,19 +29,19 @@ defp normalize_attributes(html_node, prefix, key_name, value_name) do
     {_tag, attributes, _children} = html_node
 
     data =
-      Enum.into(attributes, %{}, fn {name, value} ->
+      Map.new(attributes, fn {name, value} ->
         {name, String.trim_leading(value, "#{prefix}:")}
       end)
 
-    %{String.to_atom(data[key_name]) => data[value_name]}
+    %{data[key_name] => data[value_name]}
   end
 
-  defp maybe_put_title(%{title: _} = meta, _), do: meta
+  defp maybe_put_title(%{"title" => _} = meta, _), do: meta
 
   defp maybe_put_title(meta, html) when meta != %{} do
     case get_page_title(html) do
       "" -> meta
-      title -> Map.put_new(meta, :title, title)
+      title -> Map.put_new(meta, "title", title)
     end
   end
 
diff --git a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
index 8f32bf91b..db8ccf15d 100644
--- a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
@@ -5,7 +5,7 @@
 defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
   def parse(html, _data) do
     with elements = [_ | _] <- get_discovery_data(html),
-         {:ok, oembed_url} <- get_oembed_url(elements),
+         oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
          {:ok, oembed_data} <- get_oembed_data(oembed_url) do
       {:ok, oembed_data}
     else
@@ -17,19 +17,13 @@ defp get_discovery_data(html) do
     html |> Floki.find("link[type='application/json+oembed']")
   end
 
-  defp get_oembed_url(nodes) do
-    {"link", attributes, _children} = nodes |> hd()
-
-    {:ok, Enum.into(attributes, %{})["href"]}
+  defp get_oembed_url([{"link", attributes, _children} | _]) do
+    Enum.find_value(attributes, fn {k, v} -> if k == "href", do: v end)
   end
 
   defp get_oembed_data(url) do
-    {:ok, %Tesla.Env{body: json}} = Pleroma.HTTP.get(url, [], adapter: [pool: :media])
-
-    {:ok, data} = Jason.decode(json)
-
-    data = data |> Map.new(fn {k, v} -> {String.to_atom(k), v} end)
-
-    {:ok, data}
+    with {:ok, %Tesla.Env{body: json}} <- Pleroma.HTTP.get(url, [], adapter: [pool: :media]) do
+      Jason.decode(json)
+    end
   end
 end
diff --git a/test/web/rich_media/parser_test.exs b/test/web/rich_media/parser_test.exs
index e54a13bc8..420a612c6 100644
--- a/test/web/rich_media/parser_test.exs
+++ b/test/web/rich_media/parser_test.exs
@@ -60,19 +60,19 @@ test "returns error when no metadata present" do
   test "doesn't just add a title" do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/non-ogp") ==
              {:error,
-              "Found metadata was invalid or incomplete: %{url: \"http://example.com/non-ogp\"}"}
+              "Found metadata was invalid or incomplete: %{\"url\" => \"http://example.com/non-ogp\"}"}
   end
 
   test "parses ogp" do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") ==
              {:ok,
               %{
-                image: "http://ia.media-imdb.com/images/rock.jpg",
-                title: "The Rock",
-                description:
+                "image" => "http://ia.media-imdb.com/images/rock.jpg",
+                "title" => "The Rock",
+                "description" =>
                   "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
-                type: "video.movie",
-                url: "http://example.com/ogp"
+                "type" => "video.movie",
+                "url" => "http://example.com/ogp"
               }}
   end
 
@@ -80,12 +80,12 @@ test "falls back to <title> when ogp:title is missing" do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp-missing-title") ==
              {:ok,
               %{
-                image: "http://ia.media-imdb.com/images/rock.jpg",
-                title: "The Rock (1996)",
-                description:
+                "image" => "http://ia.media-imdb.com/images/rock.jpg",
+                "title" => "The Rock (1996)",
+                "description" =>
                   "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
-                type: "video.movie",
-                url: "http://example.com/ogp-missing-title"
+                "type" => "video.movie",
+                "url" => "http://example.com/ogp-missing-title"
               }}
   end
 
@@ -93,12 +93,12 @@ test "parses twitter card" do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") ==
              {:ok,
               %{
-                card: "summary",
-                site: "@flickr",
-                image: "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
-                title: "Small Island Developing States Photo Submission",
-                description: "View the album on Flickr.",
-                url: "http://example.com/twitter-card"
+                "card" => "summary",
+                "site" => "@flickr",
+                "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
+                "title" => "Small Island Developing States Photo Submission",
+                "description" => "View the album on Flickr.",
+                "url" => "http://example.com/twitter-card"
               }}
   end
 
@@ -106,27 +106,28 @@ test "parses OEmbed" do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/oembed") ==
              {:ok,
               %{
-                author_name: "‮‭‬bees‬",
-                author_url: "https://www.flickr.com/photos/bees/",
-                cache_age: 3600,
-                flickr_type: "photo",
-                height: "768",
-                html:
+                "author_name" => "‮‭‬bees‬",
+                "author_url" => "https://www.flickr.com/photos/bees/",
+                "cache_age" => 3600,
+                "flickr_type" => "photo",
+                "height" => "768",
+                "html" =>
                   "<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by ‮‭‬bees‬, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
-                license: "All Rights Reserved",
-                license_id: 0,
-                provider_name: "Flickr",
-                provider_url: "https://www.flickr.com/",
-                thumbnail_height: 150,
-                thumbnail_url: "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
-                thumbnail_width: 150,
-                title: "Bacon Lollys",
-                type: "photo",
-                url: "http://example.com/oembed",
-                version: "1.0",
-                web_page: "https://www.flickr.com/photos/bees/2362225867/",
-                web_page_short_url: "https://flic.kr/p/4AK2sc",
-                width: "1024"
+                "license" => "All Rights Reserved",
+                "license_id" => 0,
+                "provider_name" => "Flickr",
+                "provider_url" => "https://www.flickr.com/",
+                "thumbnail_height" => 150,
+                "thumbnail_url" =>
+                  "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
+                "thumbnail_width" => 150,
+                "title" => "Bacon Lollys",
+                "type" => "photo",
+                "url" => "http://example.com/oembed",
+                "version" => "1.0",
+                "web_page" => "https://www.flickr.com/photos/bees/2362225867/",
+                "web_page_short_url" => "https://flic.kr/p/4AK2sc",
+                "width" => "1024"
               }}
   end
 
diff --git a/test/web/rich_media/parsers/twitter_card_test.exs b/test/web/rich_media/parsers/twitter_card_test.exs
index 87c767c15..847623535 100644
--- a/test/web/rich_media/parsers/twitter_card_test.exs
+++ b/test/web/rich_media/parsers/twitter_card_test.exs
@@ -19,11 +19,11 @@ test "parses twitter card with only name attributes" do
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622",
-                site: nil,
-                title:
+                "app:id:googleplay" => "com.nytimes.android",
+                "app:name:googleplay" => "NYTimes",
+                "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+                "site" => nil,
+                "title" =>
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
               }}
   end
@@ -36,15 +36,15 @@ test "parses twitter card with only property attributes" do
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
-                card: "summary_large_image",
-                description:
+                "card" => "summary_large_image",
+                "description" =>
                   "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                image:
+                "image" =>
                   "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                "image:alt": "",
-                title:
+                "image:alt" => "",
+                "title" =>
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                url:
+                "url" =>
                   "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
               }}
   end
@@ -57,19 +57,19 @@ test "parses twitter card with name & property attributes" do
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622",
-                card: "summary_large_image",
-                description:
+                "app:id:googleplay" => "com.nytimes.android",
+                "app:name:googleplay" => "NYTimes",
+                "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+                "card" => "summary_large_image",
+                "description" =>
                   "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                image:
+                "image" =>
                   "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                "image:alt": "",
-                site: nil,
-                title:
+                "image:alt" => "",
+                "site" => nil,
+                "title" =>
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                url:
+                "url" =>
                   "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
               }}
   end
@@ -86,11 +86,11 @@ test "respect only first title tag on the page" do
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
-                site: "@atlasobscura",
-                title:
+                "site" => "@atlasobscura",
+                "title" =>
                   "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
-                card: "summary_large_image",
-                image: image_path
+                "card" => "summary_large_image",
+                "image" => image_path
               }}
   end
 
@@ -102,12 +102,12 @@ test "takes first founded title in html head if there is html markup error" do
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
-                site: nil,
-                title:
+                "site" => nil,
+                "title" =>
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622"
+                "app:id:googleplay" => "com.nytimes.android",
+                "app:name:googleplay" => "NYTimes",
+                "app:url:googleplay" => "nytimes://reader/id/100000006583622"
               }}
   end
 end