Merge branch 'feature/optimize_rich_media_parser' into 'develop'
added prepare html for RichMedia.Parser See merge request pleroma/pleroma!1672
This commit is contained in:
commit
d1a84ceec8
@ -81,6 +81,7 @@ defp parse_url(url) do
|
|||||||
{:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: @hackney_options)
|
{:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: @hackney_options)
|
||||||
|
|
||||||
html
|
html
|
||||||
|
|> parse_html
|
||||||
|> maybe_parse()
|
|> maybe_parse()
|
||||||
|> Map.put(:url, url)
|
|> Map.put(:url, url)
|
||||||
|> clean_parsed_data()
|
|> clean_parsed_data()
|
||||||
@ -91,6 +92,8 @@ defp parse_url(url) do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
defp parse_html(html), do: Floki.parse(html)
|
||||||
|
|
||||||
defp maybe_parse(html) do
|
defp maybe_parse(html) do
|
||||||
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
|
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
|
||||||
case parser.parse(html, acc) do
|
case parser.parse(html, acc) do
|
||||||
@ -100,7 +103,8 @@ defp maybe_parse(html) do
|
|||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp check_parsed_data(%{title: title} = data) when is_binary(title) and byte_size(title) > 0 do
|
defp check_parsed_data(%{title: title} = data)
|
||||||
|
when is_binary(title) and byte_size(title) > 0 do
|
||||||
{:ok, data}
|
{:ok, data}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
2
mix.exs
2
mix.exs
@ -133,7 +133,7 @@ defp deps do
|
|||||||
{:phoenix_swoosh, "~> 0.2"},
|
{:phoenix_swoosh, "~> 0.2"},
|
||||||
{:gen_smtp, "~> 0.13"},
|
{:gen_smtp, "~> 0.13"},
|
||||||
{:websocket_client, git: "https://github.com/jeremyong/websocket_client.git", only: :test},
|
{:websocket_client, git: "https://github.com/jeremyong/websocket_client.git", only: :test},
|
||||||
{:floki, "~> 0.20.0"},
|
{:floki, "~> 0.23.0"},
|
||||||
{:ex_syslogger, github: "slashmili/ex_syslogger", tag: "1.4.0"},
|
{:ex_syslogger, github: "slashmili/ex_syslogger", tag: "1.4.0"},
|
||||||
{:timex, "~> 3.5"},
|
{:timex, "~> 3.5"},
|
||||||
{:ueberauth, "~> 0.4"},
|
{:ueberauth, "~> 0.4"},
|
||||||
|
2
mix.lock
2
mix.lock
@ -34,7 +34,7 @@
|
|||||||
"ex_rated": {:hex, :ex_rated, "1.3.3", "30ecbdabe91f7eaa9d37fa4e81c85ba420f371babeb9d1910adbcd79ec798d27", [:mix], [{:ex2ms, "~> 1.5", [hex: :ex2ms, repo: "hexpm", optional: false]}], "hexpm"},
|
"ex_rated": {:hex, :ex_rated, "1.3.3", "30ecbdabe91f7eaa9d37fa4e81c85ba420f371babeb9d1910adbcd79ec798d27", [:mix], [{:ex2ms, "~> 1.5", [hex: :ex2ms, repo: "hexpm", optional: false]}], "hexpm"},
|
||||||
"ex_syslogger": {:git, "https://github.com/slashmili/ex_syslogger.git", "f3963399047af17e038897c69e20d552e6899e1d", [tag: "1.4.0"]},
|
"ex_syslogger": {:git, "https://github.com/slashmili/ex_syslogger.git", "f3963399047af17e038897c69e20d552e6899e1d", [tag: "1.4.0"]},
|
||||||
"excoveralls": {:hex, :excoveralls, "0.11.1", "dd677fbdd49114fdbdbf445540ec735808250d56b011077798316505064edb2c", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm"},
|
"excoveralls": {:hex, :excoveralls, "0.11.1", "dd677fbdd49114fdbdbf445540ec735808250d56b011077798316505064edb2c", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm"},
|
||||||
"floki": {:hex, :floki, "0.20.4", "be42ac911fece24b4c72f3b5846774b6e61b83fe685c2fc9d62093277fb3bc86", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}, {:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"},
|
"floki": {:hex, :floki, "0.23.0", "956ab6dba828c96e732454809fb0bd8d43ce0979b75f34de6322e73d4c917829", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm"},
|
||||||
"gen_smtp": {:hex, :gen_smtp, "0.14.0", "39846a03522456077c6429b4badfd1d55e5e7d0fdfb65e935b7c5e38549d9202", [:rebar3], [], "hexpm"},
|
"gen_smtp": {:hex, :gen_smtp, "0.14.0", "39846a03522456077c6429b4badfd1d55e5e7d0fdfb65e935b7c5e38549d9202", [:rebar3], [], "hexpm"},
|
||||||
"gen_stage": {:hex, :gen_stage, "0.14.2", "6a2a578a510c5bfca8a45e6b27552f613b41cf584b58210f017088d3d17d0b14", [:mix], [], "hexpm"},
|
"gen_stage": {:hex, :gen_stage, "0.14.2", "6a2a578a510c5bfca8a45e6b27552f613b41cf584b58210f017088d3d17d0b14", [:mix], [], "hexpm"},
|
||||||
"gen_state_machine": {:hex, :gen_state_machine, "2.0.5", "9ac15ec6e66acac994cc442dcc2c6f9796cf380ec4b08267223014be1c728a95", [:mix], [], "hexpm"},
|
"gen_state_machine": {:hex, :gen_state_machine, "2.0.5", "9ac15ec6e66acac994cc442dcc2c6f9796cf380ec4b08267223014be1c728a95", [:mix], [], "hexpm"},
|
||||||
|
Loading…
Reference in New Issue
Block a user