Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/19286.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved previews for Mastodon and Tumblr posts.
32 changes: 28 additions & 4 deletions synapse/media/oembed.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@
logger = logging.getLogger(__name__)


# Providers whose oEmbed response yields only a stub description that is less
# useful than the page's Open Graph description.
REJECT_PROVIDER_DESCRIPTIONS = {"Tumblr"}


@attr.s(slots=True, frozen=True, auto_attribs=True)
class OEmbedResult:
# The Open Graph result (converted from the oEmbed result).
Expand Down Expand Up @@ -196,7 +200,7 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult:
if oembed_type == "rich":
html_str = oembed.get("html")
if isinstance(html_str, str):
calc_description_and_urls(open_graph_response, html_str)
calc_description_and_urls(open_graph_response, html_str, provider_name)

elif oembed_type == "photo":
# If this is a photo, use the full image, not the thumbnail.
Expand All @@ -208,7 +212,9 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult:
open_graph_response["og:type"] = "video.other"
html_str = oembed.get("html")
if html_str and isinstance(html_str, str):
calc_description_and_urls(open_graph_response, oembed["html"])
calc_description_and_urls(
open_graph_response, oembed["html"], provider_name
)
for size in ("width", "height"):
val = oembed.get(size)
if type(val) is int: # noqa: E721
Expand All @@ -232,7 +238,25 @@ def _fetch_urls(tree: "etree._Element", tag_name: str) -> list[str]:
return results


def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None:
def _should_reject_description(description: str, provider_name: str | None) -> bool:
    """
    Decide whether an oEmbed-derived description should be discarded so that
    any og:description obtained elsewhere is left in place.

    Some web apps serve stub documents via oEmbed that are meant to be filled
    in by client-side JavaScript, which synapse won't execute. For those, the
    og:description is the more complete of the two.

    Args:
        description: The description text extracted from the oEmbed HTML.
        provider_name: The provider name reported for the oEmbed response,
            or None.

    Returns:
        True if the description should be rejected, False if it may be used.
    """
    # Providers explicitly listed as supplying unhelpful stub descriptions.
    if provider_name in REJECT_PROVIDER_DESCRIPTIONS:
        return True

    # Mastodon posts can't be identified by provider, since the domain is
    # used as the provider, so recognise them by their content instead.
    if not description.startswith("Post by @"):
        return False
    return "View on Mastodon" in description


def calc_description_and_urls(
open_graph_response: JsonDict, html_body: str, provider_name: str | None
) -> None:
"""
Calculate description for an HTML document.

Expand Down Expand Up @@ -273,5 +297,5 @@ def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) ->
open_graph_response["og:video"] = video_urls[0]

description = parse_html_description(tree)
if description:
if description and not _should_reject_description(description, provider_name):
open_graph_response["og:description"] = description