diff --git a/changelog.d/19231.bugfix b/changelog.d/19231.bugfix new file mode 100644 index 00000000000..580b642bb23 --- /dev/null +++ b/changelog.d/19231.bugfix @@ -0,0 +1 @@ +Fix a bug where Mastodon posts (and possibly other embeds) have the wrong description for URL previews. diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py index bbd8017b130..6b2918f72b7 100644 --- a/synapse/media/url_previewer.py +++ b/synapse/media/url_previewer.py @@ -328,10 +328,16 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes: # response failed or is incomplete. og_from_html = parse_html_to_open_graph(tree) - # Compile the Open Graph response by using the scraped - # information from the HTML and overlaying any information - # from the oEmbed response. - og = {**og_from_html, **og_from_oembed} + # Compile an Open Graph response by combining the oEmbed response + # and the information from the HTML, with information in the HTML + # preferred. + # + # The ordering here is intentional: certain websites (especially + # SPA JavaScript-based ones) including Mastodon and YouTube provide + # almost complete Open Graph descriptions but only stubs for oEmbed, + # with further oEmbed information being populated by JavaScript + # that Synapse won't execute. + og = og_from_oembed | og_from_html await self._precache_image_url(user, media_info, og) else: