element-hq
diff --git a/‎changelog.d/19212.misc‎
Lines changed: 1 addition & 0 deletions b/‎changelog.d/19212.misc‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎synapse/api/constants.py‎
Lines changed: 13 additions & 0 deletions b/‎synapse/api/constants.py‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎synapse/app/_base.py‎
Lines changed: 3 additions & 12 deletions b/‎synapse/app/_base.py‎
Lines changed: 3 additions & 12 deletions
diff --git a/‎synapse/http/site.py‎
Lines changed: 139 additions & 19 deletions b/‎synapse/http/site.py‎
Lines changed: 139 additions & 19 deletions
diff --git a/‎tests/http/test_site.py‎
Lines changed: 102 additions & 0 deletions b/‎tests/http/test_site.py‎
Lines changed: 102 additions & 0 deletions
diff --git a/‎tests/rest/client/test_login.py‎
Lines changed: 0 additions & 6 deletions b/‎tests/rest/client/test_login.py‎
Lines changed: 0 additions & 6 deletions
@@ -0,0 +1 @@
+Respond with useful error codes when `Content-Length` header/s are invalid.
@@ -29,6 +29,19 @@
 # the max size of a (canonical-json-encoded) event
 MAX_PDU_SIZE = 65536
 
+# The maximum allowed size, in bytes, of an HTTP request.
+# Other than media uploads, the biggest request we expect to see is a fully-loaded
+# /federation/v1/send request.
+#
+# The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are
+# limited to 65536 bytes (possibly slightly more if the sender didn't use canonical
+# json encoding); there is no specced limit to EDUs (see
+# https://github.com/matrix-org/matrix-doc/issues/3121).
+#
+# In short, we somewhat arbitrarily limit requests to 200 * 64 KiB (12.5 MiB).
+#
+MAX_REQUEST_SIZE = 200 * MAX_PDU_SIZE
+
 # Max/min size of ints in canonical JSON
 CANONICALJSON_MAX_INT = (2**53) - 1
 CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
 
@@ -59,7 +59,7 @@
 from twisted.web.resource import Resource
 
 import synapse.util.caches
-from synapse.api.constants import MAX_PDU_SIZE
+from synapse.api.constants import MAX_REQUEST_SIZE
 from synapse.app import check_bind_error
 from synapse.config import ConfigError
 from synapse.config._base import format_config_error
@@ -895,17 +895,8 @@ def sdnotify(state: bytes) -> None:
 def max_request_body_size(config: HomeServerConfig) -> int:
     """Get a suitable maximum size for incoming HTTP requests"""
 
-    # Other than media uploads, the biggest request we expect to see is a fully-loaded
-    # /federation/v1/send request.
-    #
-    # The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are
-    # limited to 65536 bytes (possibly slightly more if the sender didn't use canonical
-    # json encoding); there is no specced limit to EDUs (see
-    # https://github.com/matrix-org/matrix-doc/issues/3121).
-    #
-    # in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M)
-    #
-    max_request_size = 200 * MAX_PDU_SIZE
+    # Baseline default for any request that isn't configured in the homeserver config
+    max_request_size = MAX_REQUEST_SIZE
 
     # if we have a media repo enabled, we may need to allow larger uploads than that
     if config.media.can_load_media_repo:
 
@@ -19,6 +19,7 @@
 #
 #
 import contextlib
+import json
 import logging
 import time
 from http import HTTPStatus
@@ -36,6 +37,7 @@
 from twisted.web.resource import IResource, Resource
 from twisted.web.server import Request
 
+from synapse.api.errors import Codes, SynapseError
 from synapse.config.server import ListenerConfig
 from synapse.http import get_request_user_agent, redact_uri
 from synapse.http.proxy import ProxySite
@@ -59,6 +61,10 @@
 _next_request_seq = 0
 
 
+class ContentLengthError(SynapseError):
+    """Raised when validation of a request's `Content-Length` header fails.
+
+    Used for duplicated or non-integer headers, for a declared length above the
+    maximum request body size, and for a declared length that does not match the
+    size of the received request content.
+    """
+
+
 class SynapseRequest(Request):
     """Class which encapsulates an HTTP request to synapse.
 
@@ -144,36 +150,150 @@ def __repr__(self) -> str:
             self.synapse_site.site_tag,
         )
 
+    def _respond_with_error(self, synapse_error: SynapseError) -> None:
+        """Send a JSON error response for `synapse_error` and close the connection.
+
+        The response status is `synapse_error.code` and the body is the JSON
+        encoding of `synapse_error.error_dict(None)`.
+
+        Args:
+            synapse_error: The error to report to the client.
+        """
+        self.setResponseCode(synapse_error.code)
+        error_response_bytes = json.dumps(synapse_error.error_dict(None)).encode()
+
+        self.responseHeaders.setRawHeaders(b"Content-Type", [b"application/json"])
+        # Pass the value as bytes so that it matches the bytes header name, rather
+        # than mixing a bytes name with a str value.
+        self.responseHeaders.setRawHeaders(
+            b"Content-Length", [b"%d" % len(error_response_bytes)]
+        )
+        self.write(error_response_bytes)
+        self.loseConnection()
+
+    def _get_content_length_from_headers(self) -> int | None:
+        """Attempts to obtain the `Content-Length` value from the request's headers.
+
+        Returns:
+            Content length as `int` if present. Otherwise `None`.
+
+        Raises:
+            ContentLengthError: if multiple `Content-Length` headers are present or the
+                value is not a plain, non-negative decimal integer.
+        """
+        content_length_headers = self.requestHeaders.getRawHeaders(b"Content-Length")
+        if content_length_headers is None:
+            return None
+
+        # If there are multiple `Content-Length` headers return an error.
+        # We don't want to even try to pick the right one if there are multiple
+        # as we could run into problems similar to request smuggling vulnerabilities
+        # which rely on the mismatch of how different systems interpret information.
+        if len(content_length_headers) != 1:
+            raise ContentLengthError(
+                HTTPStatus.BAD_REQUEST,
+                "Multiple Content-Length headers received",
+                Codes.UNKNOWN,
+            )
+
+        raw_content_length = content_length_headers[0]
+        try:
+            # `int()` on its own is too lenient: it also accepts signs, surrounding
+            # whitespace and `_` digit separators (e.g. b"+5", b"-1", b" 5 ", b"1_0").
+            # None of those are a valid `Content-Length` (RFC 9110: 1*DIGIT), and
+            # accepting them invites exactly the kind of interpretation mismatch
+            # between systems that we want to avoid.
+            if not raw_content_length.isdigit():
+                raise ValueError("Content-Length must consist solely of ASCII digits")
+            # `int()` can still raise `ValueError` for an excessively long digit
+            # string (CPython limits the length of int <-> str conversions).
+            return int(raw_content_length)
+        except (ValueError, TypeError) as e:
+            raise ContentLengthError(
+                HTTPStatus.BAD_REQUEST,
+                "Content-Length header value is not a valid integer",
+                Codes.UNKNOWN,
+            ) from e
+
+    def _validate_content_length(self) -> None:
+        """Check the declared `Content-Length` against the size limit and the content.
+
+        Does nothing when the request carries no `Content-Length` header.
+
+        Raises:
+            ContentLengthError: If the header cannot be read as a single integer, if
+                the declared length exceeds `self._max_request_body_size`, or if it
+                differs from the content size reported by `self.content.tell()`.
+        """
+        # `content` has to exist by the time we get here.
+        assert self.content, "_validate_content_length() called before gotLength()"
+
+        content_length = self._get_content_length_from_headers()
+        if content_length is None:
+            # No header was sent, so there is nothing to cross-check.
+            return
+
+        # The current position of the content stream is used as the actual size.
+        actual_content_length = self.content.tell()
+
+        # First guard: the declared size must fit within the configured maximum.
+        if content_length > self._max_request_body_size:
+            logger.info(
+                "Rejecting request from %s because Content-Length %d exceeds maximum size %d: %s %s",
+                self.client,
+                content_length,
+                self._max_request_body_size,
+                self.get_method(),
+                self.get_redacted_uri(),
+            )
+            raise ContentLengthError(
+                HTTPStatus.REQUEST_ENTITY_TOO_LARGE,
+                f"Request content is too large (>{self._max_request_body_size})",
+                Codes.TOO_LARGE,
+            )
+
+        # Second guard: the declared size must equal what we actually have.
+        if content_length == actual_content_length:
+            return
+
+        if content_length < actual_content_length:
+            comparison = "smaller"
+        else:
+            comparison = "larger"
+
+        logger.info(
+            "Rejecting request from %s because Content-Length %d is %s than the request content size %d: %s %s",
+            self.client,
+            content_length,
+            comparison,
+            actual_content_length,
+            self.get_method(),
+            self.get_redacted_uri(),
+        )
+        raise ContentLengthError(
+            HTTPStatus.BAD_REQUEST,
+            f"Rejecting request as the Content-Length header value {content_length} "
+            f"is {comparison} than the actual request content size {actual_content_length}",
+            Codes.UNKNOWN,
+        )
+
     # Twisted machinery: this method is called by the Channel once the full request has
     # been received, to dispatch the request to a resource.
-    #
-    # We're patching Twisted to bail/abort early when we see someone trying to upload
-    # `multipart/form-data` so we can avoid Twisted parsing the entire request body into
-    # in-memory (specific problem of this specific `Content-Type`). This protects us
-    # from an attacker uploading something bigger than the available RAM and crashing
-    # the server with a `MemoryError`, or carefully block just enough resources to cause
-    # all other requests to fail.
-    #
-    # FIXME: This can be removed once we Twisted releases a fix and we update to a
-    # version that is patched
     def requestReceived(self, command: bytes, path: bytes, version: bytes) -> None:
+        # In the case of a Content-Length header being present, and its value being too
+        # large, throw a proper error to make debugging issues due to overly large requests much
+        # easier. Currently we handle such cases in `handleContentChunk` and abort the
+        # connection without providing a proper HTTP response.
+        #
+        # Attempting to write an HTTP response from within `handleContentChunk` does not
+        # work, so the code here has been added to at least provide a response in the
+        # case of the Content-Length header being present.
+        self.method, self.uri = command, path
+        self.clientproto = version
+
+        try:
+            self._validate_content_length()
+        except ContentLengthError as e:
+            self._respond_with_error(e)
+            return
+
+        # We're patching Twisted to bail/abort early when we see someone trying to upload
+        # `multipart/form-data` so we can avoid Twisted parsing the entire request body into
+        # in-memory (specific problem of this specific `Content-Type`). This protects us
+        # from an attacker uploading something bigger than the available RAM and crashing
+        # the server with a `MemoryError`, or carefully block just enough resources to cause
+        # all other requests to fail.
+        #
+        # FIXME: This can be removed once Twisted releases a fix and we update to a
+        # version that is patched
+        # See: https://github.com/element-hq/synapse/security/advisories/GHSA-rfq8-j7rh-8hf2
         if command == b"POST":
             ctype = self.requestHeaders.getRawHeaders(b"content-type")
             if ctype and b"multipart/form-data" in ctype[0]:
-                self.method, self.uri = command, path
-                self.clientproto = version
+                logger.warning(
+                    "Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s",
+                    self.client,
+                    self.get_method(),
+                    self.get_redacted_uri(),
+                )
+
                 self.code = HTTPStatus.UNSUPPORTED_MEDIA_TYPE.value
                 self.code_message = bytes(
                     HTTPStatus.UNSUPPORTED_MEDIA_TYPE.phrase, "ascii"
                 )
-                self.responseHeaders.setRawHeaders(b"content-length", [b"0"])
 
-                logger.warning(
-                    "Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s",
-                    self.client,
-                    command,
-                    path,
-                )
+                # FIXME: Return a better error response here, similar to the JSON
+                # error body sent by `_respond_with_error` in the other code paths.
+                self.responseHeaders.setRawHeaders(b"Content-Length", [b"0"])
                 self.write(b"")
                 self.loseConnection()
                 return
 
@@ -22,6 +22,7 @@
 from twisted.internet.address import IPv6Address
 from twisted.internet.testing import MemoryReactor, StringTransport
 
+from synapse.app._base import max_request_body_size
 from synapse.app.homeserver import SynapseHomeServer
 from synapse.server import HomeServer
 from synapse.util.clock import Clock
@@ -143,3 +144,104 @@ def test_content_type_multipart(self) -> None:
 
         # we should get a 415
         self.assertRegex(transport.value().decode(), r"^HTTP/1\.1 415 ")
+
+    def test_content_length_too_large(self) -> None:
+        """HTTP requests with Content-Length exceeding max size should be rejected with 413"""
+        self.hs.start_listening()
+
+        # find the HTTP server which is configured to listen on port 0
+        (port, factory, _backlog, interface) = self.reactor.tcpServers[0]
+        self.assertEqual(interface, "::")
+        self.assertEqual(port, 0)
+
+        # complete the connection and wire it up to a fake transport
+        client_address = IPv6Address("TCP", "::1", 2345)
+        protocol = factory.buildProtocol(client_address)
+        transport = StringTransport()
+        protocol.makeConnection(transport)
+
+        # Send a request with a Content-Length header that exceeds the limit.
+        # The limit is whatever `max_request_body_size` computes for this
+        # homeserver's config; declare (and send) one byte more than that.
+        oversized_length = 1 + max_request_body_size(self.hs.config)
+        protocol.dataReceived(
+            b"POST / HTTP/1.1\r\n"
+            b"Connection: close\r\n"
+            b"Content-Length: " + str(oversized_length).encode() + b"\r\n"
+            b"\r\n"
+            b"" + b"x" * oversized_length + b"\r\n"
+            b"\r\n"
+        )
+
+        # Advance the reactor until the server starts closing the connection
+        while not transport.disconnecting:
+            self.reactor.advance(1)
+
+        # We should get a 413 carrying the `M_TOO_LARGE` error code
+        response = transport.value().decode()
+        self.assertRegex(response, r"^HTTP/1\.1 413 ")
+        self.assertSubstring("M_TOO_LARGE", response)
+
+    def test_too_many_content_length_headers(self) -> None:
+        """HTTP requests with multiple Content-Length headers should be rejected with 400"""
+        self.hs.start_listening()
+
+        # find the HTTP server which is configured to listen on port 0
+        (port, factory, _backlog, interface) = self.reactor.tcpServers[0]
+        self.assertEqual(interface, "::")
+        self.assertEqual(port, 0)
+
+        # complete the connection and wire it up to a fake transport
+        client_address = IPv6Address("TCP", "::1", 2345)
+        protocol = factory.buildProtocol(client_address)
+        transport = StringTransport()
+        protocol.makeConnection(transport)
+
+        # Send two Content-Length headers. They agree with each other and with the
+        # 5-byte `xxxxx` body, so only the duplication itself is under test.
+        protocol.dataReceived(
+            b"POST / HTTP/1.1\r\n"
+            b"Connection: close\r\n"
+            b"Content-Length: " + str(5).encode() + b"\r\n"
+            b"Content-Length: " + str(5).encode() + b"\r\n"
+            b"\r\n"
+            b"" + b"xxxxx" + b"\r\n"
+            b"\r\n"
+        )
+
+        # Advance the reactor until the server starts closing the connection
+        while not transport.disconnecting:
+            self.reactor.advance(1)
+
+        # We should get a 400
+        response = transport.value().decode()
+        self.assertRegex(response, r"^HTTP/1\.1 400 ")
+
+    def test_invalid_content_length_headers(self) -> None:
+        """HTTP requests with invalid Content-Length header should be rejected with 400"""
+        self.hs.start_listening()
+
+        # find the HTTP server which is configured to listen on port 0
+        (port, factory, _backlog, interface) = self.reactor.tcpServers[0]
+        self.assertEqual(interface, "::")
+        self.assertEqual(port, 0)
+
+        # complete the connection and wire it up to a fake transport
+        client_address = IPv6Address("TCP", "::1", 2345)
+        protocol = factory.buildProtocol(client_address)
+        transport = StringTransport()
+        protocol.makeConnection(transport)
+
+        # Send a Content-Length header whose value (`eight`) is not an integer
+        protocol.dataReceived(
+            b"POST / HTTP/1.1\r\n"
+            b"Connection: close\r\n"
+            b"Content-Length: eight\r\n"
+            b"\r\n"
+            b"" + b"xxxxx" + b"\r\n"
+            b"\r\n"
+        )
+
+        # Advance the reactor until the server starts closing the connection
+        while not transport.disconnecting:
+            self.reactor.advance(1)
+
+        # We should get a 400
+        response = transport.value().decode()
+        self.assertRegex(response, r"^HTTP/1\.1 400 ")
@@ -1728,9 +1728,6 @@ def test_username_picker_use_displayname_avatar_and_email(self) -> None:
             content_is_form=True,
             custom_headers=[
                 ("Cookie", "username_mapping_session=" + session_id),
-                # old versions of twisted don't do form-parsing without a valid
-                # content-length header.
-                ("Content-Length", str(len(content))),
             ],
         )
         self.assertEqual(chan.code, 302, chan.result)
@@ -1818,9 +1815,6 @@ def test_username_picker_dont_use_displayname_avatar_or_email(self) -> None:
             content_is_form=True,
             custom_headers=[
                 ("Cookie", "username_mapping_session=" + session_id),
-                # old versions of twisted don't do form-parsing without a valid
-                # content-length header.
-                ("Content-Length", str(len(content))),
             ],
         )
         self.assertEqual(chan.code, 302, chan.result)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+Respond with useful error codes when `Content-Length` header/s are invalid.