From e6d0ea1d6b13a979924329d02fb82f79d82c7236 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sat, 4 Jan 2025 10:18:38 +0100 Subject: [PATCH] Read chunked HTTP responses. Fix #1550. --- src/websockets/client.py | 2 +- src/websockets/http11.py | 145 ++++++++++++++++++++++++--------------- src/websockets/server.py | 2 +- tests/test_client.py | 2 +- tests/test_http11.py | 93 ++++++++++++++++++++----- tests/test_server.py | 2 +- 6 files changed, 167 insertions(+), 79 deletions(-) diff --git a/src/websockets/client.py b/src/websockets/client.py index 5ced05c2..37e2a8b3 100644 --- a/src/websockets/client.py +++ b/src/websockets/client.py @@ -333,7 +333,7 @@ def parse(self) -> Generator[None]: self.logger.debug("< HTTP/1.1 %d %s", code, phrase) for key, value in response.headers.raw_items(): self.logger.debug("< %s: %s", key, value) - if response.body is not None: + if response.body: self.logger.debug("< [body] (%d bytes)", len(response.body)) try: diff --git a/src/websockets/http11.py b/src/websockets/http11.py index 949a320f..396a43f0 100644 --- a/src/websockets/http11.py +++ b/src/websockets/http11.py @@ -185,14 +185,14 @@ class Response: status_code: Response code. reason_phrase: Response reason. headers: Response headers. - body: Response body, if any. + body: Response body. """ status_code: int reason_phrase: str headers: Headers - body: bytes | None = None + body: bytes = b"" _exception: Exception | None = None @@ -266,36 +266,9 @@ def parse( headers = yield from parse_headers(read_line) - # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3 - - if "Transfer-Encoding" in headers: - raise NotImplementedError("transfer codings aren't supported") - - # Since websockets only does GET requests (no HEAD, no CONNECT), all - # responses except 1xx, 204, and 304 include a message body. - if 100 <= status_code < 200 or status_code == 204 or status_code == 304: - body = None - else: - content_length: int | None - try: - # MultipleValuesError is sufficiently unlikely that we don't - # attempt to handle it. Instead we document that its parent - # class, LookupError, may be raised. - raw_content_length = headers["Content-Length"] - except KeyError: - content_length = None - else: - content_length = int(raw_content_length) - - if content_length is None: - try: - body = yield from read_to_eof(MAX_BODY_SIZE) - except RuntimeError: - raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes") - elif content_length > MAX_BODY_SIZE: - raise SecurityError(f"body too large: {content_length} bytes") - else: - body = yield from read_exact(content_length) + body = yield from read_body( + status_code, headers, read_line, read_exact, read_to_eof + ) return cls(status_code, reason, headers, body) @@ -308,11 +281,37 @@ def serialize(self) -> bytes: # we can keep this simple. response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode() response += self.headers.serialize() - if self.body is not None: - response += self.body + response += self.body return response +def parse_line( + read_line: Callable[[int], Generator[None, None, bytes]], +) -> Generator[None, None, bytes]: + """ + Parse a single line. + + CRLF is stripped from the return value. + + Args: + read_line: Generator-based coroutine that reads a LF-terminated line + or raises an exception if there isn't enough data. + + Raises: + EOFError: If the connection is closed without a CRLF. + SecurityError: If the response exceeds a security limit. + + """ + try: + line = yield from read_line(MAX_LINE_LENGTH) + except RuntimeError: + raise SecurityError("line too long") + # Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5 + if not line.endswith(b"\r\n"): + raise EOFError("line without CRLF") + return line[:-2] + + def parse_headers( read_line: Callable[[int], Generator[None, None, bytes]], ) -> Generator[None, None, Headers]: @@ -364,28 +363,62 @@ def parse_headers( return headers -def parse_line( +def read_body( + status_code: int, + headers: Headers, read_line: Callable[[int], Generator[None, None, bytes]], + read_exact: Callable[[int], Generator[None, None, bytes]], + read_to_eof: Callable[[int], Generator[None, None, bytes]], ) -> Generator[None, None, bytes]: - """ - Parse a single line. - - CRLF is stripped from the return value. - - Args: - read_line: Generator-based coroutine that reads a LF-terminated line - or raises an exception if there isn't enough data. - - Raises: - EOFError: If the connection is closed without a CRLF. - SecurityError: If the response exceeds a security limit. + # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3 + + # Since websockets only does GET requests (no HEAD, no CONNECT), all + # responses except 1xx, 204, and 304 include a message body. + if 100 <= status_code < 200 or status_code == 204 or status_code == 304: + return b"" + + # MultipleValuesError is sufficiently unlikely that we don't attempt to + # handle it when accessing headers. Instead we document that its parent + # class, LookupError, may be raised. + # Conversions from str to int are protected by sys.set_int_max_str_digits.. + + elif (coding := headers.get("Transfer-Encoding")) is not None: + if coding != "chunked": + raise NotImplementedError(f"transfer coding {coding} isn't supported") + + body = b"" + while True: + chunk_size_line = yield from parse_line(read_line) + raw_chunk_size = chunk_size_line.split(b";", 1)[0] + # Set a lower limit than default_max_str_digits; 1 EB is plenty. + if len(raw_chunk_size) > 15: + str_chunk_size = raw_chunk_size.decode(errors="backslashreplace") + raise SecurityError(f"chunk too large: 0x{str_chunk_size} bytes") + chunk_size = int(raw_chunk_size, 16) + if chunk_size == 0: + break + if len(body) + chunk_size > MAX_BODY_SIZE: + raise SecurityError( + f"chunk too large: {chunk_size} bytes after {len(body)} bytes" + ) + body += yield from read_exact(chunk_size) + if (yield from read_exact(2)) != b"\r\n": + raise ValueError("chunk without CRLF") + # Read the trailer. + yield from parse_headers(read_line) + return body + + elif (raw_content_length := headers.get("Content-Length")) is not None: + # Set a lower limit than default_max_str_digits; 1 EiB is plenty. + if len(raw_content_length) > 18: + raise SecurityError(f"body too large: {raw_content_length} bytes") + content_length = int(raw_content_length) + if content_length > MAX_BODY_SIZE: + raise SecurityError(f"body too large: {content_length} bytes") + return (yield from read_exact(content_length)) - """ - try: - line = yield from read_line(MAX_LINE_LENGTH) - except RuntimeError: - raise SecurityError("line too long") - # Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5 - if not line.endswith(b"\r\n"): - raise EOFError("line without CRLF") - return line[:-2] + else: + try: + return (yield from read_to_eof(MAX_BODY_SIZE)) + except RuntimeError: + raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes") diff --git a/src/websockets/server.py b/src/websockets/server.py index 1b663a13..fe3c65a7 100644 --- a/src/websockets/server.py +++ b/src/websockets/server.py @@ -525,7 +525,7 @@ def send_response(self, response: Response) -> None: self.logger.debug("> HTTP/1.1 %d %s", code, phrase) for key, value in response.headers.raw_items(): self.logger.debug("> %s: %s", key, value) - if response.body is not None: + if response.body: self.logger.debug("> [body] (%d bytes)", len(response.body)) self.writes.append(response.serialize()) diff --git a/tests/test_client.py b/tests/test_client.py index 1edbae57..9f3ab09b 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -204,7 +204,7 @@ def test_receive_successful_response(self, _generate_key): } ), ) - self.assertIsNone(response.body) + self.assertEqual(response.body, b"") self.assertIsNone(client.handshake_exc) def test_receive_failed_response(self, _generate_key): diff --git a/tests/test_http11.py b/tests/test_http11.py index 1fbcb3ba..bb0d27b9 100644 --- a/tests/test_http11.py +++ b/tests/test_http11.py @@ -87,7 +87,7 @@ def test_parse_body(self): ) def test_parse_body_with_transfer_encoding(self): - self.reader.feed_data(b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n") + self.reader.feed_data(b"GET / HTTP/1.1\r\nTransfer-Encoding: compress\r\n\r\n") with self.assertRaises(NotImplementedError) as raised: next(self.parse()) self.assertEqual( @@ -151,7 +151,7 @@ def test_parse(self): self.assertEqual(response.status_code, 101) self.assertEqual(response.reason_phrase, "Switching Protocols") self.assertEqual(response.headers["Upgrade"], "websocket") - self.assertIsNone(response.body) + self.assertEqual(response.body, b"") def test_parse_empty(self): self.reader.feed_eof() @@ -215,14 +215,7 @@ def test_parse_invalid_header(self): "invalid HTTP header line: Oops", ) - def test_parse_body_with_content_length(self): - self.reader.feed_data( - b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\nHello world!\n" - ) - response = self.assertGeneratorReturns(self.parse()) - self.assertEqual(response.body, b"Hello world!\n") - - def test_parse_body_without_content_length(self): + def test_parse_body(self): self.reader.feed_data(b"HTTP/1.1 200 OK\r\n\r\nHello world!\n") gen = self.parse() self.assertGeneratorRunning(gen) @@ -230,7 +223,23 @@ def test_parse_body_without_content_length(self): response = self.assertGeneratorReturns(gen) self.assertEqual(response.body, b"Hello world!\n") - def test_parse_body_with_content_length_too_long(self): + def test_parse_body_too_large(self): + self.reader.feed_data(b"HTTP/1.1 200 OK\r\n\r\n" + b"a" * 1048577) + with self.assertRaises(SecurityError) as raised: + next(self.parse()) + self.assertEqual( + str(raised.exception), + "body too large: over 1048576 bytes", + ) + + def test_parse_body_with_content_length(self): + self.reader.feed_data( + b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\nHello world!\n" + ) + response = self.assertGeneratorReturns(self.parse()) + self.assertEqual(response.body, b"Hello world!\n") + + def test_parse_body_with_content_length_and_body_too_large(self): self.reader.feed_data(b"HTTP/1.1 200 OK\r\nContent-Length: 1048577\r\n\r\n") with self.assertRaises(SecurityError) as raised: next(self.parse()) @@ -239,33 +248,79 @@ def test_parse_body_with_content_length_too_long(self): "body too large: 1048577 bytes", ) - def test_parse_body_without_content_length_too_long(self): - self.reader.feed_data(b"HTTP/1.1 200 OK\r\n\r\n" + b"a" * 1048577) + def test_parse_body_with_content_length_and_body_way_too_large(self): + self.reader.feed_data( + b"HTTP/1.1 200 OK\r\nContent-Length: 1234567890123456789\r\n\r\n" + ) with self.assertRaises(SecurityError) as raised: next(self.parse()) self.assertEqual( str(raised.exception), - "body too large: over 1048576 bytes", + "body too large: 1234567890123456789 bytes", ) - def test_parse_body_with_transfer_encoding(self): - self.reader.feed_data(b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n") + def test_parse_body_with_chunked_transfer_encoding(self): + self.reader.feed_data( + b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n" + b"6\r\nHello \r\n7\r\nworld!\n\r\n0\r\n\r\n" + ) + response = self.assertGeneratorReturns(self.parse()) + self.assertEqual(response.body, b"Hello world!\n") + + def test_parse_body_with_chunked_transfer_encoding_and_chunk_without_crlf(self): + self.reader.feed_data( + b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n" + b"6\r\nHello 7\r\nworld!\n0\r\n" + ) + with self.assertRaises(ValueError) as raised: + next(self.parse()) + self.assertEqual( + str(raised.exception), + "chunk without CRLF", + ) + + def test_parse_body_with_chunked_transfer_encoding_and_chunk_too_large(self): + self.reader.feed_data( + b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n" + b"100000\r\n" + b"a" * 1048576 + b"\r\n1\r\na\r\n0\r\n\r\n" + ) + with self.assertRaises(SecurityError) as raised: + next(self.parse()) + self.assertEqual( + str(raised.exception), + "chunk too large: 1 bytes after 1048576 bytes", + ) + + def test_parse_body_with_chunked_transfer_encoding_and_chunk_way_too_large(self): + self.reader.feed_data( + b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n" + b"1234567890ABCDEF\r\n\r\n" + ) + with self.assertRaises(SecurityError) as raised: + next(self.parse()) + self.assertEqual( + str(raised.exception), + "chunk too large: 0x1234567890ABCDEF bytes", + ) + + def test_parse_body_with_unsupported_transfer_encoding(self): + self.reader.feed_data(b"HTTP/1.1 200 OK\r\nTransfer-Encoding: compress\r\n\r\n") with self.assertRaises(NotImplementedError) as raised: next(self.parse()) self.assertEqual( str(raised.exception), - "transfer codings aren't supported", + "transfer coding compress isn't supported", ) def test_parse_body_no_content(self): self.reader.feed_data(b"HTTP/1.1 204 No Content\r\n\r\n") response = self.assertGeneratorReturns(self.parse()) - self.assertIsNone(response.body) + self.assertEqual(response.body, b"") def test_parse_body_not_modified(self): self.reader.feed_data(b"HTTP/1.1 304 Not Modified\r\n\r\n") response = self.assertGeneratorReturns(self.parse()) - self.assertIsNone(response.body) + self.assertEqual(response.body, b"") def test_serialize(self): # Example from the protocol overview in RFC 6455 diff --git a/tests/test_server.py b/tests/test_server.py index 5efeca2d..69f55568 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -281,7 +281,7 @@ def test_accept_response(self, _formatdate): } ), ) - self.assertIsNone(response.body) + self.assertEqual(response.body, b"") @patch("email.utils.formatdate", return_value=DATE) def test_reject_response(self, _formatdate):