diff --git a/docs/reference/features.rst b/docs/reference/features.rst index eaecd02a..93b083d2 100644 --- a/docs/reference/features.rst +++ b/docs/reference/features.rst @@ -166,12 +166,11 @@ Client | Perform HTTP Digest Authentication | ❌ | ❌ | ❌ | ❌ | | (`#784`_) | | | | | +------------------------------------+--------+--------+--------+--------+ - | Connect via HTTP proxy (`#364`_) | ❌ | ❌ | — | ❌ | + | Connect via HTTP proxy | ✅ | ✅ | — | ❌ | +------------------------------------+--------+--------+--------+--------+ | Connect via SOCKS5 proxy | ✅ | ✅ | — | ❌ | +------------------------------------+--------+--------+--------+--------+ -.. _#364: https://github.com/python-websockets/websockets/issues/364 .. _#784: https://github.com/python-websockets/websockets/issues/784 Known limitations diff --git a/docs/topics/proxies.rst b/docs/topics/proxies.rst index fd3ae78b..3167eab8 100644 --- a/docs/topics/proxies.rst +++ b/docs/topics/proxies.rst @@ -64,3 +64,8 @@ SOCKS proxy is configured in the operating system, python-socks uses SOCKS5h. python-socks supports username/password authentication for SOCKS5 (:rfc:`1929`) but does not support other authentication methods such as GSSAPI (:rfc:`1961`). + +HTTP proxies +------------ + +TODO diff --git a/src/websockets/asyncio/client.py b/src/websockets/asyncio/client.py index 7052ca85..bfdd6585 100644 --- a/src/websockets/asyncio/client.py +++ b/src/websockets/asyncio/client.py @@ -4,6 +4,7 @@ import logging import os import socket +import ssl import traceback import urllib.parse from collections.abc import AsyncIterator, Generator, Sequence @@ -213,6 +214,9 @@ class connect: to :obj:`None` to disable the proxy or to the address of a proxy to override the system configuration. See the :doc:`proxy docs <../../topics/proxies>` for details. + proxy_ssl: Configuration for enabling TLS on the proxy connection. + proxy_server_hostname: Host name for the TLS handshake with the proxy. 
+ ``proxy_server_hostname`` overrides the host name from ``proxy``. process_exception: When reconnecting automatically, tell whether an error is transient or fatal. The default behavior is defined by :func:`process_exception`. Refer to its documentation for details. @@ -255,7 +259,7 @@ class connect: the TLS handshake. * You can set ``host`` and ``port`` to connect to a different host and port - from those found in ``uri``. This only changes the destination of the TCP + from those found in ``uri``. This only changes the destination of the TCP connection. The host name from ``uri`` is still used in the TLS handshake for secure connections and in the ``Host`` header. @@ -285,6 +289,8 @@ def __init__( additional_headers: HeadersLike | None = None, user_agent_header: str | None = USER_AGENT, proxy: str | Literal[True] | None = True, + proxy_ssl: ssl.SSLContext | None = None, + proxy_server_hostname: str | None = None, process_exception: Callable[[Exception], Exception | None] = process_exception, # Timeouts open_timeout: float | None = 10, @@ -634,6 +640,17 @@ async def connect_socks_proxy( raise ImportError("python-socks is required to use a SOCKS proxy") +async def connect_http_proxy( + proxy: Proxy, + ws_uri: WebSocketURI, + *, + proxy_ssl: ssl.SSLContext | None = None, + proxy_server_hostname: str | None = None, + **kwargs: Any, +) -> socket.socket: + raise NotImplementedError("HTTP proxy support is not implemented") + + async def connect_proxy( proxy: Proxy, ws_uri: WebSocketURI, @@ -643,5 +660,7 @@ async def connect_proxy( # parse_proxy() validates proxy.scheme. 
if proxy.scheme[:5] == "socks": return await connect_socks_proxy(proxy, ws_uri, **kwargs) + elif proxy.scheme[:4] == "http": + return await connect_http_proxy(proxy, ws_uri, **kwargs) else: raise AssertionError("unsupported proxy") diff --git a/src/websockets/http11.py b/src/websockets/http11.py index 49d7b9a4..530ac3d0 100644 --- a/src/websockets/http11.py +++ b/src/websockets/http11.py @@ -210,6 +210,7 @@ def parse( read_line: Callable[[int], Generator[None, None, bytes]], read_exact: Callable[[int], Generator[None, None, bytes]], read_to_eof: Callable[[int], Generator[None, None, bytes]], + include_body: bool = True, ) -> Generator[None, None, Response]: """ Parse a WebSocket handshake response. @@ -265,9 +266,12 @@ def parse( headers = yield from parse_headers(read_line) - body = yield from read_body( - status_code, headers, read_line, read_exact, read_to_eof - ) + if include_body: + body = yield from read_body( + status_code, headers, read_line, read_exact, read_to_eof + ) + else: + body = b"" return cls(status_code, reason, headers, body) diff --git a/src/websockets/sync/client.py b/src/websockets/sync/client.py index 9e0ce922..1e41a965 100644 --- a/src/websockets/sync/client.py +++ b/src/websockets/sync/client.py @@ -8,12 +8,14 @@ from typing import Any, Literal from ..client import ClientProtocol -from ..datastructures import HeadersLike +from ..datastructures import Headers, HeadersLike +from ..exceptions import InvalidProxyMessage, InvalidProxyStatus from ..extensions.base import ClientExtensionFactory from ..extensions.permessage_deflate import enable_client_permessage_deflate -from ..headers import validate_subprotocols +from ..headers import build_authorization_basic, build_host, validate_subprotocols from ..http11 import USER_AGENT, Response from ..protocol import CONNECTING, Event +from ..streams import StreamReader from ..typing import LoggerLike, Origin, Subprotocol from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri from 
.connection import Connection @@ -140,6 +142,8 @@ def connect( additional_headers: HeadersLike | None = None, user_agent_header: str | None = USER_AGENT, proxy: str | Literal[True] | None = True, + proxy_ssl: ssl_module.SSLContext | None = None, + proxy_server_hostname: str | None = None, # Timeouts open_timeout: float | None = 10, ping_interval: float | None = 20, @@ -194,6 +198,9 @@ def connect( to :obj:`None` to disable the proxy or to the address of a proxy to override the system configuration. See the :doc:`proxy docs <../../topics/proxies>` for details. + proxy_ssl: Configuration for enabling TLS on the proxy connection. + proxy_server_hostname: Host name for the TLS handshake with the proxy. + ``proxy_server_hostname`` overrides the host name from ``proxy``. open_timeout: Timeout for opening the connection in seconds. :obj:`None` disables the timeout. ping_interval: Interval between keepalive pings in seconds. @@ -287,6 +294,8 @@ def connect( parse_proxy(proxy), ws_uri, deadline, + proxy_ssl=proxy_ssl, + proxy_server_hostname=proxy_server_hostname, **kwargs, ) else: @@ -431,6 +440,84 @@ def connect_socks_proxy( raise ImportError("python-socks is required to use a SOCKS proxy") +def connect_http_proxy( + proxy: Proxy, + ws_uri: WebSocketURI, + deadline: Deadline, + *, + proxy_ssl: ssl_module.SSLContext | None = None, + proxy_server_hostname: str | None = None, + **kwargs: Any, +) -> socket.socket: + if proxy.scheme[:5] != "https" and proxy_ssl is not None: + raise ValueError("proxy_ssl argument is incompatible with an http:// proxy") + + # Connect socket + + kwargs.setdefault("timeout", deadline.timeout()) + sock = socket.create_connection((proxy.host, proxy.port), **kwargs) + + # Initialize TLS wrapper and perform TLS handshake + + if proxy.scheme[:5] == "https": + if proxy_ssl is None: + proxy_ssl = ssl_module.create_default_context() + if proxy_server_hostname is None: + proxy_server_hostname = proxy.host + sock.settimeout(deadline.timeout()) + sock = 
proxy_ssl.wrap_socket(sock, server_hostname=proxy_server_hostname) + sock.settimeout(None) + + # Send CONNECT request to the proxy. + + proxy_headers = Headers() + proxy_headers["Host"] = build_host(ws_uri.host, ws_uri.port, ws_uri.secure) + if proxy.username is not None: + assert proxy.password is not None # enforced by parse_proxy + proxy_headers["Proxy-Authorization"] = build_authorization_basic( + proxy.username, + proxy.password, + ) + + connect_host = build_host( + ws_uri.host, + ws_uri.port, + ws_uri.secure, + always_include_port=True, + ) + # We cannot use the Request class because it supports only GET requests. + proxy_request = f"CONNECT {connect_host} HTTP/1.1\r\n".encode() + proxy_request += proxy_headers.serialize() + sock.sendall(proxy_request) + + # Read response from the proxy. + + reader = StreamReader() + parser = Response.parse( + reader.read_line, + reader.read_exact, + reader.read_to_eof, + include_body=False, + ) + try: + while True: + sock.settimeout(deadline.timeout()) + reader.feed_data(sock.recv(4096)) + next(parser) + except StopIteration as exc: + response = exc.value + except Exception as exc: + raise InvalidProxyMessage( + "did not receive a valid HTTP response from proxy" + ) from exc + finally: + sock.settimeout(None) + if not 200 <= response.status_code < 300: + raise InvalidProxyStatus(response) + + return sock + + def connect_proxy( proxy: Proxy, ws_uri: WebSocketURI, @@ -441,5 +528,7 @@ def connect_proxy( # parse_proxy() validates proxy.scheme. 
if proxy.scheme[:5] == "socks": return connect_socks_proxy(proxy, ws_uri, deadline, **kwargs) + elif proxy.scheme[:4] == "http": + return connect_http_proxy(proxy, ws_uri, deadline, **kwargs) else: raise AssertionError("unsupported proxy") diff --git a/tests/asyncio/test_client.py b/tests/asyncio/test_client.py index cb2b8ede..a479e61f 100644 --- a/tests/asyncio/test_client.py +++ b/tests/asyncio/test_client.py @@ -578,6 +578,18 @@ async def socks_proxy(self, auth=None): with patch_environ({"socks_proxy": proxy_uri}): yield record_flows + @contextlib.asynccontextmanager + async def http_proxy(self, auth=None): + if auth: + proxyauth = "hello:iloveyou" + proxy_uri = "http://hello:iloveyou@localhost:8080" + else: + proxyauth = None + proxy_uri = "http://localhost:8080" + async with async_proxy(mode=["regular"], proxyauth=proxyauth) as record_flows: + with patch_environ({"https_proxy": proxy_uri}): + yield record_flows + async def test_socks_proxy(self): """Client connects to server through a SOCKS5 proxy.""" async with self.socks_proxy() as proxy: @@ -602,6 +614,15 @@ async def test_authenticated_socks_proxy(self): self.assertEqual(client.protocol.state.name, "OPEN") self.assertEqual(len(proxy.get_flows()), 1) + @unittest.expectedFailure + async def test_http_proxy(self): + """Client connects to server through an HTTP proxy.""" + async with self.http_proxy() as proxy: + async with serve(*args) as server: + async with connect(get_uri(server)) as client: + self.assertEqual(client.protocol.state.name, "OPEN") + self.assertEqual(len(proxy.get_flows()), 1) + async def test_explicit_proxy(self): """Client connects to server through a proxy set explicitly.""" async with async_proxy(mode=["socks5"]) as proxy: diff --git a/tests/sync/test_client.py b/tests/sync/test_client.py index 2f62dd34..4dc4b0e3 100644 --- a/tests/sync/test_client.py +++ b/tests/sync/test_client.py @@ -320,6 +320,19 @@ def socks_proxy(self, auth=None): with patch_environ({"socks_proxy": proxy_uri}): yield 
record_flows + @contextlib.contextmanager + def http_proxy(self, auth=None): + if auth: + proxyauth = "hello:iloveyou" + proxy_uri = "http://hello:iloveyou@localhost:8080" + else: + proxyauth = None + proxy_uri = "http://localhost:8080" + + with sync_proxy(mode=["regular"], proxyauth=proxyauth) as record_flows: + with patch_environ({"https_proxy": proxy_uri}): + yield record_flows + def test_socks_proxy(self): """Client connects to server through a SOCKS5 proxy.""" with self.socks_proxy() as proxy: @@ -344,6 +357,14 @@ def test_authenticated_socks_proxy(self): self.assertEqual(client.protocol.state.name, "OPEN") self.assertEqual(len(proxy.get_flows()), 1) + def test_http_proxy(self): + """Client connects to server through an HTTP proxy.""" + with self.http_proxy() as proxy: + with run_server() as server: + with connect(get_uri(server)) as client: + self.assertEqual(client.protocol.state.name, "OPEN") + self.assertEqual(len(proxy.get_flows()), 1) + def test_explicit_proxy(self): """Client connects to server through a proxy set explicitly.""" with sync_proxy(mode=["socks5"]) as proxy: