Skip to content

Commit

Permalink
Spike HTTP(S) proxy implementation for sync client.
Browse files Browse the repository at this point in the history
  • Loading branch information
aaugustin committed Jan 27, 2025
1 parent c7de9b1 commit c4a8ab8
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 8 deletions.
3 changes: 1 addition & 2 deletions docs/reference/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,11 @@ Client
| Perform HTTP Digest Authentication |||||
| (`#784`_) | | | | |
+------------------------------------+--------+--------+--------+--------+
| Connect via HTTP proxy (`#364`_) | | |||
| Connect via HTTP proxy | | |||
+------------------------------------+--------+--------+--------+--------+
| Connect via SOCKS5 proxy |||||
+------------------------------------+--------+--------+--------+--------+

.. _#364: https://github.com/python-websockets/websockets/issues/364
.. _#784: https://github.com/python-websockets/websockets/issues/784

Known limitations
Expand Down
5 changes: 5 additions & 0 deletions docs/topics/proxies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,8 @@ SOCKS proxy is configured in the operating system, python-socks uses SOCKS5h.

python-socks supports username/password authentication for SOCKS5 (:rfc:`1929`)
but does not support other authentication methods such as GSSAPI (:rfc:`1961`).

HTTP proxies
------------

TODO
21 changes: 20 additions & 1 deletion src/websockets/asyncio/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import os
import socket
import ssl
import traceback
import urllib.parse
from collections.abc import AsyncIterator, Generator, Sequence
Expand Down Expand Up @@ -213,6 +214,9 @@ class connect:
to :obj:`None` to disable the proxy or to the address of a proxy
to override the system configuration. See the :doc:`proxy docs
<../../topics/proxies>` for details.
proxy_ssl: Configuration for enabling TLS on the proxy connection.
proxy_server_hostname: Host name for the TLS handshake with the proxy.
``proxy_server_hostname`` overrides the host name from ``proxy``.
process_exception: When reconnecting automatically, tell whether an
error is transient or fatal. The default behavior is defined by
:func:`process_exception`. Refer to its documentation for details.
Expand Down Expand Up @@ -255,7 +259,7 @@ class connect:
the TLS handshake.
* You can set ``host`` and ``port`` to connect to a different host and port
from those found in ``uri``. This only changes the destination of the TCP
from those found in ``uri``. This only changes the destination of the TCP
connection. The host name from ``uri`` is still used in the TLS handshake
for secure connections and in the ``Host`` header.
Expand Down Expand Up @@ -285,6 +289,8 @@ def __init__(
additional_headers: HeadersLike | None = None,
user_agent_header: str | None = USER_AGENT,
proxy: str | Literal[True] | None = True,
proxy_ssl: ssl.SSLContext | None = None,
proxy_server_hostname: str | None = None,
process_exception: Callable[[Exception], Exception | None] = process_exception,
# Timeouts
open_timeout: float | None = 10,
Expand Down Expand Up @@ -634,6 +640,17 @@ async def connect_socks_proxy(
raise ImportError("python-socks is required to use a SOCKS proxy")


async def connect_http_proxy(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    *,
    proxy_ssl: ssl.SSLContext | None = None,
    proxy_server_hostname: str | None = None,
    **kwargs: Any,
) -> socket.socket:
    """
    Placeholder for connecting through an HTTP(S) proxy.

    The signature is already in place so that callers can pass their
    arguments, but every argument is ignored for now.

    Raises:
        NotImplementedError: Always.

    """
    message = "HTTP proxy support is not implemented"
    raise NotImplementedError(message)


async def connect_proxy(
proxy: Proxy,
ws_uri: WebSocketURI,
Expand All @@ -643,5 +660,7 @@ async def connect_proxy(
# parse_proxy() validates proxy.scheme.
if proxy.scheme[:5] == "socks":
return await connect_socks_proxy(proxy, ws_uri, **kwargs)
elif proxy.scheme[:4] == "http":
return await connect_http_proxy(proxy, ws_uri, **kwargs)
else:
raise AssertionError("unsupported proxy")
10 changes: 7 additions & 3 deletions src/websockets/http11.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def parse(
read_line: Callable[[int], Generator[None, None, bytes]],
read_exact: Callable[[int], Generator[None, None, bytes]],
read_to_eof: Callable[[int], Generator[None, None, bytes]],
include_body: bool = True,
) -> Generator[None, None, Response]:
"""
Parse a WebSocket handshake response.
Expand Down Expand Up @@ -265,9 +266,12 @@ def parse(

headers = yield from parse_headers(read_line)

body = yield from read_body(
status_code, headers, read_line, read_exact, read_to_eof
)
if include_body:
body = yield from read_body(
status_code, headers, read_line, read_exact, read_to_eof
)
else:
body = b""

return cls(status_code, reason, headers, body)

Expand Down
93 changes: 91 additions & 2 deletions src/websockets/sync/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
from typing import Any, Literal

from ..client import ClientProtocol
from ..datastructures import HeadersLike
from ..datastructures import Headers, HeadersLike
from ..exceptions import InvalidProxyMessage, InvalidProxyStatus
from ..extensions.base import ClientExtensionFactory
from ..extensions.permessage_deflate import enable_client_permessage_deflate
from ..headers import validate_subprotocols
from ..headers import build_authorization_basic, build_host, validate_subprotocols
from ..http11 import USER_AGENT, Response
from ..protocol import CONNECTING, Event
from ..streams import StreamReader
from ..typing import LoggerLike, Origin, Subprotocol
from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri
from .connection import Connection
Expand Down Expand Up @@ -140,6 +142,8 @@ def connect(
additional_headers: HeadersLike | None = None,
user_agent_header: str | None = USER_AGENT,
proxy: str | Literal[True] | None = True,
proxy_ssl: ssl_module.SSLContext | None = None,
proxy_server_hostname: str | None = None,
# Timeouts
open_timeout: float | None = 10,
ping_interval: float | None = 20,
Expand Down Expand Up @@ -194,6 +198,9 @@ def connect(
to :obj:`None` to disable the proxy or to the address of a proxy
to override the system configuration. See the :doc:`proxy docs
<../../topics/proxies>` for details.
proxy_ssl: Configuration for enabling TLS on the proxy connection.
proxy_server_hostname: Host name for the TLS handshake with the proxy.
``proxy_server_hostname`` overrides the host name from ``proxy``.
open_timeout: Timeout for opening the connection in seconds.
:obj:`None` disables the timeout.
ping_interval: Interval between keepalive pings in seconds.
Expand Down Expand Up @@ -287,6 +294,8 @@ def connect(
parse_proxy(proxy),
ws_uri,
deadline,
proxy_ssl=proxy_ssl,
proxy_server_hostname=proxy_server_hostname,
**kwargs,
)
else:
Expand Down Expand Up @@ -431,6 +440,84 @@ def connect_socks_proxy(
raise ImportError("python-socks is required to use a SOCKS proxy")


def connect_http_proxy(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    deadline: Deadline,
    *,
    proxy_ssl: ssl_module.SSLContext | None = None,
    proxy_server_hostname: str | None = None,
    **kwargs: Any,
) -> socket.socket:
    """
    Open a tunnel to ``ws_uri`` through an HTTP or HTTPS proxy.

    Connect to the proxy, perform a TLS handshake with it when its scheme is
    ``https``, send a ``CONNECT`` request, and read the proxy's response.

    Args:
        proxy: Proxy to connect to.
        ws_uri: WebSocket URI to reach through the proxy; its host and port
            are the target of the ``CONNECT`` request.
        deadline: Deadline bounding the connection, the TLS handshake, and
            the wait for the proxy's response.
        proxy_ssl: TLS configuration for the connection to the proxy.
            Defaults to :func:`ssl.create_default_context` for an
            ``https://`` proxy.
        proxy_server_hostname: Host name for the TLS handshake with the
            proxy. Defaults to the proxy's host.
        **kwargs: Passed to :func:`socket.create_connection`.

    Returns:
        The socket connected to the proxy, wrapped in TLS for an ``https://``
        proxy, once the proxy has accepted the ``CONNECT`` request.

    Raises:
        ValueError: If ``proxy_ssl`` is set but the proxy isn't ``https://``.
        TimeoutError: If the proxy doesn't respond before the deadline.
        InvalidProxyMessage: If the proxy closes the connection early or
            doesn't send a valid HTTP response.
        InvalidProxyStatus: If the proxy responds with a non-2xx status.

    """
    use_tls = proxy.scheme[:5] == "https"
    if not use_tls and proxy_ssl is not None:
        raise ValueError("proxy_ssl argument is incompatible with an http:// proxy")

    # Connect socket

    kwargs.setdefault("timeout", deadline.timeout())
    sock = socket.create_connection((proxy.host, proxy.port), **kwargs)

    # From here on, any failure must close the socket; otherwise it would
    # leak because the caller never receives a reference to it.
    try:
        # Initialize TLS wrapper and perform TLS handshake

        if use_tls:
            if proxy_ssl is None:
                proxy_ssl = ssl_module.create_default_context()
            if proxy_server_hostname is None:
                proxy_server_hostname = proxy.host
            sock.settimeout(deadline.timeout())
            sock = proxy_ssl.wrap_socket(sock, server_hostname=proxy_server_hostname)
            sock.settimeout(None)

        # Send CONNECT request to the proxy.

        proxy_headers = Headers()
        proxy_headers["Host"] = build_host(ws_uri.host, ws_uri.port, ws_uri.secure)
        if proxy.username is not None:
            assert proxy.password is not None  # enforced by parse_proxy
            proxy_headers["Proxy-Authorization"] = build_authorization_basic(
                proxy.username,
                proxy.password,
            )

        connect_host = build_host(
            ws_uri.host,
            ws_uri.port,
            ws_uri.secure,
            always_include_port=True,
        )
        # We cannot use the Request class because it supports only GET requests.
        proxy_request = f"CONNECT {connect_host} HTTP/1.1\r\n".encode()
        proxy_request += proxy_headers.serialize()
        sock.sendall(proxy_request)

        # Read response from the proxy.

        reader = StreamReader()
        parser = Response.parse(
            reader.read_line,
            reader.read_exact,
            reader.read_to_eof,
            include_body=False,
        )
        try:
            while True:
                sock.settimeout(deadline.timeout())
                data = sock.recv(4096)
                if not data:
                    # recv() returns b"" once the peer has closed the
                    # connection. Feeding it to the reader would spin
                    # forever without making progress.
                    raise EOFError("proxy closed the connection")
                reader.feed_data(data)
                next(parser)
        except StopIteration as exc:
            response = exc.value
        except (socket.timeout, TimeoutError) as exc:
            # Report timeouts as such instead of as an invalid response.
            raise TimeoutError("timed out while connecting to HTTP proxy") from exc
        except Exception as exc:
            raise InvalidProxyMessage(
                "did not receive a valid HTTP response from proxy"
            ) from exc
        finally:
            sock.settimeout(None)

        if not 200 <= response.status_code < 300:
            raise InvalidProxyStatus(response)
    except BaseException:
        sock.close()
        raise

    return sock


def connect_proxy(
proxy: Proxy,
ws_uri: WebSocketURI,
Expand All @@ -441,5 +528,7 @@ def connect_proxy(
# parse_proxy() validates proxy.scheme.
if proxy.scheme[:5] == "socks":
return connect_socks_proxy(proxy, ws_uri, deadline, **kwargs)
elif proxy.scheme[:4] == "http":
return connect_http_proxy(proxy, ws_uri, deadline, **kwargs)
else:
raise AssertionError("unsupported proxy")
21 changes: 21 additions & 0 deletions tests/asyncio/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,18 @@ async def socks_proxy(self, auth=None):
with patch_environ({"socks_proxy": proxy_uri}):
yield record_flows

@contextlib.asynccontextmanager
async def http_proxy(self, auth=None):
    """
    Run a proxy in "regular" mode and advertise it via ``https_proxy``.

    When ``auth`` is truthy, the proxy is started with the
    ``hello:iloveyou`` credentials and the proxy URI embeds them.

    Yields the object provided by ``async_proxy``.

    """
    if not auth:
        credentials = None
        uri = "http://localhost:8080"
    else:
        credentials = "hello:iloveyou"
        uri = "http://hello:iloveyou@localhost:8080"
    environ = {"https_proxy": uri}
    async with async_proxy(mode=["regular"], proxyauth=credentials) as flows:
        with patch_environ(environ):
            yield flows

async def test_socks_proxy(self):
"""Client connects to server through a SOCKS5 proxy."""
async with self.socks_proxy() as proxy:
Expand All @@ -602,6 +614,15 @@ async def test_authenticated_socks_proxy(self):
self.assertEqual(client.protocol.state.name, "OPEN")
self.assertEqual(len(proxy.get_flows()), 1)

# Expected to fail for now: the asyncio client's connect_http_proxy() still
# raises NotImplementedError.
@unittest.expectedFailure
async def test_http_proxy(self):
"""Client connects to server through an HTTP proxy."""
async with self.http_proxy() as proxy:
async with serve(*args) as server:
async with connect(get_uri(server)) as client:
self.assertEqual(client.protocol.state.name, "OPEN")
# The proxy is expected to have recorded exactly one flow.
self.assertEqual(len(proxy.get_flows()), 1)

async def test_explicit_proxy(self):
"""Client connects to server through a proxy set explicitly."""
async with async_proxy(mode=["socks5"]) as proxy:
Expand Down
21 changes: 21 additions & 0 deletions tests/sync/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,19 @@ def socks_proxy(self, auth=None):
with patch_environ({"socks_proxy": proxy_uri}):
yield record_flows

@contextlib.contextmanager
def http_proxy(self, auth=None):
    """
    Run a proxy in "regular" mode and advertise it via ``https_proxy``.

    When ``auth`` is truthy, the proxy is started with the
    ``hello:iloveyou`` credentials and the proxy URI embeds them.

    Yields the object provided by ``sync_proxy``.

    """
    if not auth:
        credentials = None
        uri = "http://localhost:8080"
    else:
        credentials = "hello:iloveyou"
        uri = "http://hello:iloveyou@localhost:8080"
    environ = {"https_proxy": uri}
    with sync_proxy(mode=["regular"], proxyauth=credentials) as flows:
        with patch_environ(environ):
            yield flows

def test_socks_proxy(self):
"""Client connects to server through a SOCKS5 proxy."""
with self.socks_proxy() as proxy:
Expand All @@ -344,6 +357,14 @@ def test_authenticated_socks_proxy(self):
self.assertEqual(client.protocol.state.name, "OPEN")
self.assertEqual(len(proxy.get_flows()), 1)

def test_http_proxy(self):
"""Client connects to server through an HTTP proxy."""
with self.http_proxy() as proxy:
with run_server() as server:
with connect(get_uri(server)) as client:
self.assertEqual(client.protocol.state.name, "OPEN")
# The proxy is expected to have recorded exactly one flow.
self.assertEqual(len(proxy.get_flows()), 1)

def test_explicit_proxy(self):
"""Client connects to server through a proxy set explicitly."""
with sync_proxy(mode=["socks5"]) as proxy:
Expand Down

0 comments on commit c4a8ab8

Please sign in to comment.