From 10175f7a41ea1cf17aff7983d7eace3c2e4da5c0 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Mon, 27 Jan 2025 22:07:21 +0100 Subject: [PATCH] Refactor SOCKS proxy implementation. --- src/websockets/asyncio/client.py | 109 +++++++++++++++----------- src/websockets/sync/client.py | 127 ++++++++++++++++++------------- 2 files changed, 141 insertions(+), 95 deletions(-) diff --git a/src/websockets/asyncio/client.py b/src/websockets/asyncio/client.py index f76095ea..7052ca85 100644 --- a/src/websockets/asyncio/client.py +++ b/src/websockets/asyncio/client.py @@ -3,6 +3,7 @@ import asyncio import logging import os +import socket import traceback import urllib.parse from collections.abc import AsyncIterator, Generator, Sequence @@ -357,15 +358,12 @@ async def create_connection(self) -> ClientConnection: ws_uri = parse_uri(self.uri) proxy = self.proxy - proxy_uri: Proxy | None = None if kwargs.get("unix", False): proxy = None if kwargs.get("sock") is not None: proxy = None if proxy is True: proxy = get_proxy(ws_uri) - if proxy is not None: - proxy_uri = parse_proxy(proxy) def factory() -> ClientConnection: return self.protocol_factory(ws_uri) @@ -381,48 +379,14 @@ def factory() -> ClientConnection: if kwargs.pop("unix", False): _, connection = await loop.create_unix_connection(factory, **kwargs) + elif proxy is not None: + kwargs["sock"] = await connect_proxy( + parse_proxy(proxy), + ws_uri, + local_addr=kwargs.pop("local_addr", None), + ) + _, connection = await loop.create_connection(factory, **kwargs) else: - if proxy_uri is not None: - if proxy_uri.scheme[:5] == "socks": - try: - from python_socks import ProxyType - from python_socks.async_.asyncio import Proxy - except ImportError: - raise ImportError( - "python-socks is required to use a SOCKS proxy" - ) - if proxy_uri.scheme == "socks5h": - proxy_type = ProxyType.SOCKS5 - rdns = True - elif proxy_uri.scheme == "socks5": - proxy_type = ProxyType.SOCKS5 - rdns = False - # We use mitmproxy for testing and it doesn't support SOCKS4. - elif proxy_uri.scheme == "socks4a": # pragma: no cover - proxy_type = ProxyType.SOCKS4 - rdns = True - elif proxy_uri.scheme == "socks4": # pragma: no cover - proxy_type = ProxyType.SOCKS4 - rdns = False - # Proxy types are enforced in parse_proxy(). - else: - raise AssertionError("unsupported SOCKS proxy") - socks_proxy = Proxy( - proxy_type, - proxy_uri.host, - proxy_uri.port, - proxy_uri.username, - proxy_uri.password, - rdns, - ) - kwargs["sock"] = await socks_proxy.connect( - ws_uri.host, - ws_uri.port, - local_addr=kwargs.pop("local_addr", None), - ) - # Proxy types are enforced in parse_proxy(). - else: - raise AssertionError("unsupported proxy") if kwargs.get("sock") is None: kwargs.setdefault("host", ws_uri.host) kwargs.setdefault("port", ws_uri.port) @@ -624,3 +588,60 @@ def unix_connect( else: uri = "wss://localhost/" return connect(uri=uri, unix=True, path=path, **kwargs) + + +try: + from python_socks import ProxyType + from python_socks.async_.asyncio import Proxy as SocksProxy + + SOCKS_PROXY_TYPES = { + "socks5h": ProxyType.SOCKS5, + "socks5": ProxyType.SOCKS5, + "socks4a": ProxyType.SOCKS4, + "socks4": ProxyType.SOCKS4, + } + + SOCKS_PROXY_RDNS = { + "socks5h": True, + "socks5": False, + "socks4a": True, + "socks4": False, + } + + async def connect_socks_proxy( + proxy: Proxy, + ws_uri: WebSocketURI, + **kwargs: Any, + ) -> socket.socket: + """Connect via a SOCKS proxy and return the socket.""" + socks_proxy = SocksProxy( + SOCKS_PROXY_TYPES[proxy.scheme], + proxy.host, + proxy.port, + proxy.username, + proxy.password, + SOCKS_PROXY_RDNS[proxy.scheme], + ) + return await socks_proxy.connect(ws_uri.host, ws_uri.port, **kwargs) + +except ImportError: + + async def connect_socks_proxy( + proxy: Proxy, + ws_uri: WebSocketURI, + **kwargs: Any, + ) -> socket.socket: + raise ImportError("python-socks is required to use a SOCKS proxy") + + +async def connect_proxy( + proxy: Proxy, + ws_uri: WebSocketURI, + **kwargs: Any, +) -> socket.socket: + """Connect via a proxy and return the socket.""" + # parse_proxy() validates proxy.scheme. + if proxy.scheme[:5] == "socks": + return await connect_socks_proxy(proxy, ws_uri, **kwargs) + else: + raise AssertionError("unsupported proxy") diff --git a/src/websockets/sync/client.py b/src/websockets/sync/client.py index 96f62eda..5fbec67a 100644 --- a/src/websockets/sync/client.py +++ b/src/websockets/sync/client.py @@ -15,7 +15,7 @@ from ..http11 import USER_AGENT, Response from ..protocol import CONNECTING, Event from ..typing import LoggerLike, Origin, Subprotocol -from ..uri import Proxy, get_proxy, parse_proxy, parse_uri +from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri from .connection import Connection from .utils import Deadline @@ -258,15 +258,12 @@ def connect( elif compression is not None: raise ValueError(f"unsupported compression: {compression}") - proxy_uri: Proxy | None = None if unix: proxy = None if sock is not None: proxy = None if proxy is True: proxy = get_proxy(ws_uri) - if proxy is not None: - proxy_uri = parse_proxy(proxy) # Calculate timeouts on the TCP, TLS, and WebSocket handshakes. # The TCP and TLS timeouts must be set on the socket, then removed @@ -285,54 +282,21 @@ def connect( sock.settimeout(deadline.timeout()) assert path is not None # mypy cannot figure this out sock.connect(path) + elif proxy is not None: + sock = connect_proxy( + parse_proxy(proxy), + ws_uri, + deadline, + # websockets is consistent with the socket module while + # python_socks is consistent across implementations. + local_addr=kwargs.pop("source_address", None), + ) else: - if proxy_uri is not None: - if proxy_uri.scheme[:5] == "socks": - try: - from python_socks import ProxyType - from python_socks.sync import Proxy - except ImportError: - raise ImportError( - "python-socks is required to use a SOCKS proxy" - ) - if proxy_uri.scheme == "socks5h": - proxy_type = ProxyType.SOCKS5 - rdns = True - elif proxy_uri.scheme == "socks5": - proxy_type = ProxyType.SOCKS5 - rdns = False - # We use mitmproxy for testing and it doesn't support SOCKS4. - elif proxy_uri.scheme == "socks4a": # pragma: no cover - proxy_type = ProxyType.SOCKS4 - rdns = True - elif proxy_uri.scheme == "socks4": # pragma: no cover - proxy_type = ProxyType.SOCKS4 - rdns = False - # Proxy types are enforced in parse_proxy(). - else: - raise AssertionError("unsupported SOCKS proxy") - socks_proxy = Proxy( - proxy_type, - proxy_uri.host, - proxy_uri.port, - proxy_uri.username, - proxy_uri.password, - rdns, - ) - sock = socks_proxy.connect( - ws_uri.host, - ws_uri.port, - timeout=deadline.timeout(), - local_addr=kwargs.pop("local_addr", None), - ) - # Proxy types are enforced in parse_proxy(). - else: - raise AssertionError("unsupported proxy") - else: - kwargs.setdefault("timeout", deadline.timeout()) - sock = socket.create_connection( - (ws_uri.host, ws_uri.port), **kwargs - ) + kwargs.setdefault("timeout", deadline.timeout()) + sock = socket.create_connection( + (ws_uri.host, ws_uri.port), + **kwargs, + ) sock.settimeout(None) # Disable Nagle algorithm @@ -420,3 +384,64 @@ def unix_connect( else: uri = "wss://localhost/" return connect(uri=uri, unix=True, path=path, **kwargs) + + +try: + from python_socks import ProxyType + from python_socks.sync import Proxy as SocksProxy + + SOCKS_PROXY_TYPES = { + "socks5h": ProxyType.SOCKS5, + "socks5": ProxyType.SOCKS5, + "socks4a": ProxyType.SOCKS4, + "socks4": ProxyType.SOCKS4, + } + + SOCKS_PROXY_RDNS = { + "socks5h": True, + "socks5": False, + "socks4a": True, + "socks4": False, + } + + def connect_socks_proxy( + proxy: Proxy, + ws_uri: WebSocketURI, + deadline: Deadline, + **kwargs: Any, + ) -> socket.socket: + """Connect via a SOCKS proxy and return the socket.""" + socks_proxy = SocksProxy( + SOCKS_PROXY_TYPES[proxy.scheme], + proxy.host, + proxy.port, + proxy.username, + proxy.password, + SOCKS_PROXY_RDNS[proxy.scheme], + ) + kwargs.setdefault("timeout", deadline.timeout()) + return socks_proxy.connect(ws_uri.host, ws_uri.port, **kwargs) + +except ImportError: + + def connect_socks_proxy( + proxy: Proxy, + ws_uri: WebSocketURI, + deadline: Deadline, + **kwargs: Any, + ) -> socket.socket: + raise ImportError("python-socks is required to use a SOCKS proxy") + + +def connect_proxy( + proxy: Proxy, + ws_uri: WebSocketURI, + deadline: Deadline, + **kwargs: Any, +) -> socket.socket: + """Connect via a proxy and return the socket.""" + # parse_proxy() validates proxy.scheme. + if proxy.scheme[:5] == "socks": + return connect_socks_proxy(proxy, ws_uri, deadline, **kwargs) + else: + raise AssertionError("unsupported proxy")