Skip to content

Commit

Permalink
adds proxy_server option to wacz
Browse files (browse the repository at this point in the history)
  • Loading branch information
msramalho committed Oct 6, 2024
1 parent e6f5981 commit e495501
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/auto_archiver/enrichers/wacz_enricher.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,6 +34,7 @@ def configs() -> dict:
"extract_screenshot": {"default": True, "help": "If enabled the screenshot captured by browsertrix will be extracted into separate Media and appear in the html report. The .wacz file will be kept untouched."},
"socks_proxy_host": {"default": None, "help": "SOCKS proxy host for browsertrix-crawler, use in combination with socks_proxy_port. eg: user:password@host"},
"socks_proxy_port": {"default": None, "help": "SOCKS proxy port for browsertrix-crawler, use in combination with socks_proxy_host. eg 1234"},
"proxy_server": {"default": None, "help": "SOCKS server proxy URL, in development"},
}

def setup(self) -> None:
Expand Down Expand Up @@ -113,7 +114,10 @@ def enrich(self, to_enrich: Metadata) -> bool:
try:
logger.info(f"Running browsertrix-crawler: {' '.join(cmd)}")
my_env = os.environ.copy()
if self.socks_proxy_host and self.socks_proxy_port:
if self.proxy_server:
logger.debug("Using PROXY_SERVER proxy for browsertrix-crawler")
my_env["PROXY_SERVER"] = self.proxy_server
elif self.socks_proxy_host and self.socks_proxy_port:
logger.debug("Using SOCKS proxy for browsertrix-crawler")
my_env["SOCKS_HOST"] = self.socks_proxy_host
my_env["SOCKS_PORT"] = str(self.socks_proxy_port)
Expand Down

0 comments on commit e495501

Please sign in to comment.