Skip to content

Commit

Permalink
feat: adding geoip parameter to the StealthyFetcher
Browse files Browse the repository at this point in the history
  • Loading branch information
D4Vinci committed Dec 12, 2024
1 parent 6f87420 commit bfe9063
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ True
| addons | List of Firefox addons to use. **Must be paths to extracted addons.** | ✔️ |
| humanize | Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window. | ✔️ |
| allow_webgl | Enabled by default. Disabling WebGL is not recommended, as many WAFs now check whether WebGL is enabled. | ✔️ |
| geoip | Recommended to use with proxies. Automatically uses the IP's longitude, latitude, timezone, country, and locale, and spoofs the WebRTC IP address. It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region. | ✔️ |
| disable_ads | Enabled by default. When enabled, this installs the `uBlock Origin` addon on the browser. | ✔️ |
| network_idle | Wait for the page until there are no network connections for at least 500 ms. | ✔️ |
| timeout | The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000. | ✔️ |
Expand Down
5 changes: 5 additions & 0 deletions scrapling/engines/camo.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def __init__(
timeout: Optional[float] = 30000, page_action: Callable = do_nothing, wait_selector: Optional[str] = None, addons: Optional[List[str]] = None,
wait_selector_state: str = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True,
geoip: Optional[bool] = False,
adaptor_arguments: Dict = None,
):
"""An engine that utilizes Camoufox library, check the `StealthyFetcher` class for more documentation.
Expand All @@ -38,6 +39,8 @@ def __init__(
:param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000
:param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
:param wait_selector: Wait for a specific css selector to be in a specific state.
:param geoip: Recommended to use with proxies. Automatically uses the IP's longitude, latitude, timezone, country, and locale, and spoofs the WebRTC IP address.
It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
:param wait_selector_state: The state to wait for the selector given with `wait_selector`. Default state is `attached`.
:param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search for this website's domain name.
:param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
Expand All @@ -53,6 +56,7 @@ def __init__(
self.google_search = bool(google_search)
self.os_randomize = bool(os_randomize)
self.disable_ads = bool(disable_ads)
self.geoip = bool(geoip)
self.extra_headers = extra_headers or {}
self.proxy = construct_proxy_dict(proxy)
self.addons = addons or []
Expand All @@ -76,6 +80,7 @@ def fetch(self, url: str) -> Response:
"""
addons = [] if self.disable_ads else [DefaultAddons.UBO]
with Camoufox(
geoip=self.geoip,
proxy=self.proxy,
addons=self.addons,
exclude_addons=addons,
Expand Down
5 changes: 4 additions & 1 deletion scrapling/fetchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def fetch(
block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
timeout: Optional[float] = 30000, page_action: Callable = do_nothing, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
wait_selector_state: str = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None, proxy: Optional[Union[str, Dict[str, str]]] = None,
os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True,
os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
) -> Response:
"""
Opens up a browser and do your request based on your chosen options below.
Expand All @@ -100,6 +100,8 @@ def fetch(
:param disable_ads: Enabled by default, this installs `uBlock Origin` addon on the browser if enabled.
:param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
:param allow_webgl: Enabled by default. Disabling WebGL is not recommended, as many WAFs now check whether WebGL is enabled.
:param geoip: Recommended to use with proxies. Automatically uses the IP's longitude, latitude, timezone, country, and locale, and spoofs the WebRTC IP address.
It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
:param network_idle: Wait for the page until there are no network connections for at least 500 ms.
:param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
:param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000
Expand All @@ -113,6 +115,7 @@ def fetch(
"""
engine = CamoufoxEngine(
proxy=proxy,
geoip=geoip,
addons=addons,
timeout=timeout,
headless=headless,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
'httpx[brotli,zstd]',
'playwright>=1.49.1',
'rebrowser-playwright>=1.49.1',
'camoufox>=0.4.7',
'camoufox[geoip]>=0.4.7',
'browserforge',
],
python_requires=">=3.9",
Expand Down

0 comments on commit bfe9063

Please sign in to comment.