Skip to content

Commit

Permalink
Merge pull request #258 from Crinibus/fix/amazon-domain
Browse files Browse the repository at this point in the history
Remove characters other than digits, commas (,) and periods (.) when getting price for Amazon
  • Loading branch information
Crinibus authored Jan 21, 2025
2 parents a219eee + 6b3d626 commit 35a589f
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 8 deletions.
12 changes: 9 additions & 3 deletions scraper/domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,8 @@ def _get_product_name(self) -> str:
return self.request_data.find("span", id="productTitle").text.strip()

def _get_product_price(self) -> float:
    """Return the product price parsed from the page's "a-price" element.

    The text of the first nested ``<span>`` inside the first
    ``<span class="a-price">`` is read, commas and spaces are removed, and
    whatever else is not a digit or period (for example a currency symbol
    or code) is dropped by ``get_number_string`` before conversion.

    NOTE: commas are always discarded, i.e. treated as thousands
    separators. A comma-decimal price such as "486,89" would therefore be
    read as 48689.0.

    Raises:
        AttributeError: if the price element or its nested span is not
            found (presumably ``request_data`` is a BeautifulSoup
            document whose ``find`` returns ``None`` then - confirm).
        ValueError: if the cleaned text is not a valid float literal.
    """
    price_element = self.request_data.find("span", class_="a-price")
    # Only the first inner span is used; its text holds the displayed price.
    raw_price = price_element.span.text.replace(",", "").replace(" ", "")
    return float(get_number_string(raw_price))

def _get_product_currency(self) -> str:
regex_pattern = "%22currencyCode%22%3A%22(.{3})%22"
Expand Down Expand Up @@ -548,6 +547,13 @@ def get_website_handler(url: str) -> BaseWebsiteHandler:
return website_handler(url)


# Everything get_number_string() has to drop:
#   * runs of "." / "," that touch no digit on either side, e.g. the period
#     of a currency abbreviation in "Rs. 499" (keeping it would yield ".499",
#     which float() reads as 0.499);
#   * any run of characters that are neither digits, commas nor periods.
# Compiled once at import time instead of on every call.
_NON_NUMBER_PATTERN = re.compile(r"(?<!\d)[.,]+(?!\d)|[^\d.,]+")


def get_number_string(value: str) -> str:
    """Return string with only digits, commas (,) and periods (.)

    Currency symbols, currency codes, whitespace and any other characters
    are removed. Commas and periods are kept only when they are adjacent to
    a digit, so separators inside or directly around the number survive
    ("1,345.37", "$.99", "12.") while punctuation belonging to surrounding
    text ("kr. 486,89") is discarded.

    Args:
        value: Raw text containing a number, e.g. "USD 12.40".

    Returns:
        The remaining digits and separators in their original order; an
        empty string if ``value`` contains no digits.

    >>> get_number_string("USD 12.40")
    '12.40'
    >>> get_number_string("Rs. 499")
    '499'
    """
    return _NON_NUMBER_PATTERN.sub("", value)


SUPPORTED_DOMAINS: dict[str, BaseWebsiteHandler] = {
"komplett": KomplettHandler,
"proshop": ProshopHandler,
Expand Down
23 changes: 22 additions & 1 deletion tests/test_domains.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dataclasses import dataclass
import pytest

from scraper.domains import get_website_name
from scraper.domains import get_website_name, get_number_string


@dataclass
Expand Down Expand Up @@ -42,3 +42,24 @@ def test_get_website_name(url: str, setting: UrlSetting, expected: str) -> None:
keep_subdomain=setting.keep_subdomain,
)
assert result == expected


# (raw price text, expected output of get_number_string) pairs covering
# prefixed symbols, prefixed/suffixed currency codes and both "." and ","
# as the decimal separator.
test_price_values: list[tuple[str, str]] = [
    ("USD 12.40", "12.40"),
    ("$234.00", "234.00"),
    ("£345.37", "345.37"),
    ("486,89 kr", "486,89"),
    ("$345.37", "345.37"),
    ("£1345.37", "1345.37"),
    ("1345,37 DKK", "1345,37"),
    ("1345.37 DKK", "1345.37"),
    ("USD 1345.37", "1345.37"),
    ("USD 10345.37", "10345.37"),
]


@pytest.mark.parametrize("value,expected", test_price_values)
def test_get_number_string(value: str, expected: str) -> None:
    """Check that get_number_string(value) equals expected for each table row."""
    assert get_number_string(value) == expected
8 changes: 4 additions & 4 deletions tests/test_objects.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@
"expected_currency": "DKK"
},
"amazon": {
"link": "https://www.amazon.com/Sony-WH-1000XM5-Canceling-Headphones-Hands-Free/dp/B09XS7JWHH",
"expected_title": "Sony WH-1000XM5 The Best Wireless Noise Canceling Headphones with Auto Noise Canceling Optimizer, Crystal Clear Hands-Free Calling, and Alexa Voice Control, Black",
"expected_id": "B09XS7JWHH",
"expected_currency": "USD"
"link": "https://www.amazon.de/-/en/Google-Pixel-Pro-Smartphone-Obsidian/dp/B0DG9DD9VN",
"expected_title": "Google Pixel 9 Pro (512GB, Obsi, EU / UK) + Pixel 9/9 Pro Case, Obsidian",
"expected_id": "B0DG9DD9VN",
"expected_currency": "EUR"
},
"ebay_with_itm": {
"link": "https://www.ebay.com/itm/265771092654",
Expand Down

0 comments on commit 35a589f

Please sign in to comment.