diff --git a/flathunter/crawler/immowelt.py b/flathunter/crawler/immowelt.py index 5dc1076e..4117607e 100644 --- a/flathunter/crawler/immowelt.py +++ b/flathunter/crawler/immowelt.py @@ -86,7 +86,9 @@ def extract_data(self, soup: BeautifulSoup): id_element = adv.find("a") try: - url = "https://immowelt.de" + id_element.get("href") + url = id_element.get("href") + if "https" not in url: + url = "https://immowelt.de/" + url except IndexError: continue diff --git a/test/crawler/test_crawl_immowelt.py b/test/crawler/test_crawl_immowelt.py index d482856d..3aa4592d 100644 --- a/test/crawler/test_crawl_immowelt.py +++ b/test/crawler/test_crawl_immowelt.py @@ -6,10 +6,10 @@ DUMMY_CONFIG = """ urls: - - https://www.immowelt.de/liste/muenchen/wohnungen/mieten?roomi=2&primi=600&prima=1000 + - https://www.immowelt.de/classified-search?distributionTypes=Rent&estateTypes=House,Apartment&locations=AD08DE8634&order=Default&m=homepage_new_search_classified_search_result """ -TEST_URL = 'https://www.immowelt.de/liste/berlin/wohnungen/mieten?roomi=2&prima=1500&wflmi=70&sort=createdate%2Bdesc' +TEST_URL = 'https://www.immowelt.de/classified-search?distributionTypes=Rent&estateTypes=House,Apartment&locations=AD08DE8634&order=Default&m=homepage_new_search_classified_search_result' @pytest.fixture def crawler():