-
Notifications
You must be signed in to change notification settings - Fork 524
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ah.nl: added support for recipes missing schema.org instructions (#580)
- Loading branch information
1 parent
271404a
commit 5f8f0b8
Showing
5 changed files
with
353 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import re | ||
|
||
from ._abstract import AbstractScraper | ||
from ._utils import normalize_string | ||
|
||
|
||
class AlbertHeijn(AbstractScraper):
    """Scraper for recipes hosted on ah.nl.

    Every field is read from the page's schema.org data through
    ``self.schema``. Only ``instructions`` has an HTML fallback, used when
    the schema.org data yields no instructions.
    """

    # CSS class of the <div> that wraps the preparation steps. The trailing
    # wildcard tolerates a suffix after the base name.
    # NOTE(review): presumably a build-generated suffix -- confirm against
    # live pages.
    _STEPS_ROOT_CLASS = re.compile("recipe-preparation-steps_root.*")

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "ah.nl"

    def author(self):
        """Return the author from the schema.org data."""
        return self.schema.author()

    def title(self):
        """Return the title from the schema.org data."""
        return self.schema.title()

    def category(self):
        """Return the category from the schema.org data."""
        return self.schema.category()

    def total_time(self):
        """Return the total time from the schema.org data."""
        return self.schema.total_time()

    def yields(self):
        """Return the yields from the schema.org data."""
        return self.schema.yields()

    def image(self):
        """Return the image from the schema.org data."""
        return self.schema.image()

    def ingredients(self):
        """Return the ingredients from the schema.org data."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the preparation instructions.

        The schema.org instructions are returned when they are non-empty.
        Otherwise the text of every ``<p>`` found inside the ``<div>``
        elements whose class matches ``_STEPS_ROOT_CLASS`` is normalized
        with ``normalize_string`` and joined with newlines.
        """
        # try schema.org
        instructions = self.schema.instructions()
        if instructions:
            return instructions

        # The attribute filter must be a dict mapping the attribute name to
        # the matcher. A set literal here would be read by Beautiful Soup as
        # a list of alternative class values and would also match
        # class="class".
        steps = [
            step
            for root in self.soup.find_all(
                "div", {"class": self._STEPS_ROOT_CLASS}
            )
            for step in root.find_all("p")
        ]

        return "\n".join(normalize_string(step.get_text()) for step in steps)

    def ratings(self):
        """Return the ratings from the schema.org data."""
        return self.schema.ratings()

    def cuisine(self):
        """Return the cuisine from the schema.org data."""
        return self.schema.cuisine()

    def description(self):
        """Return the description from the schema.org data."""
        return self.schema.description()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
from recipe_scrapers.albertheijn import AlbertHeijn | ||
from tests import ScraperTest | ||
|
||
|
||
class TestAlbertHeijnScraper(ScraperTest):
    """Field-by-field checks of the AlbertHeijn scraper's output."""

    scraper_class = AlbertHeijn

    def test_host(self):
        """The scraper reports ah.nl as its host."""
        actual = self.harvester_class.host()
        self.assertEqual("ah.nl", actual)

    def test_author(self):
        """The author is extracted."""
        actual = self.harvester_class.author()
        self.assertEqual("Albert Heijn", actual)

    def test_title(self):
        """The title is extracted."""
        actual = self.harvester_class.title()
        self.assertEqual("Kruidige groentecalzone", actual)

    def test_category(self):
        """The category is extracted."""
        actual = self.harvester_class.category()
        self.assertEqual("hoofdgerecht", actual)

    def test_total_time(self):
        """The total time is extracted."""
        actual = self.harvester_class.total_time()
        self.assertEqual(35, actual)

    def test_yields(self):
        """The yields value is extracted."""
        actual = self.harvester_class.yields()
        self.assertEqual("2 servings", actual)

    def test_image(self):
        """The image URL is extracted."""
        expected = "https://static.ah.nl/static/recepten/img_001329_890x594_JPG.jpg"
        actual = self.harvester_class.image()
        self.assertEqual(expected, actual)

    def test_ingredients(self):
        """The ingredient list is extracted in order."""
        expected = [
            "1 kleine rode ui",
            "1 courgette",
            "1 rode paprika",
            "0.5 eetlepel Italiaanse kruiden",
            "2 eetlepels olijfolie",
            "2 theelepels knoflookpuree",
            "1 pakje pizza & tomato",
            "1 pakje walnootkaas",
            "0.5 eetlepel olie om in te vetten",
        ]
        actual = self.harvester_class.ingredients()
        self.assertEqual(expected, actual)

    def test_instructions(self):
        """The full instruction text is extracted."""
        expected = "Oven voorverwarmen op 200 °C of gasovenstand 4. Ui, courgette en paprika schoonmaken en in kleine blokjes snijden. In kom groenten, kruiden, olijfolie, knoflookpuree en inhoud van potje tomatensaus door elkaar scheppen.Op aanrecht pizzadeeg uitspreiden, iets uitrollen en in twee gelijke stukken snijden. Plakken kaas naast elkaar erop leggen, ca. 2 cm van randen vrijhouden. Op helft van elk stuk courgettemengsel scheppen. Andere helft over vulling klappen en randen van deeg tussen duim en wijsvinger tot mooie schulprand dichtknijpen. Bakplaat invetten. Calzones erop leggen en in midden van oven in ca. 25 minuten goudbruin en gaarbakken."
        actual = self.harvester_class.instructions()
        self.assertEqual(expected, actual)

    def test_ratings(self):
        """The rating is extracted."""
        actual = self.harvester_class.ratings()
        self.assertEqual(3.08, actual)

    def test_cuisine(self):
        """The cuisine is extracted."""
        actual = self.harvester_class.cuisine()
        self.assertEqual("italiaans", actual)

    def test_description(self):
        """The description is the empty string for this recipe."""
        actual = self.harvester_class.description()
        self.assertEqual("", actual)
Oops, something went wrong.