Skip to content

Commit

Permalink
ah.nl: added support for recipes missing schema.org instructions (#580)
Browse files Browse the repository at this point in the history
  • Loading branch information
arpachuilo authored Jul 24, 2022
1 parent 271404a commit 5f8f0b8
Show file tree
Hide file tree
Showing 5 changed files with 353 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ Scrapers available for:
- `https://www.acouplecooks.com <https://acouplecooks.com/>`_
- `http://www.afghankitchenrecipes.com/ <http://www.afghankitchenrecipes.com/>`_
- `https://akispetretzikis.com/ <https://akispetretzikis.com/>`_
- `https://ah.nl/ <https://ah.nl/>`_
- `https://allrecipes.com/ <https://allrecipes.com/>`_
- `https://alltommat.se/ <https://alltommat.se/>`_
- `https://altonbrown.com/ <https://altonbrown.com/>`_
Expand Down
2 changes: 2 additions & 0 deletions recipe_scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .acouplecooks import ACoupleCooks
from .afghankitchenrecipes import AfghanKitchenRecipes
from .akispetretzikis import AkisPetretzikis
from .albertheijn import AlbertHeijn
from .allrecipes import AllRecipes
from .alltomat import AllTomat
from .altonbrown import AltonBrown
Expand Down Expand Up @@ -212,6 +213,7 @@
Abril.host(): Abril,
AfghanKitchenRecipes.host(): AfghanKitchenRecipes,
AkisPetretzikis.host(): AkisPetretzikis,
AlbertHeijn.host(): AlbertHeijn,
AllRecipes.host(): AllRecipes,
AllTomat.host(): AllTomat,
AltonBrown.host(): AltonBrown,
Expand Down
61 changes: 61 additions & 0 deletions recipe_scrapers/albertheijn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import re

from ._abstract import AbstractScraper
from ._utils import normalize_string


class AlbertHeijn(AbstractScraper):
    """Scraper for recipe pages on ah.nl.

    Every field is read from ``self.schema``. Only ``instructions`` has an
    HTML fallback, used when the schema lookup returns nothing.
    """

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "ah.nl"

    def author(self):
        """Return the author as reported by ``self.schema``."""
        return self.schema.author()

    def title(self):
        """Return the title as reported by ``self.schema``."""
        return self.schema.title()

    def category(self):
        """Return the category as reported by ``self.schema``."""
        return self.schema.category()

    def total_time(self):
        """Return the total time as reported by ``self.schema``."""
        return self.schema.total_time()

    def yields(self):
        """Return the yield as reported by ``self.schema``."""
        return self.schema.yields()

    def image(self):
        """Return the image as reported by ``self.schema``."""
        return self.schema.image()

    def ingredients(self):
        """Return the ingredients as reported by ``self.schema``."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the preparation steps.

        The schema.org value is returned unchanged when it is truthy.
        Otherwise the text of every ``<p>`` found inside a ``<div>`` whose
        class matches ``recipe-preparation-steps_root.*`` is normalized and
        joined with newlines. The result is an empty string when no such
        paragraph exists.
        """
        # try schema.org
        instructions = self.schema.instructions()
        if instructions:
            return instructions

        # The attribute filter must be a dict (``"class": pattern``). The
        # previous set literal (``"class", pattern``) only matched because a
        # non-dict ``attrs`` value is treated as a CSS-class filter, and it
        # also accepted elements whose class is literally "class".
        # NOTE(review): assumes self.soup is a bs4 BeautifulSoup object, so
        # find_all is the documented spelling of findAll - confirm.
        steps = [
            step
            # get steps root
            for root in self.soup.find_all(
                "div",
                {"class": re.compile("recipe-preparation-steps_root.*")},
            )
            # get steps
            for step in root.find_all("p")
        ]

        return "\n".join(normalize_string(step.get_text()) for step in steps)

    def ratings(self):
        """Return the ratings as reported by ``self.schema``."""
        return self.schema.ratings()

    def cuisine(self):
        """Return the cuisine as reported by ``self.schema``."""
        return self.schema.cuisine()

    def description(self):
        """Return the description as reported by ``self.schema``."""
        return self.schema.description()
62 changes: 62 additions & 0 deletions tests/test_albertheijn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from recipe_scrapers.albertheijn import AlbertHeijn
from tests import ScraperTest


class TestAlbertHeijnScraper(ScraperTest):
    # Checks each field returned by the AlbertHeijn scraper against the
    # values expected for the stored test recipe.
    # NOTE(review): presumably ScraperTest builds ``self.harvester_class``
    # from ``scraper_class`` - confirm against tests/__init__.py.

    scraper_class = AlbertHeijn

    def test_host(self):
        expected = "ah.nl"
        self.assertEqual(expected, self.harvester_class.host())

    def test_author(self):
        expected = "Albert Heijn"
        self.assertEqual(expected, self.harvester_class.author())

    def test_title(self):
        expected = "Kruidige groentecalzone"
        self.assertEqual(expected, self.harvester_class.title())

    def test_category(self):
        expected = "hoofdgerecht"
        self.assertEqual(expected, self.harvester_class.category())

    def test_total_time(self):
        expected = 35
        self.assertEqual(expected, self.harvester_class.total_time())

    def test_yields(self):
        expected = "2 servings"
        self.assertEqual(expected, self.harvester_class.yields())

    def test_image(self):
        expected = "https://static.ah.nl/static/recepten/img_001329_890x594_JPG.jpg"
        self.assertEqual(expected, self.harvester_class.image())

    def test_ingredients(self):
        expected = [
            "1 kleine rode ui",
            "1 courgette",
            "1 rode paprika",
            "0.5 eetlepel Italiaanse kruiden",
            "2 eetlepels olijfolie",
            "2 theelepels knoflookpuree",
            "1 pakje pizza & tomato",
            "1 pakje walnootkaas",
            "0.5 eetlepel olie om in te vetten",
        ]
        self.assertEqual(expected, self.harvester_class.ingredients())

    def test_instructions(self):
        # Kept as one literal so the expected text stays byte-identical,
        # including the missing space in "scheppen.Op".
        expected = "Oven voorverwarmen op 200 °C of gasovenstand 4. Ui, courgette en paprika schoonmaken en in kleine blokjes snijden. In kom groenten, kruiden, olijfolie, knoflookpuree en inhoud van potje tomatensaus door elkaar scheppen.Op aanrecht pizzadeeg uitspreiden, iets uitrollen en in twee gelijke stukken snijden. Plakken kaas naast elkaar erop leggen, ca. 2 cm van randen vrijhouden. Op helft van elk stuk courgettemengsel scheppen. Andere helft over vulling klappen en randen van deeg tussen duim en wijsvinger tot mooie schulprand dichtknijpen. Bakplaat invetten. Calzones erop leggen en in midden van oven in ca. 25 minuten goudbruin en gaarbakken."
        self.assertEqual(expected, self.harvester_class.instructions())

    def test_ratings(self):
        expected = 3.08
        self.assertEqual(expected, self.harvester_class.ratings())

    def test_cuisine(self):
        expected = "italiaans"
        self.assertEqual(expected, self.harvester_class.cuisine())

    def test_description(self):
        # The test recipe is expected to yield an empty description.
        expected = ""
        self.assertEqual(expected, self.harvester_class.description())
Loading

0 comments on commit 5f8f0b8

Please sign in to comment.