Merge branch 'develop'

TandoorRecipes · Nov 12, 2024 · 100b75a · 100b75a
2 parents 4c0ace1 + 7dba36d
commit 100b75a
Show file tree

Hide file tree

Showing 22 changed files with 2,824 additions and 1,099 deletions.
diff --git a/cookbook/forms.py b/cookbook/forms.py
@@ -89,12 +89,13 @@ class ImportExportBase(forms.Form):
     COOKMATE = 'COOKMATE'
     REZEPTSUITEDE = 'REZEPTSUITEDE'
     PDF = 'PDF'
+    GOURMET = 'GOURMET'
 
     type = forms.ChoiceField(choices=((DEFAULT, _('Default')), (PAPRIKA, 'Paprika'), (NEXTCLOUD, 'Nextcloud Cookbook'), (MEALIE, 'Mealie'), (CHOWDOWN, 'Chowdown'),
                                       (SAFFRON, 'Saffron'), (CHEFTAP, 'ChefTap'), (PEPPERPLATE, 'Pepperplate'), (RECETTETEK, 'RecetteTek'), (RECIPESAGE, 'Recipe Sage'),
                                       (DOMESTICA, 'Domestica'), (MEALMASTER, 'MealMaster'), (REZKONV, 'RezKonv'), (OPENEATS, 'Openeats'), (RECIPEKEEPER, 'Recipe Keeper'),
                                       (PLANTOEAT, 'Plantoeat'), (COOKBOOKAPP, 'CookBookApp'), (COPYMETHAT, 'CopyMeThat'), (PDF, 'PDF'), (MELARECIPES, 'Melarecipes'),
-                                      (COOKMATE, 'Cookmate'), (REZEPTSUITEDE, 'Recipesuite.de')))
+                                      (COOKMATE, 'Cookmate'), (REZEPTSUITEDE, 'Recipesuite.de'), (GOURMET, 'Gourmet')))
 
 
 class MultipleFileInput(forms.ClearableFileInput):

diff --git a/cookbook/integration/gourmet.py b/cookbook/integration/gourmet.py
@@ -0,0 +1,211 @@
+import base64
+from io import BytesIO
+from lxml import etree
+import requests
+from pathlib import Path
+
+from bs4 import BeautifulSoup, Tag
+
+from cookbook.helper.HelperFunctions import validate_import_url
+from cookbook.helper.ingredient_parser import IngredientParser
+from cookbook.helper.recipe_url_import import parse_servings, parse_servings_text, parse_time, iso_duration_to_minutes
+from cookbook.integration.integration import Integration
+from cookbook.models import Ingredient, Recipe, Step, Keyword
+from recipe_scrapers import scrape_html
+
+
+class Gourmet(Integration):
+
+    def split_recipe_file(self, file):
+        """Split one exported HTML file into its individual recipe elements.
+
+        The bytes read from ``file`` are decoded as UTF-8 (bytes that cannot be decoded are
+        dropped) and parsed with BeautifulSoup's ``html.parser``.
+
+        :param file: binary file-like object holding the HTML document
+        :return: every ``<div class="recipe">`` element found in the document
+        """
+        raw_bytes = file.read()
+        markup = raw_bytes.decode('utf-8', errors="ignore")
+        return BeautifulSoup(markup, "html.parser").find_all("div", {"class": "recipe"})
+
+    def get_ingredients_recursive(self, step, ingredients, ingredient_parser):
+        """Attach the entries of an ingredient list to ``step``.
+
+        Walks the direct tag children of ``ingredients``. Every ``<li>`` child is stored as a
+        header ingredient named after the text sitting directly inside it (surrounding
+        whitespace and a trailing ``:`` removed), and the first ``<ul class="ing">`` found inside
+        that ``<li>`` is processed recursively. Every other tag is handed to
+        ``ingredient_parser`` and stored as a regular ingredient.
+
+        :param step: Step whose ``ingredients`` relation receives the created rows
+        :param ingredients: the list tag to walk; anything that is not a Tag (e.g. ``None`` when
+            no list was found) is ignored
+        :param ingredient_parser: object providing ``parse``, ``get_food`` and ``get_unit``
+        """
+        if not isinstance(ingredients, Tag):
+            return
+
+        for ingredient in ingredients.children:
+            # only tags are processed, text nodes between them are skipped
+            if not isinstance(ingredient, Tag):
+                continue
+
+            if ingredient.name == "li":
+                # find_all(string=...) is the current spelling of the deprecated findAll(text=...)
+                header = "".join(ingredient.find_all(string=True, recursive=False)).strip().rstrip(":")
+
+                step.ingredients.add(Ingredient.objects.create(
+                    is_header=True,
+                    note=header[:256],
+                    original_text=header,
+                    space=self.request.space,
+                ))
+                nested_list = ingredient.find("ul", {"class": "ing"})
+                self.get_ingredients_recursive(step, nested_list, ingredient_parser)
+                continue
+
+            ingredient_text = ingredient.text.strip()
+            try:
+                amount, unit, food, note = ingredient_parser.parse(ingredient_text)
+                f = ingredient_parser.get_food(food)
+                u = ingredient_parser.get_unit(unit)
+                step.ingredients.add(
+                    Ingredient.objects.create(
+                        food=f,
+                        unit=u,
+                        amount=amount,
+                        note=note,
+                        original_text=ingredient_text,
+                        space=self.request.space,
+                    )
+                )
+            except ValueError:
+                # best effort: an entry that raises ValueError is left out, the import continues
+                continue
+
+    def get_recipe_from_file(self, file):
+        """Create and return a Recipe from one recipe element of a Gourmet HTML export.
+
+        ``file`` is not a file object but a BeautifulSoup tag (one of the elements returned by
+        ``split_recipe_file``). The following parts of it are read:
+
+        * the first ``<a>`` with an ``href`` -> ``source_url``
+        * ``p.title > span[itemprop=name]`` -> ``name`` (cut to 128 characters)
+        * ``recipeCategory`` / ``recipeCuisine`` spans -> keywords
+        * ``recipeYield`` -> ``servings``
+        * ``prepTime`` -> ``waiting_time`` and ``cookTime`` -> ``working_time`` (minutes)
+        * ``ul.ing`` -> ingredients of the first step
+        * ``div.instructions`` -> steps, a new step is started at every ``<h3>`` that follows
+          instruction text
+        * ``div.modifications`` -> appended in italics to the last step
+        * ``div#description`` -> ``description``; text longer than 512 characters is shortened and
+          additionally appended in full to the last step
+        * the first ``<img>`` -> recipe image, looked up by file name in ``self.import_zip``
+
+        :param file: BeautifulSoup tag of a single recipe
+        :return: the saved Recipe
+        :raises AttributeError: if the title element is missing
+        """
+        # the first link that carries an href is used as source url
+        source_url = next((link.get("href") for link in file.find_all('a') if link.has_attr('href')), None)
+
+        name = file.find("p", {"class": "title"}).find("span", {"itemprop": "name"}).text.strip()
+
+        recipe = Recipe.objects.create(
+            name=name[:128],
+            source_url=source_url,
+            created_by=self.request.user,
+            internal=True,
+            space=self.request.space,
+        )
+
+        # categories and cuisines both become keywords; get_or_create returns (object, created)
+        for itemprop in ('recipeCategory', 'recipeCuisine'):
+            for tag in file.find_all('span', {'itemprop': itemprop}):
+                keyword, _created = Keyword.objects.get_or_create(space=self.request.space, name=tag.text)
+                recipe.keywords.add(keyword)
+
+        try:
+            recipe.servings = parse_servings(file.find("span", {"itemprop": "recipeYield"}).text.strip())
+        except AttributeError:
+            # no yield given in the export
+            pass
+
+        # NOTE(review): prepTime is stored as waiting time and cookTime as working time, kept as found - confirm
+        prep_minutes = self._read_duration_minutes(file, "prepTime")
+        if prep_minutes is not None:
+            recipe.waiting_time = prep_minutes
+
+        cook_minutes = self._read_duration_minutes(file, "cookTime")
+        if cook_minutes is not None:
+            recipe.working_time = cook_minutes
+
+        show_ingredients_table = self.request.user.userpreference.show_step_ingredients
+        step = Step.objects.create(
+            instruction='',
+            space=self.request.space,
+            show_ingredients_table=show_ingredients_table,
+        )
+
+        ingredient_parser = IngredientParser(self.request, True)
+
+        ingredients = file.find("ul", {"class": "ing"})
+        self.get_ingredients_recursive(step, ingredients, ingredient_parser)
+
+        instructions = file.find("div", {"class": "instructions"})
+        if isinstance(instructions, Tag):
+            for instruction in instructions.children:
+                if not isinstance(instruction, Tag) or instruction.text == "":
+                    continue
+                if instruction.name == "h3":
+                    if step.instruction:
+                        # the current step already has text, so the heading opens a new step
+                        step.save()
+                        recipe.steps.add(step)
+                        step = Step.objects.create(
+                            instruction='',
+                            space=self.request.space,
+                            show_ingredients_table=show_ingredients_table,
+                        )
+
+                    step.name = instruction.text.strip()[:128]
+                elif instruction.name == "div":
+                    for instruction_step in instruction.children:
+                        # text nodes have no find_all(), only tags are used (same rule as the outer loop)
+                        if not isinstance(instruction_step, Tag):
+                            continue
+                        step.instruction += self._text_with_linebreaks(instruction_step) + ' \n\n'
+
+        notes = file.find("div", {"class": "modifications"})
+        if isinstance(notes, Tag):
+            for n in notes.children:
+                if not isinstance(n, Tag) or n.text == "":
+                    continue
+                if n.name == "h3":
+                    step.instruction += f'*{n.text.strip()}:* \n\n'
+                else:
+                    step.instruction += '*' + self._text_with_linebreaks(n) + '* \n\n'
+
+        description_tag = file.find("div", {"id": "description"})
+        description = description_tag.text.strip() if description_tag is not None else ''
+        if len(description) <= 512:
+            recipe.description = description
+        else:
+            recipe.description = description[:480] + ' ... (full description below)'
+            step.instruction += '*Description:* \n\n*' + description + '* \n\n'
+
+        step.save()
+        recipe.steps.add(step)
+
+        # import the primary recipe image that is stored in the zip
+        image_tag = file.find("img")
+        image_path = image_tag.get("src") if image_tag is not None else None
+        if image_path:
+            try:
+                # the file name is the last path component, whichever separator the export used
+                image_filename = image_path.replace("\\", "/").rsplit("/", 1)[-1]
+
+                for f in self.import_zip.filelist:
+                    if Path(f.filename).name == image_filename:
+                        image_bytes = BytesIO(self.import_zip.read(f))
+                        self.import_recipe_image(recipe, image_bytes, filetype='.jpeg')
+                        break
+            except Exception as e:
+                # a broken image must not abort the import of the recipe itself
+                print(recipe.name, ': failed to import image ', str(e))
+
+        recipe.save()
+        return recipe
+
+    @staticmethod
+    def _text_with_linebreaks(tag):
+        """Return the stripped text of ``tag`` with every ``<br>`` turned into a newline (modifies ``tag``)."""
+        for br in tag.find_all("br"):
+            br.replace_with("\n")
+        return tag.text.strip()
+
+    @staticmethod
+    def _read_duration_minutes(recipe_tag, itemprop):
+        """Read the duration span ``itemprop`` of ``recipe_tag`` and return it in whole minutes.
+
+        The first whitespace separated token of the span text is the amount (a decimal comma is
+        accepted), the second one the unit. Hours and days are recognised by their English and
+        German names, any other or a missing unit is treated as minutes.
+
+        :return: the duration in minutes, or ``None`` if the span is missing or cannot be parsed
+        """
+        tag = recipe_tag.find("span", {"itemprop": itemprop})
+        if tag is None:
+            return None
+
+        try:
+            tokens = tag.text.split()
+            amount = float(tokens[0].replace(',', '.'))
+            unit = tokens[1].lower() if len(tokens) > 1 else ''
+            if unit in ('stunde', 'stunden', 'hour', 'hours'):
+                return int(amount * 60)
+            if unit in ('tag', 'tage', 'day', 'days'):
+                return int(amount * 60 * 24)
+            return int(amount)
+        except (IndexError, ValueError, OverflowError):
+            # blank or non numeric duration: leave the time unset instead of failing the recipe
+            return None
+
+    def get_files_from_recipes(self, recipes, el, cookie):
+        """Not supported by this integration.
+
+        :raises NotImplementedError: always
+        """
+        message = 'Method not implemented in storage integration'
+        raise NotImplementedError(message)
+
+    def get_file_from_recipe(self, recipe):
+        """Not supported by this integration.
+
+        :raises NotImplementedError: always
+        """
+        message = 'Method not implemented in storage integration'
+        raise NotImplementedError(message)
diff --git a/cookbook/integration/integration.py b/cookbook/integration/integration.py
@@ -153,6 +153,19 @@ def do_import(self, files, il, import_duplicates):
                             il.total_recipes = len(new_file_list)
                             file_list = new_file_list
 
+                        if isinstance(self, cookbook.integration.gourmet.Gourmet):
+                            # the Gourmet importer reads the recipe images from the archive later on
+                            self.import_zip = import_zip
+                            new_file_list = []
+                            for file in file_list:
+                                # skip empty archive entries and the index page(s)
+                                if file.file_size == 0 or file.filename.startswith("index.htm"):
+                                    continue
+                                # every .htm file is split into its recipe elements, each one is imported on its own
+                                if file.filename.endswith(".htm"):
+                                    new_file_list += self.split_recipe_file(BytesIO(import_zip.read(file.filename)))
+                            il.total_recipes = len(new_file_list)
+                            file_list = new_file_list
+
                         for z in file_list:
                             try:
                                 if not hasattr(z, 'filename') or isinstance(z, Tag):

diff --git a/cookbook/integration/mealie.py b/cookbook/integration/mealie.py
@@ -72,14 +72,14 @@ def get_recipe_from_file(self, file):
             )
             recipe.steps.add(step)
 
-        if 'recipe_yield' in recipe_json:
+        if 'recipe_yield' in recipe_json and recipe_json['recipe_yield'] is not None:
             recipe.servings = parse_servings(recipe_json['recipe_yield'])
             recipe.servings_text = parse_servings_text(recipe_json['recipe_yield'])
 
         if 'total_time' in recipe_json and recipe_json['total_time'] is not None:
             recipe.working_time = parse_time(recipe_json['total_time'])
 
-        if 'org_url' in recipe_json:
+        if 'org_url' in recipe_json and recipe_json['org_url'] is not None:
             recipe.source_url = recipe_json['org_url']
 
         recipe.save()

diff --git a/cookbook/integration/paprika.py b/cookbook/integration/paprika.py
@@ -84,13 +84,22 @@ def get_recipe_from_file(self, file):
 
             recipe.steps.add(step)
 
+            # Paprika exports can carry an image in image_url, in photo_data, or in both.
+            # If a user takes a photo themselves, only photo_data will be set.
+            # If a user imports an image, both will be set, but photo_data is then a center-cropped, square, resized version, so image_url is preferred.
+
+            # Try to download image if possible
             try:
                 if recipe_json.get("image_url", None):
                     url = recipe_json.get("image_url", None)
                     if validate_import_url(url):
                         response = requests.get(url)
                         self.import_recipe_image(recipe, BytesIO(response.content))
             except Exception:
+                pass
+
+            # If no image downloaded, try to extract from photo_data
+            if not recipe.image:
                 if recipe_json.get("photo_data", None):
                     self.import_recipe_image(recipe, BytesIO(base64.b64decode(recipe_json['photo_data'])), filetype='.jpeg')