Skip to content
This repository has been archived by the owner on Nov 20, 2022. It is now read-only.

Commit

Permalink
Updated 'mediziner-mensa' and 'fmi-bistro' PDF URLs (#52)
Browse files: browse the repository at this point in the history
* Updated 'mediziner-mensa' PDF URL

* Fixed FMI URL

* 'fmi-bistro' fix round two
  • Loading branch information
COM8 authored and srehwald committed Sep 3, 2019
1 parent 6249635 commit 8bfee18
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions src/menu_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def __parse_dishes(menu_html, location):


class FMIBistroMenuParser(MenuParser):
url = "http://www.wilhelm-gastronomie.de/tum-garching"
url = "http://www.wilhelm-gastronomie.de/"
allergens = ["Gluten", "Laktose", "Milcheiweiß", "Hühnerei", "Soja", "Nüsse", "Erdnuss", "Sellerie", "Fisch",
"Krebstiere", "Weichtiere", "Sesam", "Senf", "Milch", "Ei"]
allergens_regex = r"(Allergene:((\s|\n)*(Gluten|Laktose|Milcheiweiß|Hühnerei|Soja|Nüsse|Erdnuss|Sellerie|Fisch|Krebstiere|Weichtiere|Sesam|Senf|Milch|Ei),?(?![\w-]))*)"
Expand All @@ -190,7 +190,7 @@ def parse(self, location):
# get html tree
tree = html.fromstring(page.content)
# get url of current pdf menu
xpath_query = tree.xpath("//a[contains(@href, 'Speiseplan')]/@href")
xpath_query = tree.xpath("//a[contains(@href, 'Garching-KW')]/@href")

if len(xpath_query) < 1:
return None
Expand Down Expand Up @@ -479,7 +479,8 @@ def get_menus(self, text, year, week_number):


class MedizinerMensaMenuParser(MenuParser):
url = "https://www.med.fs.tum.de"
startPageurl = "https://www.sv.tum.de/med/startseite/"
baseUrl = "https://www.sv.tum.de"
ingredients_regex = r"(\s([A-C]|[E-H]|[K-P]|[R-Z]|[1-9])(,([A-C]|[E-H]|[K-P]|[R-Z]|[1-9]))*(\s|\Z))"
price_regex = r"(\d+(,(\d){2})\s?€)"

Expand All @@ -506,15 +507,16 @@ def parse_dish(self, dish_str):
return Dish(dish_str, dish_price, dish_ingredients.ingredient_set)

def parse(self, location):
page = requests.get(self.url)
page = requests.get(self.startPageurl)
# get html tree
tree = html.fromstring(page.content)
# get url of current pdf menu
xpath_query = tree.xpath("//a[contains(@href, 'KW_')]/@href")
s = html.tostring(tree, encoding='utf8', method='xml')
xpath_query = tree.xpath("//a[contains(@href, 'Mensaplan/KW_')]/@href")

if len(xpath_query) != 1:
return None
pdf_url = self.url + xpath_query[0]
pdf_url = self.baseUrl + xpath_query[0]

# Example PDF-name: "KW_44_Herbst_4_Mensa_2018.pdf" or "KW_50_Winter_1_Mensa_-2018.pdf"
pdf_name = pdf_url.split("/")[-1]
Expand Down

0 comments on commit 8bfee18

Please sign in to comment.