Updated 'mediziner-mensa' and 'fmi-bistro' PDF URLs (#52)

* Updated 'mediziner-mensa' PDF URL
* Fixed FMI URL
* 'fmi-bistro' fix round two
srehwald · Sep 3, 2019 · 8bfee18 · 8bfee18
1 parent 6249635
commit 8bfee18
Showing 1 changed file with 8 additions and 6 deletions.
diff --git a/src/menu_parser.py b/src/menu_parser.py
@@ -177,7 +177,7 @@ def __parse_dishes(menu_html, location):
 
 
 class FMIBistroMenuParser(MenuParser):
-    url = "http://www.wilhelm-gastronomie.de/tum-garching"
+    url = "http://www.wilhelm-gastronomie.de/"
     allergens = ["Gluten", "Laktose", "Milcheiweiß", "Hühnerei", "Soja", "Nüsse", "Erdnuss", "Sellerie", "Fisch",
                  "Krebstiere", "Weichtiere", "Sesam", "Senf", "Milch", "Ei"]
     allergens_regex = r"(Allergene:((\s|\n)*(Gluten|Laktose|Milcheiweiß|Hühnerei|Soja|Nüsse|Erdnuss|Sellerie|Fisch|Krebstiere|Weichtiere|Sesam|Senf|Milch|Ei),?(?![\w-]))*)"
@@ -190,7 +190,7 @@ def parse(self, location):
         # get html tree
         tree = html.fromstring(page.content)
         # get url of current pdf menu
-        xpath_query = tree.xpath("//a[contains(@href, 'Speiseplan')]/@href")
+        xpath_query = tree.xpath("//a[contains(@href, 'Garching-KW')]/@href")
 
         if len(xpath_query) < 1:
             return None
@@ -479,7 +479,8 @@ def get_menus(self, text, year, week_number):
 
 
 class MedizinerMensaMenuParser(MenuParser):
-    url = "https://www.med.fs.tum.de"
+    startPageurl = "https://www.sv.tum.de/med/startseite/"
+    baseUrl = "https://www.sv.tum.de"
     ingredients_regex = r"(\s([A-C]|[E-H]|[K-P]|[R-Z]|[1-9])(,([A-C]|[E-H]|[K-P]|[R-Z]|[1-9]))*(\s|\Z))"
     price_regex = r"(\d+(,(\d){2})\s?€)"
 
@@ -506,15 +507,16 @@ def parse_dish(self, dish_str):
         return Dish(dish_str, dish_price, dish_ingredients.ingredient_set)
 
     def parse(self, location):
-        page = requests.get(self.url)
+        page = requests.get(self.startPageurl)
         # get html tree
         tree = html.fromstring(page.content)
         # get url of current pdf menu
-        xpath_query = tree.xpath("//a[contains(@href, 'KW_')]/@href")
+        s = html.tostring(tree, encoding='utf8', method='xml')
+        xpath_query = tree.xpath("//a[contains(@href, 'Mensaplan/KW_')]/@href")
 
         if len(xpath_query) != 1:
             return None
-        pdf_url = self.url + xpath_query[0]
+        pdf_url = self.baseUrl + xpath_query[0]
 
         # Example PDF-name: "KW_44_Herbst_4_Mensa_2018.pdf" or "KW_50_Winter_1_Mensa_-2018.pdf"
         pdf_name = pdf_url.split("/")[-1]