forked from mswart/openmensa-parsers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hannover.py
53 lines (46 loc) · 1.82 KB
/
hannover.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!python3
from urllib.request import urlopen
import re
from xml.dom.minidom import Document
from pyopenmensa.feed import LazyBuilder
# NOTE: all patterns are raw strings so that regex escapes such as \d and \(
# are not treated as (invalid) Python string escapes.

# A date written as dd.mm.yyyy; parse_week uses it to detect the start of a day.
day_regex = re.compile(r'(?P<date>\d{2}\.\d{2}\.\d{4})')
# A price such as "1,80€" or "1.80€"; only the numeric part is captured.
price_regex = re.compile(r'(?P<price>\d+[,.]\d{2})€')
# A parenthesised note marker of lowercase letters/digits, e.g. "(1)" or "(a2)".
note_regex = re.compile(r'\((?P<number>[a-z0-9]+?)\)')
# A legend entry: "(marker)" followed by one or more whitespace-separated words.
legend_regex = re.compile(r'\((?P<number>\w+)\) ?(?P<value>\w+(\s+\w+)*)')
# "category: meal"; the category may contain parentheses, the meal name may not.
meal_regex = re.compile(r'(?P<category>(\w|\s|\(|\))+):\s*(?P<meal>(\w|\s)+)')
# Roles handed to addMeal together with the prices found on a meal line.
roles = ('student', 'employee', 'other')
def parse_week(url, canteen):
    """Download one menu page and add the meals found on it to *canteen*.

    The page is processed line by line. A line containing a date of the
    form ``dd.mm.yyyy`` opens a day. The lines after it that start with
    ``>`` are parsed as ``category: meal`` entries for that day; the first
    line that does not start with ``>`` closes the day again.

    Args:
        url: Address of the menu page; fetched with ``urlopen`` and decoded
            as UTF-8.
        canteen: Builder object that receives the results through its
            ``addMeal`` and ``setDayClosed`` methods.

    Meal lines that do not match ``category: meal`` and note markers that
    are missing from the page's legend are reported on stdout and skipped.
    """
    # Use a context manager so the response is closed as soon as the body
    # has been read, instead of whenever it happens to be garbage collected.
    with urlopen(url) as response:
        document = response.read().decode('utf8').split('\n')

    # Lines beginning with "(marker) text" form the legend that maps a note
    # marker to its description; a later duplicate marker overrides an
    # earlier one.
    legends = {
        match.group('number'): match.group('value')
        for match in map(legend_regex.match, document)
        if match
    }

    date = None
    for line in document:
        if not date:
            # Outside of a day: only look for the date line that opens one.
            # The date line itself is never parsed as a meal.
            day = day_regex.search(line)
            if day:
                date = day.group('date')
            continue
        if 'geschlossen' in line.lower():
            # "geschlossen" is German for "closed".
            canteen.setDayClosed(date)
        if not line.startswith('>'):
            # The first non-meal line ends the current day.
            date = None
            continue
        mealtest = meal_regex.search(line)
        if not mealtest:
            print('unable to parse category/meal: "{}"'.format(line))
            continue
        category = mealtest.group('category').strip()
        name = mealtest.group('meal').strip()
        # Translate every "(marker)" on the line into its legend text.
        notes = []
        for marker in note_regex.findall(line):
            if marker not in legends:
                print('unknown legend: {}'.format(marker))
                continue
            notes.append(legends[marker])
        canteen.addMeal(date, category, name, notes,
                        price_regex.findall(line), roles)
def parse_url(url, today=False):
    """Build the XML feed for the canteen page at *url*.

    Args:
        url: Base address of the menu page; a ``&wann=<n>`` query suffix is
            appended to it for every page that is fetched.
        today: When true, only the ``&wann=2`` page is fetched; otherwise
            the ``&wann=3`` page is parsed as well.

    Returns:
        The result of ``toXMLFeed()`` on the builder filled by
        ``parse_week``.
    """
    # NOTE(review): wann=2 / wann=3 presumably select the current and the
    # following week -- confirm against the site.
    suffixes = ('&wann=2',) if today else ('&wann=2', '&wann=3')
    builder = LazyBuilder()
    for suffix in suffixes:
        parse_week(url + suffix, builder)
    return builder.toXMLFeed()