-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathtest.py
137 lines (103 loc) · 5.82 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import json
import unittest
from microdata import get_items, Item, URI
class MicrodataParserTest(unittest.TestCase):
    """Run ``get_items`` over the HTML fixtures in ``test-data/`` and check
    both the attribute-style accessors on the returned items and the
    structure produced by ``Item.json()``.
    """

    @staticmethod
    def _load_items(path):
        """Parse the fixture file at *path* and return what ``get_items`` yields.

        The file is opened in a ``with`` block so the handle is closed
        deterministically instead of being leaked until garbage collection.
        NOTE(review): this assumes ``get_items`` has finished reading the
        file by the time it returns (every test indexes/len()s the result
        immediately) -- confirm if the parser ever becomes lazy.
        """
        with open(path) as fixture:
            return get_items(fixture)

    def test_parse(self):
        """A single Person item with scalar, multi-valued and nested properties."""
        # parse the html for microdata
        items = self._load_items("test-data/example.html")

        # this html should have just one main item
        # (assertEqual, not assertTrue: assertTrue's 2nd argument is only a
        # failure message, so it would accept any non-empty result)
        self.assertEqual(len(items), 1)

        item = items[0]

        # item's type should be set
        self.assertEqual(item.itemtype, [URI("http://schema.org/Person")])

        # test simple case of a single valued property
        self.assertEqual(item.name, "Jane Doe")

        # but object properties can have multiple values ...

        # basic accessor returns the first value
        self.assertEqual(
            item.colleagues,
            URI("http://www.xyz.edu/students/alicejones.html"),
        )

        # and get_all, well, gets them all of course :)
        self.assertEqual(
            item.get_all("colleagues"),
            [
                URI("http://www.xyz.edu/students/alicejones.html"),
                URI("http://www.xyz.edu/students/bobsmith.html"),
            ],
        )

        # address should be another item
        self.assertIsInstance(item.address, Item)
        self.assertEqual(
            item.address.itemtype, [URI("http://schema.org/PostalAddress")]
        )
        self.assertEqual(item.address.addressLocality, "Seattle")

        # <script> tag should be ignored in the content text
        self.assertNotIn("Unrelated text", item.address.streetAddress)

        # json
        data = json.loads(item.json())
        self.assertEqual(data["properties"]["name"][0], "Jane Doe")
        self.assertEqual(data["type"], ["http://schema.org/Person"])
        self.assertEqual(data["id"], "http://www.xyz.edu/~jane")
        address = data["properties"]["address"][0]
        self.assertIsInstance(address, dict)
        self.assertEqual(address["properties"]["addressLocality"][0], "Seattle")

    def test_parse_nested(self):
        """An Event item whose properties are nested itemprops/itemscopes."""
        # parse the html for microdata
        items = self._load_items("test-data/example-nested.html")

        # this html should have just one main item
        self.assertEqual(len(items), 1)

        item = items[0]

        # item's type should be set
        self.assertEqual(item.itemtype, [URI("http://schema.org/Event")])

        # test case of a nested itemprop
        self.assertEqual(
            item.name.strip(),
            "Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1)",
        )

        # test case of a nested itemscope
        self.assertIsInstance(item.location, Item)
        self.assertEqual(item.location.itemtype, [URI("http://schema.org/Place")])
        self.assertEqual(item.location.url, URI("wells-fargo-center.html"))

        # address should be a nested item
        self.assertIsInstance(item.location.address, Item)
        self.assertEqual(
            item.location.address.itemtype,
            [URI("http://schema.org/PostalAddress")],
        )
        self.assertEqual(item.location.address.addressLocality, "Philadelphia")

        # json
        data = json.loads(item.json())
        self.assertEqual(
            data["properties"]["name"][0].strip(),
            "Miami Heat at Philadelphia 76ers - Game 3 (Home Game 1)",
        )
        self.assertEqual(data["type"], ["http://schema.org/Event"])
        self.assertEqual(
            data["properties"]["url"], ["nba-miami-philidelphia-game3.html"]
        )
        location = data["properties"]["location"][0]
        self.assertIsInstance(location, dict)
        self.assertEqual(location["properties"]["url"][0], "wells-fargo-center.html")
        address = location["properties"]["address"][0]
        self.assertIsInstance(address, dict)
        self.assertEqual(
            address["properties"]["addressLocality"][0], "Philadelphia"
        )

    def test_parse_unlinked(self):
        """An item nested in another without an itemprop is reported separately."""
        items = self._load_items("test-data/unlinked.html")
        self.assertEqual(len(items), 2)

        person = items[0]
        self.assertEqual(person.itemtype, [URI("http://schema.org/Person")])
        self.assertEqual(person.name, "Jane Doe")
        self.assertEqual(person.streetAddress, None)

        # this PostalAddress is enclosed within the Person but it is
        # not linked via the streetAddress itemprop. This particular example
        # would represent a bug in the markup, but technically items can appear
        # within other items without them being related together with an
        # itemprop.
        postal_address = items[1]
        self.assertEqual(
            postal_address.itemtype, [URI("http://schema.org/PostalAddress")]
        )
        self.assertIn('Whitworth', postal_address.streetAddress)

    def test_skip_level(self):
        """The ``skip-level.html`` fixture yields one item named "Jane Doe"."""
        items = self._load_items("test-data/skip-level.html")
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0].name, "Jane Doe")

    def test_parse_multiple_props(self):
        """One element may carry several space-separated itemprop names."""
        items = self._load_items("test-data/multiple-props.html")
        self.assertEqual(len(items), 2)

        item = items[0]
        data = json.loads(item.json())

        # both names `John Doe and Jane Dun` should appear under author and creator props
        author_names = data["properties"]["author"][0]["properties"]["name"]
        self.assertEqual(len(author_names), 2)
        self.assertEqual(author_names, ["John Doe", "Jane Dun"])
        creator_names = data["properties"]["creator"][0]["properties"]["name"]
        self.assertEqual(len(creator_names), 2)
        self.assertEqual(creator_names, ["John Doe", "Jane Dun"])

        # nested multiple props
        self.assertEqual(item.author.affiliation.name, "Stanford University")
        self.assertEqual(item.creator.affiliation.name, "Stanford University")
        self.assertEqual(item.author.alumniOf.name, "Stanford University")
        self.assertEqual(item.creator.alumniOf.name, "Stanford University")

        item = items[1]
        data = json.loads(item.json())

        # test case for original issue #3
        self.assertEqual(data["properties"]["favorite-color"][0], "orange")
        self.assertEqual(data["properties"]["favorite-fruit"][0], "orange")
# Run the whole suite when this file is executed directly as a script;
# importing the module leaves test discovery to the caller.
if __name__ == "__main__":
    unittest.main()