-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathXMLProcessor.py
58 lines (44 loc) · 1.41 KB
/
XMLProcessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import xml.sax
class WikiHandler(xml.sax.ContentHandler):
    """SAX content handler that collects ``<doc>`` records and stores them.

    The text of each ``<title>``, ``<abstract>`` and ``<url>`` element is
    gathered into a dict under the element's name.  When a ``</doc>`` end tag
    is seen, the dict is handed to ``db.insertWikiDoc``.

    Args:
        dbConnector: Object providing ``insertWikiDoc(doc)``; a truthy return
            value is treated as a successful insert.
    """

    # Elements whose text is copied into the current document dict.
    _TEXT_FIELDS = ("title", "abstract", "url")

    db = None

    def __init__(self, dbConnector):
        super().__init__()
        self.currentData = ""
        self.db = dbConnector
        # Name of the element currently being read ('' when between elements).
        self.__type = ""
        # Per-instance record; a class-level dict would be shared by every
        # handler until its first successful insert.
        self.__doc = {}

    def startDocument(self):
        """Do nothing at document start; existing database rows are kept."""

    def startElement(self, name, attrs):
        """Remember the element being read and reset its captured text."""
        self.__type = name
        if name in self._TEXT_FIELDS:
            # Start from an empty value so an empty element yields '' rather
            # than picking up whatever text follows it.
            self.__doc[name] = ""

    def characters(self, content):
        """Append a chunk of text to the field of the current element.

        SAX parsers may deliver one text node in several chunks, so chunks
        are concatenated instead of keeping only the first one.
        """
        if self.__type in self._TEXT_FIELDS:
            self.__doc[self.__type] += content

    def endElement(self, name):
        """Close the current element and store the record on ``</doc>``.

        Raises:
            InsertIntoDatabaseError: If ``db.insertWikiDoc`` returns a falsy
                value for the finished document.
        """
        # Text that follows an end tag (e.g. inter-element whitespace) must
        # not be attributed to the element that was just closed.
        self.__type = ""
        if name != "doc":
            return
        # One document has been fully parsed.
        if not self.db.insertWikiDoc(self.__doc):
            raise InsertIntoDatabaseError(self.__doc)
        self.__doc = {}
def get_parser():
    """Build a SAX parser with namespace processing switched off.

    Returns:
        The reader created by ``xml.sax.make_parser()`` after its
        ``feature_namespaces`` feature has been set to 0.
    """
    namespaces_enabled = 0
    sax_reader = xml.sax.make_parser()
    sax_reader.setFeature(xml.sax.handler.feature_namespaces, namespaces_enabled)
    return sax_reader
class TestOverError(Exception):
    """Exception carrying the fixed message ``"test over"``."""

    def __init__(self):
        # Pass the message to the base class; merely constructing a separate
        # Exception object would discard it and leave str(self) empty.
        super().__init__("test over")
class InsertIntoDatabaseError(Exception):
    """Raised when a document could not be inserted into the database.

    Args:
        doc: The document that failed to insert, if known.  Optional so the
            exception can also be raised without arguments.

    Attributes:
        doc: The value passed as ``doc`` (``None`` when omitted).
    """

    def __init__(self, doc=None):
        # Pass the message to the base class; merely constructing a separate
        # Exception object would discard it.
        super().__init__("error happens when insertInto database")
        self.doc = doc