forked from larskotthoff/assurvey
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
132 lines (105 loc) · 4.99 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created on Oct 9, 2014
@author: marius lindauer
'''
import os
import sys
import time
import json
from pybtex.database.input import bibtex
class BIBHTML(object):
    '''
    Convert the project's BibTeX files into the ``lit.json`` literature list.

    The pipeline (see :meth:`main`) parses the configured ``.bib`` files,
    copies cross-referenced fields onto the citing entries, sorts the entries
    by year (newest first) and dumps every entry that carries at least one
    active keyword as a JSON record.

    All text is handled as unicode and is never converted to bytes, so the
    class behaves the same on Python 2 and Python 3.
    '''

    # Year assumed for entries without a usable "year" field.
    _DEFAULT_YEAR = 1900

    # (field name, format) pairs used, in this order, to build the long
    # citation; a field that is missing or empty is left out.
    _LONG_FORMAT = (
        ("author", "%s<br/>"),
        ("title", "%s.<br/>"),
        ("booktitle", "%s.<br/>"),
        ("journal", "%s.<br/>"),
        ("pages", "p. %s. "),
        ("year", "%s.<br/>"),
    )

    # LaTeX clean-up table applied by format_string().  The order matters:
    # braces are removed first so that "{\"u}" becomes "\"u" before the accent
    # rules run, and stray backslashes / quotes are removed last.
    _REPLACEMENTS = (
        (u"{", u""),
        (u"}", u""),
        (u"\\\"u", u"ü"),
        (u"\\\"o", u"ö"),
        (u"\\\"a", u"ä"),
        (u"\\'o", u"ó"),
        (u"\\'a", u"á"),
        (u"\\'e", u"é"),
        (u"\\^e", u"ê"),
        (u"\\'c", u"ć"),
        (u"~", u""),
        (u"\\", u""),
        (u"\"", u""),
    )

    def __init__(self):
        '''
        constructor
        '''
        # BibTeX inputs, parsed in this order by read_bib().
        self._BIBFILES = ["./bib/strings.bib", "./bib/lib.bib", "./bib/proc.bib"]
        # Alternative input set (absolute paths on the original author's machine):
        #self._BIBFILES = ["/home/lindauer/workspace/acsurvey/bib/lib.bib", "/home/lindauer/workspace/acsurvey/bib/proc.bib"]
        # Parsed bibliography; filled by read_bib().
        self.bibdata = None
        # An entry is exported only if one of its keywords is listed here.
        # Alternative keyword set:
        #self.ACTIVE_KEYS = ["Algorithm Configuration", "Algorithm Schedules", "Algorithm Selection", "Hyper-Parameter Optimization"]
        self.ACTIVE_KEYS = ["Algorithm Configuration", "Global Optimization", "Algorithm Analysis"]

    def main(self):
        '''
        Run the whole conversion: read, resolve cross-references, sort, write.
        '''
        self.read_bib()
        self.resolve_crossref()
        entry_list = self.collect_by_entries()
        self.sort_by_year(entry_list)
        self.write_json(entry_list)

    def read_bib(self):
        '''
        Parse every file in ``self._BIBFILES`` and keep the result in
        ``self.bibdata``.
        '''
        # One parser instance is reused for all files; the value returned by
        # the last parse_file() call is what ends up in self.bibdata.
        # NOTE(review): this presumes the pybtex parser accumulates entries
        # (and @string macros) across parse_file() calls - confirm against the
        # installed pybtex version.
        parser = bibtex.Parser()
        for bib_file in self._BIBFILES:
            self.bibdata = parser.parse_file(bib_file)

    def resolve_crossref(self):
        '''
        Copy the fields of a cross-referenced entry onto the citing entry.

        Every field of the referenced entry except ``title`` is written to
        the citing entry, replacing a value the citing entry may already
        have.  A warning goes to stderr when the referenced entry has no
        ``booktitle``.
        '''
        for entry in self.bibdata.entries.values():
            if not entry.fields.get("crossref"):
                continue
            cross_ref = entry.get_crossref()
            if not cross_ref.fields.get("booktitle"):
                sys.stderr.write("[WARNING] Missing booktitle at %s\n" % (cross_ref,))
            for key, value in cross_ref.fields.items():
                # The citing entry keeps its own title.
                if key == "title":
                    continue
                entry.fields[key] = value

    def collect_by_entries(self):
        '''
        Return all parsed entries as a new list.

        A real list is returned (not a dictionary view) so that
        :meth:`sort_by_year` can sort it in place on Python 3 as well.
        '''
        return list(self.bibdata.entries.values())

    def _year_of(self, entry):
        '''
        Return the entry's year as an int, or the default year if the field
        is missing or is not a plain number.
        '''
        try:
            return int(entry.fields.get("year", self._DEFAULT_YEAR))
        except (TypeError, ValueError):
            return self._DEFAULT_YEAR

    def sort_by_year(self, entry_list):
        '''
        Sort ``entry_list`` in place by year, newest first.

        Entries without a usable year are treated as year 1900.
        '''
        entry_list.sort(key=self._year_of, reverse=True)

    def _activate(self, entry):
        '''
        Return True if the entry has a keyword from ``self.ACTIVE_KEYS``.

        For such an entry the ``keywords`` field is rewritten as the sorted,
        space-stripped keywords joined by ", ".
        '''
        raw = entry.fields.get("keywords")
        if not raw:
            return False
        keys = sorted(key.strip(" ") for key in raw.split(","))
        if not any(key in self.ACTIVE_KEYS for key in keys):
            return False
        entry.fields["keywords"] = ", ".join(keys)
        return True

    def _to_record(self, entry):
        '''
        Build the JSON record (a dict) for one entry.
        '''
        fields = entry.fields
        record = {"Year": self._DEFAULT_YEAR,
                  "Citation": {"short": "NA", "long": "NA"},
                  "Domain": "N/A",
                  "Category": "N/A"}

        record["Citation"]["long"] = "".join(
            form % (self.format_string(fields.get(name)),)
            for name, form in self._LONG_FORMAT
            if fields.get(name))

        # "author year"; a missing part is left out instead of raising, and
        # the "NA" placeholder stays when both are missing.
        short_parts = [self.format_string(fields.get(name))
                       for name in ("author", "year")]
        short_parts = [part for part in short_parts if part]
        if short_parts:
            record["Citation"]["short"] = " ".join(short_parts)

        if fields.get("year"):
            record["Year"] = fields.get("year")
        if fields.get("domain"):
            domains = sorted(part.strip() for part in fields.get("domain").split(","))
            record["Domain"] = ", ".join(domains)
        if fields.get("keywords"):
            record["Category"] = fields.get("keywords")
        return record

    def write_json(self, entry_list):
        '''
        Write the exportable entries of ``entry_list`` to ``lit.json``.

        Entries of type PROCEEDINGS are dropped silently; entries without an
        active keyword are dropped with a warning on stderr.  The file is
        written to the current working directory.
        '''
        lit_list = []
        for entry in entry_list:
            if entry.original_type.upper() == "PROCEEDINGS":
                continue
            if not self._activate(entry):
                # Newline-terminated so consecutive warnings do not run together.
                sys.stderr.write("[WARNING]: Skipped: %s\n" % (entry.fields.get("title"),))
                continue
            lit_list.append(self._to_record(entry))
        with open("lit.json", "w") as fp:
            json.dump(lit_list, fp)

    def format_string(self, str_):
        '''
        Strip LaTeX markup from a BibTeX field value.

        Braces, tildes, backslashes and double quotes are removed and the
        accent commands listed in ``_REPLACEMENTS`` are replaced by the
        accented character.  ``None`` yields an empty string and byte strings
        are decoded as UTF-8; the result is always text.
        '''
        if str_ is None:
            return u""
        if isinstance(str_, bytes):
            str_ = str_.decode("utf-8")
        for old, new in self._REPLACEMENTS:
            str_ = str_.replace(old, new)
        return str_
if __name__ == '__main__':
    # Script entry point: build the converter and run the full pipeline.
    BIBHTML().main()