-
Notifications
You must be signed in to change notification settings - Fork 0
/
sort_LaTex_bib.py
97 lines (63 loc) · 2.43 KB
/
sort_LaTex_bib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python
# coding: utf-8
'''
Sort LaTeX bibliography (.bib) entries alphabetically by citation key (which conventionally starts with the first author's name), regardless of entry type (book, article, inproceedings, ...).
'''
import argparse
import os
def arg_parser(argv=None):
    """
    Parse the command-line arguments of the bibliography sorter.

    :param argv: optional list of argument strings to parse; when None
        (the default) argparse reads them from ``sys.argv[1:]``
    :return args: dict of param names (str) and values (str); its only key
        is ``'file_name'``, the value given with ``-f`` / ``--file_name``

    The option is required, so argparse prints a usage message and exits
    when it is missing.
    """
    parser = argparse.ArgumentParser(description='Argument parser')
    parser.add_argument('-f', '--file_name', metavar='str', type=str,
                        help='Bibliography file name, including .bib',
                        required=True)
    # vars() turns the argparse Namespace into a plain dict.
    return vars(parser.parse_args(argv))
# Bibliography path taken from the command line. This runs at module level,
# so the arguments are parsed as soon as the file is executed and the code
# further down can read `file_name`.
file_name = arg_parser()['file_name']
# Ancillary text parsing functions
def extract_data_lines(filename, start_text, end_text, include_start=False,
                       include_end=False):
    """
    Open `filename` and yield, in file order, the lines found between the
    first line containing `start_text` and the next line containing
    `end_text`.

    The two marker lines are only yielded when `include_start` /
    `include_end` are true. If `end_text` never shows up after the start
    line, every remaining line of the file is yielded. If `start_text`
    never shows up, nothing is yielded.
    """
    with open(filename) as handle:
        # Phase 1: skip everything up to the start marker.
        for row in handle:
            if start_text in row:
                if include_start:
                    yield row
                break
        else:
            # Start marker never found: nothing to extract.
            return

        # Phase 2: same file iterator, so this resumes right after the
        # start line and stops at the end marker.
        for row in handle:
            if end_text in row:
                if include_end:
                    yield row
                return
            yield row
def find_data_lines(filename, text):
    """
    Open `filename` and return a list with one item per line that starts
    with `text`, in file order.

    Each item is the part of the line before its first comma, e.g.
    "@article{key" for the line "@article{key,". A matching line without a
    comma is returned whole, including its line ending.
    """
    with open(filename) as fh:
        # Keep only the text before the first "," (drops the ",\n" tail).
        return [line.split(",")[0] for line in fh if line.startswith(text)]
# GROUP THE FILE'S LINES BY ENTRY
# One pass over the file: every line starting with '@' opens a new entry and
# the lines that follow belong to it. Matching on the line start, instead of
# searching for each entry header as a substring, keeps keys that are prefixes
# of one another (e.g. smith2020 / smith2020a) in separate groups and avoids
# re-reading the file once per entry.
# Lines before the first entry belong to no group and are not copied.
ads_groups = []
with open(file_name) as bib_file:
    for line in bib_file:
        if line.startswith('@'):
            ads_groups.append([line])
        elif ads_groups:
            ads_groups[-1].append(line)

# If the file does not end with a newline, terminate its last line; otherwise
# it would be glued to the header of whichever entry is written after it.
if ads_groups and not ads_groups[-1][-1].endswith('\n'):
    ads_groups[-1][-1] += '\n'


def _citation_key(group):
    """Return the text after the first '{' on the entry's first line."""
    pieces = group[0].split('{')
    # A header line without '{' gets an empty key and therefore sorts first.
    return pieces[1] if len(pieces) > 1 else ''


# SORT GROUPS
# With no entries the list is empty and an empty output file is written.
sorted_ads_groups = sorted(ads_groups, key=_citation_key)

# WRITE TO FINAL .BIB FILE
# splitext removes only the final extension, so dots elsewhere in the path
# (e.g. "./refs.bib" or "my.refs.bib") are preserved.
output_name = '{}_sorted.bib'.format(os.path.splitext(file_name)[0])
with open(output_name, 'w') as text_file:
    for group in sorted_ads_groups:
        text_file.writelines(group)