@@ -20,30 +20,36 @@ def extract_citation_keys(bib_file_path):
20
20
citation_keys .append (match .group (1 ))
21
21
return citation_keys
22
22
23
-
24
def find_bare_citations(markdown_file_path: Path, citation_keys: list) -> list:
    """Yield citation keys that appear "bare" (not prefixed with ``@``).

    The Markdown file is read in full; HTML comments and fenced code blocks
    are stripped before searching, so keys mentioned only inside them are
    ignored.

    NOTE: despite the ``-> list`` annotation (kept for interface stability),
    this function is a generator.

    Args:
        markdown_file_path: Path of the Markdown document to scan.
        citation_keys: Citation keys to look for; each is matched literally
            as a substring.

    Yields:
        ``(citation_key, lines)`` tuples, one per key that has at least one
        occurrence not immediately preceded by ``@``. ``lines`` holds the full
        text of every offending line, in document order, each line reported
        once even if the key occurs on it several times.
    """
    content = markdown_file_path.read_text()

    # Remove HTML comments. regex from https://stackoverflow.com/a/28208465
    content = re.sub(r"<!--.*?-->", "", content, flags=re.DOTALL)

    # Remove Markdown code blocks, regex from https://stackoverflow.com/a/64116935
    markdown_code_block_pattern = re.compile(r'```[^`]*```', re.DOTALL)
    content = markdown_code_block_pattern.sub('', content)

    for citation_key in citation_keys:
        key_pattern = re.compile(re.escape(citation_key))
        matches = []
        # Offset of the most recently reported line; finditer walks the text
        # in order, so repeated hits on one line are always consecutive.
        last_line_start = -1

        for match in key_pattern.finditer(content):
            start_index = match.start()

            # A key immediately preceded by '@' is a proper citation: skip it
            # before doing any line-boundary work.
            if start_index > 0 and content[start_index - 1] == '@':
                continue

            # rfind returns -1 when there is no earlier newline, which the
            # +1 turns into offset 0 (start of the document).
            line_start = content.rfind('\n', 0, start_index) + 1
            if line_start == last_line_start:
                # This line has already been reported for this key.
                continue
            last_line_start = line_start

            line_end = content.find('\n', start_index)
            if line_end == -1:
                # Last line of the document has no trailing newline.
                line_end = len(content)

            matches.append(content[line_start:line_end])

        if matches:
            yield citation_key, matches
46
-
52
+
47
53
48
54
if __name__ == "__main__" :
49
55
@@ -61,12 +67,16 @@ def find_bare_citations(markdown_file_path:Path, citation_keys:list)->list:
61
67
62
68
args = parser .parse_args ()
63
69
70
+ print (f"Parsing { args .bib_file_path } for citations" )
71
+ extract_citations_start = timeit .default_timer ()
64
72
citation_keys = extract_citation_keys (args .bib_file_path )
73
+ extract_citations_time = timeit .default_timer () - extract_citations_start
74
+ print (f"Finding citations took { extract_citations_time } seconds" )
75
+
65
76
66
- # for i in range(50):
67
- # citation_keys.extend([str(uuid.uuid4) for _ in range(100)])
77
+ print (f"Bibliography had { len (citation_keys )} citations" )
68
78
69
- print (f"Bibliography had { len ( citation_keys ) } citations, checking for bare citations: " )
79
+ print (f"Beginning bare- citations check: checking { args . markdown_file_path } " )
70
80
71
81
start_time = timeit .default_timer ()
72
82
issues = find_bare_citations (args .markdown_file_path , citation_keys )
@@ -81,7 +91,8 @@ def find_bare_citations(markdown_file_path:Path, citation_keys:list)->list:
81
91
print (f"Citation key: { citation_key } " )
82
92
83
93
for match in matches :
84
- print (f"* { match .group (0 )} " )
94
+ print (match )
95
+ # print(f"* {match.group(0)}")
85
96
86
97
# iff we've gotten here then issues exist and we should set return value to 1 at the end.
87
98
issues_exist = True
0 commit comments