-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReferenceGenerator.py
53 lines (42 loc) · 1.6 KB
/
ReferenceGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# ReferenceGenerator.py
# This script extracts the first arXiv ID found in pdf_to_text_temp.txt, generates a citation for it, and appends the citation to output.txt.
# Required libraries: arxiv (third-party); datetime, re, os (standard library)
import arxiv
import datetime
import re
import os
# Extract arXiv ID from text using regex
def extract_arxiv_id(text):
    """Return the first new-style arXiv identifier found in *text*.

    The identifier must be introduced by the literal prefix ``arXiv:``
    (case-sensitive), e.g. ``arXiv:2301.12345v2``.  Both sequence-number
    widths of the new-style scheme are accepted (``YYMM.NNNN`` and
    ``YYMM.NNNNN``), and the ``vN`` version suffix is optional.

    Args:
        text: The string to search.

    Returns:
        The identifier without the ``arXiv:`` prefix, including the version
        suffix when one is present (e.g. ``"2301.12345v2"``), or ``None``
        when no identifier is found.
    """
    # The trailing (?!\d) rejects over-long digit runs such as "2301.123456"
    # instead of silently truncating them to a different, valid-looking ID.
    pattern = r'arXiv:(\d{4}\.\d{4,5}(?:v\d+)?)(?!\d)'
    match = re.search(pattern, text)
    return match.group(1) if match else None
# Generate citation for a given arXiv ID
def get_arxiv_citation(arxiv_id):
    """Build a one-line citation string for the paper with the given arXiv ID.

    The paper is looked up with ``arxiv.Search(id_list=[arxiv_id])`` and the
    first result is formatted.

    Args:
        arxiv_id: The arXiv identifier to look up, e.g. ``"2301.12345v2"``.
            It is also embedded verbatim in the citation URL.

    Returns:
        A string of the form
        ``"<authors>. (<year>). <title>. ArXiv.org. https://arxiv.org/abs/<arxiv_id>"``
        where ``<authors>`` is the comma-separated list of author names and
        ``<year>`` is the year of the paper's ``published`` date.

    Raises:
        ValueError: If the lookup yields no paper for ``arxiv_id``.
    """
    search = arxiv.Search(id_list=[arxiv_id])
    # NOTE(review): newer releases of the arxiv package appear to favour
    # arxiv.Client().results(search) over Search.results() -- confirm against
    # the installed version before switching.
    # A default of None avoids leaking a bare StopIteration to the caller
    # when the result iterator is empty.
    paper = next(search.results(), None)
    if paper is None:
        raise ValueError(f"No arXiv paper found for ID {arxiv_id!r}")
    authors = ", ".join(author.name for author in paper.authors)
    year = paper.published.year
    return f"{authors}. ({year}). {paper.title}. ArXiv.org. https://arxiv.org/abs/{arxiv_id}"
# Append citation to the output file
def append_citation_to_output(citation, output_path='output.txt'):
    """Append *citation* to a text file, preceded by two newlines.

    The file is opened in append mode with UTF-8 encoding, so it is created
    if it does not exist and existing content is preserved.

    Args:
        citation: The citation text to write.
        output_path: Path of the file to append to.  Defaults to
            ``'output.txt'`` in the current working directory.
    """
    with open(output_path, 'a', encoding='utf-8') as file:
        # Two newlines separate the citation from whatever precedes it with
        # a blank line; a single write keeps the entry together.
        file.write('\n\n' + citation)
# Load the full contents of pdf_to_text_temp.txt
with open('pdf_to_text_temp.txt', 'r', encoding='utf-8') as file:
    content = file.read()

# Look for an arXiv ID in the loaded text
arxiv_id = extract_arxiv_id(content)

if not arxiv_id:
    # Nothing to cite: report it and leave output.txt untouched
    print("No valid arXiv ID found")
else:
    # Build the citation for the ID that was found and show it
    citation = get_arxiv_citation(arxiv_id)
    print("Generated citation:", citation, sep="\n")
    # Persist the citation at the end of output.txt
    append_citation_to_output(citation)
    print("Citation has been added to the end of output.txt")