Skip to content

Commit

Permalink
Improved comments
Browse files (browse the repository at this point in the history)
  • Loading branch information
AlessioNar committed Dec 24, 2024
1 parent 11ecfcb commit 6250040
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion tulit/parsers/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,12 +268,15 @@ def get_citations(self, citations_xpath, citation_xpath, extract_eId=None):

citations = []
for index, citation in enumerate(citations_section.findall(citation_xpath, namespaces=self.namespaces)):

# Extract the citation text
text = "".join(citation.itertext()).strip()
text = text.replace('\n', '').replace('\t', '').replace('\r', '') # remove newline and tab characters
text = re.sub(' +', ' ', text) # replace multiple spaces with a single space

# Get an eId for the citation, depending on the XML format
eId = extract_eId(citation, index) if extract_eId else index
# Up to this point, the code is the same as for Formex

citations.append({
'eId' : eId,
'text': text,
Expand Down

0 comments on commit 6250040

Please sign in to comment.