Skip to content

Commit

Permalink
Add article_id to make examples easy to identify
Browse files Browse the repository at this point in the history
  • Loading branch information
oplatek committed Nov 29, 2024
1 parent 6045b4c commit 1d904f1
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions factgenie/datasets/propaganda_techniques.py
Original file line number Diff line number Diff line change
def load_examples(self, split, data_path):
    """Load every ``article*.txt`` file of one split as an example dict.

    Args:
        split: Name of the split; used as the sub-directory of ``data_path``.
        data_path: Root directory that contains the split sub-directories.

    Returns:
        A list with one ``{"text": ..., "id": ...}`` dict per matched file, in
        the order returned by ``glob.glob``. ``text`` is the file content with
        surrounding whitespace stripped; ``id`` is the file stem without the
        ``article`` prefix. The list is empty when no file matches.

    Side effects:
        Stores ``article_id -> example_idx`` in
        ``self._article_id_to_example_idx`` for every loaded article.
    """
    examples = []
    articles_files = glob.glob(f"{data_path}/{split}/article*.txt")
    for example_idx, f in enumerate(articles_files):
        # ``Path.stem`` already drops the ".txt" suffix, so only the
        # "article" prefix has to be cut off to obtain the bare id.
        article_id = str(Path(f).stem)[len("article") :]
        self._article_id_to_example_idx[article_id] = example_idx
        with open(f, "r") as file:
            article = file.read()
        # Exactly one entry per article: the dict carrying both text and id.
        examples.append({"text": article.strip(), "id": article_id})
    return examples

def render(self, example):
    """Render one example as an HTML snippet.

    Args:
        example: Mapping with the keys ``"id"`` (article id without the
            ``article`` prefix and ``.txt`` suffix) and ``"text"`` (the
            article body).

    Returns:
        A ``<div>`` containing an ``<h3>`` header with the full file name
        (``article<id>.txt``) and a ``<p>`` with the article text, where each
        literal two-character sequence backslash + ``n`` becomes ``<br>``.
    """
    # Local import: only this block needs it, and the module-level import
    # section is left as it is.
    from html import escape

    # Escape both values so "<", ">" and "&" in the data cannot break the
    # surrounding markup or inject tags.
    article_id = escape(str(example["id"]), quote=False)
    body = escape(example["text"], quote=False)

    # NOTE(review): only the escaped form "\\n" is converted; real newline
    # characters pass through unchanged. Confirm this matches the data.
    # TODO: any other whitespace to handle besides newline?
    body = body.replace("\\n", "<br>")

    # The header renders the full file name so examples are easy to identify.
    return f"<div><h3>ID: article{article_id}.txt </h3><p>{body}</p></div>"
Expand Down Expand Up @@ -298,7 +299,7 @@ def download(
"split": split,
"setup_id": dataset_id,
"example_idx": example_idx,
"metadata": {},
"metadata": {"article": article_id},
"output": article_txt,
}
outputw.write(json.dumps(article_entry) + "\n")
Expand All @@ -316,6 +317,7 @@ def download(
"annotator_id": "idk",
"annotator_group": 0,
"campaign_id": PCT_CAMPAING_ID,
"article": article_id, # original dataset id
},
"annotations": cls._load_example_annotations(
annotation_file, article_txt, article_id, categories_names
Expand Down Expand Up @@ -362,6 +364,8 @@ def _load_example_annotations(cls, annotation_file: Union[str, Path], article: s
"start": start_idx,
}
annotations.append(annotation_d)

annotations = sorted(annotations, key=lambda x: x["start"])
return annotations


Expand Down

0 comments on commit 1d904f1

Please sign in to comment.