Skip to content

Commit

Permalink
update ner and ep-sentiment
Browse files (browse the repository at this point in the history)
  • Loading branch information
ccxzhang committed Feb 9, 2024
1 parent dbef2c4 commit 15b9ead
Show file tree
Hide file tree
Showing 22 changed files with 206,677 additions and 3 deletions.
2,598 changes: 2,598 additions & 0 deletions data/text/fiji/fiji_abc_news_ner.csv

Large diffs are not rendered by default.

6,159 changes: 6,159 additions & 0 deletions data/text/fiji/fiji_rnz_news_ner.csv

Large diffs are not rendered by default.

51,304 changes: 51,304 additions & 0 deletions data/text/fiji/fiji_sun_news_ner.csv

Large diffs are not rendered by default.

141 changes: 141 additions & 0 deletions data/text/marshall_islands/marshall_islands_abc_news_ner.csv

Large diffs are not rendered by default.

4,641 changes: 4,641 additions & 0 deletions data/text/papua_new_guinea/papua_new_guinea_abc_news_ner.csv

Large diffs are not rendered by default.

5,706 changes: 5,706 additions & 0 deletions data/text/papua_new_guinea/papua_new_guinea_rnz_news_ner.csv

Large diffs are not rendered by default.

2,257 changes: 2,257 additions & 0 deletions data/text/papua_new_guinea/png_business_news_ner.csv

Large diffs are not rendered by default.

35,495 changes: 35,495 additions & 0 deletions data/text/samoa/samoa_observer_news_ner.csv

Large diffs are not rendered by default.

4,429 changes: 4,429 additions & 0 deletions data/text/samoa/samoa_rnz_news_ner.csv

Large diffs are not rendered by default.

9,237 changes: 9,237 additions & 0 deletions data/text/solomon_islands/island_sun_news_ner.csv

Large diffs are not rendered by default.

9,170 changes: 9,170 additions & 0 deletions data/text/solomon_islands/sibc_news_ner.csv

Large diffs are not rendered by default.

1,710 changes: 1,710 additions & 0 deletions data/text/solomon_islands/solomon_islands_abc_news_ner.csv

Large diffs are not rendered by default.

2,073 changes: 2,073 additions & 0 deletions data/text/solomon_islands/solomon_islands_rnz_news_ner.csv

Large diffs are not rendered by default.

14,552 changes: 14,552 additions & 0 deletions data/text/solomon_islands/solomon_star_news_ner.csv

Large diffs are not rendered by default.

11,140 changes: 11,140 additions & 0 deletions data/text/solomon_islands/solomon_times_news_ner.csv

Large diffs are not rendered by default.

14,919 changes: 14,919 additions & 0 deletions data/text/tonga/matangi_news_ner.csv

Large diffs are not rendered by default.

820 changes: 820 additions & 0 deletions data/text/tonga/tonga_abc_news_ner.csv

Large diffs are not rendered by default.

29,470 changes: 29,470 additions & 0 deletions data/text/vanuatu/daily_posts_news_ner.csv

Large diffs are not rendered by default.

784 changes: 784 additions & 0 deletions data/text/vanuatu/vanuatu_abc_news_ner.csv

Large diffs are not rendered by default.

63 changes: 63 additions & 0 deletions docs/images/interactive/text/ep_sentiment.html

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
beautifulsoup4==4.12.3
bokeh==3.2.1
chardet==5.2.0
numpy==1.26.0
pandas==2.0.3
gensim==3.8.3
google_api_python_client==2.94.0
lxml==4.9.3
matplotlib==3.5.3
networkx==3.1
nltk==3.8.1
pandas==2.0.3
pdf2image==1.16.3
Pillow==9.4.0
Pillow==10.2.0
pmdarima==2.0.4
prophet==1.1.4
Expand Down
8 changes: 7 additions & 1 deletion src/text/epu.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,12 @@ class EPU:
def __init__(self,
filepath: Union[str, List[str]],
cutoff: str,
non_epu_urls: list = None,
econ_terms: list = ECON_LIST,
policy_terms: list = POLICY_LIST,
uncertainty_terms: list = UNCERTAINTY_LIST,
additional_terms: Union[List, None] = None):

if isinstance(filepath, str):
self.filepath = [filepath]
elif isinstance(filepath, list):
Expand All @@ -76,6 +77,7 @@ def __init__(self,
self.additional_terms = additional_terms
self.raw_files = []
self.cutoff = cutoff
self.non_epu_urls = non_epu_urls if non_epu_urls is not None else []
self.min_date = None
self.max_date = None
self.epu_stats = pd.DataFrame()
Expand Down Expand Up @@ -157,6 +159,10 @@ def get_epu_category(self, subset_condition=None):
raw["additional"] = raw["news"].str.lower().apply(
is_in_word_list, terms=self.additional_terms)
raw["epu"] = (raw.epu == True) & (raw.additional == True)

if raw["url"].isin(self.non_epu_urls).sum() > 0:
raw.loc[raw.url.isin(self.non_epu_urls), "epu"] = False

self.raw_files.append((source, raw.copy()))

def calculate_news_and_epu_counts(self,
Expand Down

0 comments on commit 15b9ead

Please sign in to comment.