Skip to content

Commit

Permalink
update epu srcipts and outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
ccxzhang committed Mar 21, 2024
1 parent 3f8e32b commit fbebfcc
Show file tree
Hide file tree
Showing 55 changed files with 19,407 additions and 9,218 deletions.
10 changes: 5 additions & 5 deletions docs/images/interactive/text/ep_sentiment.html

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions docs/images/interactive/text/epu_pic.html

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/text/epu.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Uncertainty poses challenges to the stability and growth of economies. The Economic Policy Uncertainty Index (EPU) developed by {cite:t}`baker2016measuring` has emerged as a vital tool to measure both economic- and policy-related uncertainties. Specifically, they count how frequently economic- and policy-related terms appear in newspapers (relative to the total number of news articles), and the EPU index is then produced through standardization and normalization:

- Let $ X_{it} = \frac{\text{EPU news in newspaper } i \text{ at time } t}{\text{All news in newspaper } i \text{ at time } t} $ and pre-defined $T_1$ to be the standardization and normalization period;
- Let $ X_{it} = \frac{\text{EPU news in newspaper } i \text{ at time } t}{\text{All scraped news in newspaper } i \text{ at time } t} $ and let $T_1$ be the pre-defined standardization and normalization period;

- Calculate the standard deviation $\sigma_i$ for each newspaper $i$ over $T_1$.

Expand All @@ -21,7 +21,7 @@ Beyond the $Z_t$ being arithmetic mean of $Y_{it}$, we develop a weighted scheme
frameborder="0" marginwidth="0" marginheight="0" width="800" height="750"></iframe>
</div>

The sentiment of economic policy-related news could act as a barometer for uncertainty. Analyzing the shifts and changes in the sentiment of news articles and reports pertaining to economic policy could reflect the underlying economic uncertainty.
The sentiment of economic policy-related news could act as a barometer for uncertainty. Analyzing the shifts and changes in the sentiment of news articles and reports pertaining to economic policy could reflect the underlying economic uncertainty. To construct the sentiment scores, we first filter the news articles identified as both "economic" and "policy", calculate the sentiment (polarity) score for each article, and then average the scores within each month.

<div>
<iframe src="https://worldbank.github.io/pacific-observatory/interactive/text/ep_sentiment.html"
Expand Down
322 changes: 100 additions & 222 deletions notebooks/text/EPU.ipynb

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions notebooks/text/EPU_viz.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"id": "109cd21c",
"metadata": {
"scrolled": true
Expand All @@ -26,13 +26,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "17b0ce40",
"metadata": {},
"outputs": [],
"source": [
"countries = [\n",
" \"pacific\", \"papua_new_guinea\", \"solomon_islands\", \"fiji\", \"samoa\",\n",
" \"pacific\",\n",
" \"papua_new_guinea\", \"solomon_islands\", \"fiji\", \"samoa\",\n",
" \"vanuatu\"\n",
"]\n",
"\n",
Expand Down
140 changes: 97 additions & 43 deletions notebooks/text/Sentiment.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@
"from bokeh.layouts import Row, column, gridplot\n",
"from bokeh.models import (Title, Legend, ColumnDataSource, Select, HoverTool,\n",
" BoxZoomTool, ResetTool, DataTable, DateFormatter,\n",
" TableColumn)\n",
" TableColumn, LinearAxis, LegendItem, CustomJS, Toggle)\n",
"from bokeh.models.layouts import TabPanel, Tabs\n",
"from bokeh.plotting import figure, output_file, show, output_notebook\n",
"output_file(filename=sys.path[0] + \"docs/images/interactive/text/ep_sentiment.html\")"
"from bokeh.palettes import Category20\n",
"\n",
"output_file(filename=sys.path[0] +\n",
" \"docs/images/interactive/text/ep_sentiment.html\")"
]
},
{
Expand Down Expand Up @@ -65,42 +68,41 @@
" pbar.update(1)\n",
" return results\n",
"\n",
"\n",
"def calculate_sentiment(df):\n",
"\n",
" df = df[(df.econ == True)].reset_index(drop=True)\n",
" df = df[(df.econ) & (df.policy)].reset_index(drop=True)\n",
"\n",
" sent_res = sentiment_analysis(df)\n",
"\n",
" df[\"score\"] = [i[\"compound\"] for i in sent_res]\n",
" df[\"date\"] = df[\"date\"].apply(lambda x: parse(str(x)).date())\n",
" df[\"date\"] = pd.to_datetime(df[\"date\"])\n",
" month_sent = (df.set_index(\"date\")\n",
" .groupby(pd.Grouper(freq=\"MS\"))[[\"score\"]]\n",
" .mean()\n",
" .reset_index())\n",
" month_sent = (df.set_index(\"date\").groupby(\n",
" pd.Grouper(freq=\"MS\"))[[\"score\"]].mean().reset_index())\n",
"\n",
" return month_sent"
" return month_sent, df.score.mean(), df.score.std()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e0b8b9c7",
"metadata": {
"scrolled": false
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5065/5065 [00:15<00:00, 317.71it/s]\n",
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 83/83 [00:00<00:00, 191.30it/s]\n",
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1220/1220 [00:03<00:00, 398.76it/s]\n",
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8180/8180 [00:16<00:00, 487.82it/s]\n",
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8252/8252 [00:20<00:00, 409.74it/s]\n",
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4430/4430 [00:09<00:00, 460.15it/s]\n",
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9506/9506 [00:28<00:00, 334.85it/s]\n"
"100%|███████████████████████████████████████████████████████████████████████████████████| 4474/4474 [00:15<00:00, 294.10it/s]\n",
"100%|███████████████████████████████████████████████████████████████████████████████████████| 68/68 [00:00<00:00, 184.43it/s]\n",
"100%|█████████████████████████████████████████████████████████████████████████████████████| 963/963 [00:02<00:00, 350.31it/s]\n",
"100%|███████████████████████████████████████████████████████████████████████████████████| 7093/7093 [00:15<00:00, 462.35it/s]\n",
"100%|███████████████████████████████████████████████████████████████████████████████████| 6646/6646 [00:17<00:00, 381.18it/s]\n",
"100%|███████████████████████████████████████████████████████████████████████████████████| 3469/3469 [00:08<00:00, 419.12it/s]\n",
"100%|███████████████████████████████████████████████████████████████████████████████████| 6967/6967 [00:23<00:00, 298.41it/s]\n"
]
}
],
Expand All @@ -109,90 +111,142 @@
"for country in country_dirs:\n",
" country_name = country.split(\"/\")[-1]\n",
" news_dirs = [\n",
" f\"{country}/{file}\" for file in os.listdir(country) if \"news\" in file\n",
" f\"{country}/{file}\" for file in os.listdir(country)\n",
" if \"news\" in file and \"ner\" not in file\n",
" ]\n",
"\n",
" e = EPU(news_dirs, cutoff=None)\n",
" e.get_epu_category(\n",
" subset_condition=\"date >= '2016-01-01' and date < '2024-01-01'\")\n",
" \n",
"\n",
" dfs = pd.DataFrame()\n",
" for _, df in e.raw_files:\n",
" df_select = df[[\"news\", \"date\", \"econ\", \"policy\"]]\n",
" dfs = pd.concat([dfs, df_select], axis=0).reset_index(drop=True)\n",
" \n",
" sent_df = calculate_sentiment(dfs)\n",
"\n",
" sent_df, sent_mean, sent_std = calculate_sentiment(dfs)\n",
"\n",
" min_date = str(sent_df.date.min().date())\n",
" max_date = str(sent_df.date.max().date())\n",
"\n",
" sent_df = generate_continous_df(sent_df, min_date, max_date, freq=\"MS\")\n",
" sent_mean, sent_std = sent_df.score.mean(), sent_df.score.std()\n",
" sent_df[\"z_score\"] = sent_df[\"score\"].apply(lambda x:\n",
" (x - sent_mean) / sent_std)\n",
" sent_dfs.append((country_name, sent_df))\n",
" \n",
" sent_dfs.append((country_name, sent_df, sent_mean))\n",
"\n",
" saved_folder = output_dir + f\"{country_name}/sentiment/\"\n",
" if not os.path.exists(saved_folder):\n",
" os.mkdir(saved_folder)\n",
" \n",
" sent_df.to_csv(saved_folder + f\"{country_name}_sentiment.csv\", encoding=\"utf-8\")"
"\n",
" sent_df.to_csv(saved_folder + f\"{country_name}_sentiment.csv\",\n",
" encoding=\"utf-8\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a77e2c0c",
"execution_count": 5,
"id": "5d1cc6db",
"metadata": {},
"outputs": [],
"source": [
"tabs = []\n",
"for name, df in sent_dfs:\n",
"z_score_df = pd.DataFrame()\n",
"for name, df, sentiment in sent_dfs:\n",
" if name != \"tonga\":\n",
" temp = df[[\"date\", \"z_score\"]].rename({\"z_score\": name}, axis=1)\n",
" if z_score_df.empty:\n",
" z_score_df = temp\n",
" else:\n",
" z_score_df = z_score_df.merge(temp, how=\"outer\", on=\"date\")\n",
"\n",
"z_score_df = z_score_df.round(3)\n",
"source = ColumnDataSource(z_score_df)\n",
"hover = HoverTool(tooltips=[('Date', '@date{%Y-%m}'), ('Z-Score', '@$name')],\n",
" formatters={'@date': 'datetime'})\n",
"p = figure(height=400,\n",
" width=750,\n",
" x_axis_type=\"datetime\",\n",
" tools=[hover, BoxZoomTool(), ResetTool()])\n",
"\n",
"columns = [\n",
" TableColumn(field=\"date\", title=\"Date\", formatter=DateFormatter()),\n",
"]\n",
"colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']\n",
"line_styles = ['solid', 'dashed', 'dotted', 'dotdash', 'dashdot', 'solid']\n",
"toggles = []\n",
"\n",
"for (idx, name) in enumerate(z_score_df.columns[1:]):\n",
" if name != \"tonga\":\n",
" prettfied_name = \" \".join(w[0].upper() + w[1:]\n",
" for w in name.split(\"_\"))\n",
" line = p.line(\"date\",\n",
" name,\n",
" source=z_score_df,\n",
" name=name,\n",
" line_width=2,\n",
" color=colors[idx],\n",
" line_dash=line_styles[idx])\n",
" line.visible = idx < 1\n",
" toggle = Toggle(label=f\"{prettfied_name}\",\n",
" button_type=\"light\",\n",
" active=(idx < 1))\n",
" toggle.js_link('active', line, 'visible')\n",
"\n",
" toggles.append(toggle)\n",
" columns.append(TableColumn(field=name, title=prettfied_name))\n",
"\n",
"dt = DataTable(source=source, columns=columns, width=720, height=300)\n",
"combined = column(Row(*toggles), p, dt)\n",
"tabs.append(TabPanel(child=combined, title=\"Comparison\"))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "22778802",
"metadata": {},
"outputs": [],
"source": [
"for name, df, sentiment in sent_dfs:\n",
" if name != \"tonga\":\n",
" df = df.round(3)\n",
" source = ColumnDataSource(df)\n",
"\n",
" hover = HoverTool(tooltips=[('Date', '@date{%Y-%m}'),\n",
" ('Sentiment', '@score'),\n",
" ('Z-Score', '@z_score')],\n",
" ('Sentiment', '@score')],\n",
" formatters={'@date': 'datetime'})\n",
"\n",
" p = figure(height=400,\n",
" width=750,\n",
" y_range=(df.score.min() - 0.1, df.score.max() + 0.1),\n",
" x_axis_type=\"datetime\",\n",
" tools=[hover, BoxZoomTool(), ResetTool()])\n",
" tools=[hover, BoxZoomTool(),\n",
" ResetTool()])\n",
"\n",
" # Define the primary y-axis for 'score'\n",
" p.line(\"date\",\n",
" \"score\",\n",
" source=source,\n",
" name=\"sentiment\",\n",
" line_width=2,\n",
" legend_label=\"Sentiment\")\n",
"\n",
" p.line(\"date\",\n",
" \"z_score\",\n",
" source=source,\n",
" name=\"epu_unweighted\",\n",
" color='darkorange',\n",
" line_width=1.5,\n",
" legend_label=\"Z score\")\n",
"\n",
" p.hspan(y=sentiment, line_width=2, line_dash=[\"dashed\"])\n",
" p.legend.location = \"top_left\"\n",
" p.legend.click_policy = \"mute\"\n",
"\n",
" columns = [\n",
" TableColumn(field=\"date\", title=\"Date\", formatter=DateFormatter()),\n",
" TableColumn(field=\"score\", title=\"Sentiment (Avg.)\"),\n",
" TableColumn(field=\"z_score\", title=\"Z Score\"),\n",
" TableColumn(field=\"score\", title=\"Sentiment (Avg.)\")\n",
" ]\n",
" dt = DataTable(source=source, columns=columns, width=700, height=300)\n",
" dt = DataTable(source=source, columns=columns, width=720, height=300)\n",
" combined = column(p, dt)\n",
"\n",
" # Uppercase the first letter of the country name\n",
" title = \" \".join(w[0].upper() + w[1:] for w in name.split(\"_\"))\n",
" tab = TabPanel(child=combined, title=title)\n",
" tabs.append(tab)\n",
" \n",
"\n",
"show(Tabs(tabs=tabs))"
]
}
Expand Down
Loading

0 comments on commit fbebfcc

Please sign in to comment.