diff --git a/data/text/solomon_islands/st_ldavis.html b/data/text/solomon_islands/st_ldavis.html new file mode 100644 index 00000000..43ec898c --- /dev/null +++ b/data/text/solomon_islands/st_ldavis.html @@ -0,0 +1,41 @@ + + + + +
+ \ No newline at end of file diff --git a/scripts/notebooks/text/PACNEWS.ipynb b/scripts/notebooks/text/PACNEWS.ipynb index 8d0c0f65..57735750 100644 --- a/scripts/notebooks/text/PACNEWS.ipynb +++ b/scripts/notebooks/text/PACNEWS.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "7f762fdb", + "id": "0bc993d4", "metadata": {}, "outputs": [], "source": [ @@ -20,7 +20,7 @@ }, { "cell_type": "markdown", - "id": "958d3ebf", + "id": "b3dbfca0", "metadata": {}, "source": [ "## FACTIVA" @@ -28,8 +28,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "2aa29868", + "execution_count": 3, + "id": "21ad0297", "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ }, { "cell_type": "markdown", - "id": "9adfc80f", + "id": "2f1d617c", "metadata": {}, "source": [ "Converting RTF file to TXT by following commands on Mac:\n", @@ -59,8 +59,8 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "39e6dc0e", + "execution_count": 5, + "id": "ad7c9a95", "metadata": { "scrolled": false }, @@ -75,7 +75,6 @@ " entry_lst = entry.strip().split(\"\\n\\n\")\n", " title = entry_lst[0]\n", " date = entry_lst[1].split(\"\\n\")[1]\n", - " if date \n", " entry_length = len(entry_lst)\n", " if idx == len(entries) - 1:\n", " content = \"\".join((entry_lst[i]) for i in range(entry_length)\n", @@ -90,8 +89,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "7442868f", + "execution_count": 6, + "id": "f94aeee4", "metadata": { "scrolled": false }, @@ -124,7 +123,7 @@ " \n", " \n", "\n", + " | title | \n", + "date | \n", + "news | \n", + "
---|---|---|---|
0 | \n", + "Visa issue put to Gillard | \n", + "10 May 2013 | \n", + "PORT MORESBY, May 10 -- Papua New Guinea Prime... | \n", + "
1 | \n", + "ECP could be revived - Australia looks to stre... | \n", + "10 May 2013 | \n", + "PORT MORESBY, May 10 -- More Australian police... | \n", + "
2 | \n", + "Vanuatu National Provident Fund team visits SINPF | \n", + "10 May 2013 | \n", + "HONIARA, May 10 -- A delegation from the Vanua... | \n", + "
3 | \n", + "CNMI eyes tourism office in Russia | \n", + "10 May 2013 | \n", + "SAIPAN, May 10 -- Due to the overwhelming grow... | \n", + "
4 | \n", + "Solomons' Gizo airport to close for major upgr... | \n", + "10 May 2013 | \n", + "HONIARA, May 10 -- The Solomon Islands' Gizo A... | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "
9895 | \n", + "Fiji eyes deals with PNG provinces | \n", + "14 October 2013 | \n", + "PORT MORESBY, Oct. 14 -- Fiji is establishing ... | \n", + "
9896 | \n", + "0.8pc blind in Fiji | \n", + "14 October 2013 | \n", + "SUVA, Oct. 14 -- The prevalence of blindness i... | \n", + "
9897 | \n", + "Fiji calls for greater support from EU | \n", + "14 October 2013 | \n", + "SIGATOKA, Oct. 14 -- Fiji wants the European U... | \n", + "
9898 | \n", + "Intervention by Vanuatu's Minister for Tourism... | \n", + "14 October 2013 | \n", + "BRUSSELS, Oct. 14 -- On Economic Partnership A... | \n", + "
9899 | \n", + "UN health agency launches initiative to phase ... | \n", + "14 October 2013 | \n", + "GENEVA, Oct. 14 -- The United Nations World He... | \n", + "
9201 rows × 3 columns
\n", + "\n", + " | lags | \n", + "ccf | \n", + "
---|---|---|
70 | \n", + "-3.0 | \n", + "0.054080 | \n", + "
71 | \n", + "-2.0 | \n", + "-0.091193 | \n", + "
72 | \n", + "-1.0 | \n", + "0.319960 | \n", + "
73 | \n", + "0.0 | \n", + "-0.027509 | \n", + "
74 | \n", + "1.0 | \n", + "-0.092961 | \n", + "
75 | \n", + "2.0 | \n", + "-0.055663 | \n", + "
76 | \n", + "3.0 | \n", + "-0.043655 | \n", + "
77 | \n", + "4.0 | \n", + "0.084574 | \n", + "
78 | \n", + "5.0 | \n", + "-0.037249 | \n", + "
79 | \n", + "6.0 | \n", + "0.160906 | \n", + "
\n", + " | lag | \n", + "AIC | \n", + "BIC | \n", + "FPE | \n", + "HQIC | \n", + "
---|---|---|---|---|---|
0 | \n", + "1 | \n", + "9.805674 | \n", + "10.004733 | \n", + "18138.630383 | \n", + "9.884332 | \n", + "
1 | \n", + "2 | \n", + "9.881134 | \n", + "10.215655 | \n", + "19569.805711 | \n", + "10.013124 | \n", + "
2 | \n", + "3 | \n", + "10.010146 | \n", + "10.482401 | \n", + "22290.211200 | \n", + "10.196191 | \n", + "
3 | \n", + "4 | \n", + "10.005401 | \n", + "10.617725 | \n", + "22233.085776 | \n", + "10.246231 | \n", + "
4 | \n", + "5 | \n", + "10.104326 | \n", + "10.859116 | \n", + "24634.831732 | \n", + "10.400676 | \n", + "
5 | \n", + "6 | \n", + "10.182071 | \n", + "11.081788 | \n", + "26778.109669 | \n", + "10.534678 | \n", + "
6 | \n", + "7 | \n", + "10.206808 | \n", + "11.253980 | \n", + "27679.830094 | \n", + "10.616415 | \n", + "
7 | \n", + "8 | \n", + "10.306809 | \n", + "11.504034 | \n", + "30958.238883 | \n", + "10.774158 | \n", + "
8 | \n", + "9 | \n", + "10.415407 | \n", + "11.765352 | \n", + "35085.730392 | \n", + "10.941238 | \n", + "
9 | \n", + "10 | \n", + "10.541341 | \n", + "12.046747 | \n", + "40702.167664 | \n", + "11.126393 | \n", + "
10 | \n", + "11 | \n", + "10.485989 | \n", + "12.149671 | \n", + "39696.265033 | \n", + "11.130995 | \n", + "
11 | \n", + "12 | \n", + "10.300236 | \n", + "12.125084 | \n", + "34327.867964 | \n", + "11.005920 | \n", + "
\n", + " | date | \n", + "cpi | \n", + "epu_index | \n", + "inflation | \n", + "epu_change | \n", + "epu_change_1 | \n", + "epu_change_2 | \n", + "
---|---|---|---|---|---|---|---|
0 | \n", + "2017-01-01 | \n", + "98.6 | \n", + "27.257114 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
1 | \n", + "2017-02-01 | \n", + "99.0 | \n", + "12.898663 | \n", + "0.405680 | \n", + "-52.677809 | \n", + "NaN | \n", + "NaN | \n", + "
2 | \n", + "2017-03-01 | \n", + "100.6 | \n", + "27.257114 | \n", + "1.616162 | \n", + "111.317351 | \n", + "-52.677809 | \n", + "NaN | \n", + "
3 | \n", + "2017-04-01 | \n", + "100.7 | \n", + "27.257114 | \n", + "0.099404 | \n", + "0.000000 | \n", + "111.317351 | \n", + "-52.677809 | \n", + "
4 | \n", + "2017-05-01 | \n", + "101.0 | \n", + "27.257114 | \n", + "0.297915 | \n", + "0.000000 | \n", + "0.000000 | \n", + "111.317351 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
70 | \n", + "2022-11-01 | \n", + "117.2 | \n", + "74.146784 | \n", + "-0.509338 | \n", + "-30.521654 | \n", + "112.937717 | \n", + "-22.245716 | \n", + "
71 | \n", + "2022-12-01 | \n", + "117.0 | \n", + "74.146784 | \n", + "-0.170648 | \n", + "0.000000 | \n", + "-30.521654 | \n", + "112.937717 | \n", + "
72 | \n", + "2023-01-01 | \n", + "118.9 | \n", + "44.235094 | \n", + "1.623932 | \n", + "-40.341183 | \n", + "0.000000 | \n", + "-30.521654 | \n", + "
73 | \n", + "2023-02-01 | \n", + "119.2 | \n", + "67.197958 | \n", + "0.252313 | \n", + "51.910964 | \n", + "-40.341183 | \n", + "0.000000 | \n", + "
74 | \n", + "2023-03-01 | \n", + "119.8 | \n", + "61.339754 | \n", + "0.503356 | \n", + "-8.717830 | \n", + "51.910964 | \n", + "-40.341183 | \n", + "
75 rows × 7 columns
\n", + "Dep. Variable: | inflation | R-squared: | 0.076 | \n", + "
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.061 | \n", + "
Method: | Least Squares | F-statistic: | 5.169 | \n", + "
Date: | Mon, 11 Sep 2023 | Prob (F-statistic): | 0.0264 | \n", + "
Time: | 16:08:01 | Log-Likelihood: | -108.59 | \n", + "
No. Observations: | 65 | AIC: | 221.2 | \n", + "
Df Residuals: | 63 | BIC: | 225.5 | \n", + "
Df Model: | 1 | \n", + " | |
Covariance Type: | nonrobust | \n", + " |
coef | std err | t | P>|t| | [0.025 | 0.975] | \n", + "|
---|---|---|---|---|---|---|
Intercept | 0.1340 | 0.169 | 0.792 | 0.431 | -0.204 | 0.472 | \n", + "
epu_change_1 | 0.0021 | 0.001 | 2.273 | 0.026 | 0.000 | 0.004 | \n", + "
Omnibus: | 4.718 | Durbin-Watson: | 1.491 | \n", + "
---|---|---|---|
Prob(Omnibus): | 0.095 | Jarque-Bera (JB): | 5.025 | \n", + "
Skew: | 0.254 | Prob(JB): | 0.0811 | \n", + "
Kurtosis: | 4.264 | Cond. No. | 192. | \n", + "
\n", + " | var_inflation | \n", + "var_epu_index | \n", + "inflation | \n", + "
---|---|---|---|
1 | \n", + "-0.122479 | \n", + "113.201728 | \n", + "1.616162 | \n", + "
2 | \n", + "0.178068 | \n", + "103.597059 | \n", + "0.099404 | \n", + "
3 | \n", + "-0.118105 | \n", + "114.215800 | \n", + "0.297915 | \n", + "
4 | \n", + "-0.079343 | \n", + "112.826035 | \n", + "-0.396040 | \n", + "
5 | \n", + "-0.272504 | \n", + "118.699620 | \n", + "-0.099404 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "
62 | \n", + "0.511572 | \n", + "103.481791 | \n", + "1.919561 | \n", + "
63 | \n", + "0.551307 | \n", + "95.943773 | \n", + "2.152466 | \n", + "
64 | \n", + "0.557464 | \n", + "95.005629 | \n", + "0.175593 | \n", + "
65 | \n", + "0.064494 | \n", + "110.728955 | \n", + "1.139351 | \n", + "
66 | \n", + "0.215050 | \n", + "104.644432 | \n", + "1.473137 | \n", + "
66 rows × 3 columns
\n", + "\n", + " | date | \n", + "st_news_count | \n", + "st_epu_count | \n", + "st_z_score | \n", + "ss_news_count | \n", + "ss_epu_count | \n", + "ss_z_score | \n", + "tis_news_count | \n", + "tis_epu_count | \n", + "tis_z_score | \n", + "... | \n", + "sibc_z_score | \n", + "z_score | \n", + "epu_index | \n", + "news_count | \n", + "st_ratio | \n", + "ss_ratio | \n", + "tis_ratio | \n", + "sibc_ratio | \n", + "adj_z_score | \n", + "adj_epu_index | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
56 | \n", + "2011-12-01 | \n", + "37.0 | \n", + "1.0 | \n", + "0.917445 | \n", + "0.0 | \n", + "0.0 | \n", + "0.000000 | \n", + "0.0 | \n", + "0.0 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.229361 | \n", + "43.259843 | \n", + "37.0 | \n", + "1.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.917445 | \n", + "198.457540 | \n", + "
104 | \n", + "2015-12-01 | \n", + "3.0 | \n", + "0.0 | \n", + "0.274102 | \n", + "55.0 | \n", + "2.0 | \n", + "1.976608 | \n", + "0.0 | \n", + "0.0 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.562677 | \n", + "106.126693 | \n", + "58.0 | \n", + "0.051724 | \n", + "0.948276 | \n", + "0.000000 | \n", + "0.000000 | \n", + "1.888547 | \n", + "408.522194 | \n", + "
125 | \n", + "2017-09-01 | \n", + "0.0 | \n", + "0.0 | \n", + "0.000000 | \n", + "55.0 | \n", + "4.0 | \n", + "4.577167 | \n", + "165.0 | \n", + "1.0 | \n", + "0.210830 | \n", + "... | \n", + "0.298367 | \n", + "1.271591 | \n", + "239.834991 | \n", + "284.0 | \n", + "0.000000 | \n", + "0.193662 | \n", + "0.580986 | \n", + "0.225352 | \n", + "1.076150 | \n", + "232.788041 | \n", + "
127 | \n", + "2017-11-01 | \n", + "0.0 | \n", + "0.0 | \n", + "0.000000 | \n", + "84.0 | \n", + "2.0 | \n", + "1.078796 | \n", + "151.0 | \n", + "4.0 | \n", + "0.334047 | \n", + "... | \n", + "1.849475 | \n", + "0.815579 | \n", + "153.826584 | \n", + "316.0 | \n", + "0.000000 | \n", + "0.265823 | \n", + "0.477848 | \n", + "0.256329 | \n", + "0.920466 | \n", + "199.111238 | \n", + "
152 | \n", + "2019-12-01 | \n", + "54.0 | \n", + "2.0 | \n", + "1.358758 | \n", + "118.0 | \n", + "0.0 | \n", + "0.623951 | \n", + "6.0 | \n", + "1.0 | \n", + "4.072718 | \n", + "... | \n", + "6.660640 | \n", + "3.179017 | \n", + "599.594917 | \n", + "203.0 | \n", + "0.266010 | \n", + "0.581281 | \n", + "0.029557 | \n", + "0.123153 | \n", + "1.664785 | \n", + "360.119027 | \n", + "
154 | \n", + "2020-02-01 | \n", + "38.0 | \n", + "0.0 | \n", + "0.274102 | \n", + "183.0 | \n", + "5.0 | \n", + "1.330021 | \n", + "8.0 | \n", + "0.0 | \n", + "0.372474 | \n", + "... | \n", + "0.298367 | \n", + "0.568741 | \n", + "107.270296 | \n", + "245.0 | \n", + "0.155102 | \n", + "0.746939 | \n", + "0.032653 | \n", + "0.065306 | \n", + "1.067606 | \n", + "230.939702 | \n", + "
156 | \n", + "2020-04-01 | \n", + "95.0 | \n", + "2.0 | \n", + "0.654050 | \n", + "169.0 | \n", + "3.0 | \n", + "0.645553 | \n", + "9.0 | \n", + "0.0 | \n", + "0.372474 | \n", + "... | \n", + "4.818550 | \n", + "1.622657 | \n", + "306.049552 | \n", + "307.0 | \n", + "0.309446 | \n", + "0.550489 | \n", + "0.029316 | \n", + "0.110749 | \n", + "1.102333 | \n", + "238.451721 | \n", + "
158 | \n", + "2020-06-01 | \n", + "97.0 | \n", + "5.0 | \n", + "1.998435 | \n", + "114.0 | \n", + "1.0 | \n", + "0.003377 | \n", + "0.0 | \n", + "0.0 | \n", + "0.000000 | \n", + "... | \n", + "0.298367 | \n", + "0.575045 | \n", + "108.459269 | \n", + "220.0 | \n", + "0.440909 | \n", + "0.518182 | \n", + "0.000000 | \n", + "0.040909 | \n", + "0.895084 | \n", + "193.620595 | \n", + "
159 | \n", + "2020-07-01 | \n", + "80.0 | \n", + "3.0 | \n", + "1.379169 | \n", + "156.0 | \n", + "1.0 | \n", + "0.165519 | \n", + "0.0 | \n", + "0.0 | \n", + "0.000000 | \n", + "... | \n", + "6.950599 | \n", + "2.123822 | \n", + "400.574369 | \n", + "248.0 | \n", + "0.322581 | \n", + "0.629032 | \n", + "0.000000 | \n", + "0.048387 | \n", + "0.885329 | \n", + "191.510509 | \n", + "
194 | \n", + "2023-06-01 | \n", + "92.0 | \n", + "2.0 | \n", + "0.684316 | \n", + "128.0 | \n", + "2.0 | \n", + "0.493477 | \n", + "7.0 | \n", + "0.0 | \n", + "0.372474 | \n", + "... | \n", + "2.378174 | \n", + "0.982110 | \n", + "185.235975 | \n", + "292.0 | \n", + "0.315068 | \n", + "0.438356 | \n", + "0.023973 | \n", + "0.222603 | \n", + "0.970242 | \n", + "209.878510 | \n", + "
10 rows × 22 columns
\n", + "