-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean_gdelt.do
89 lines (60 loc) · 3.55 KB
/
clean_gdelt.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
* Stage 1: load the monthly event file and aggregate it to one row per
* (Actor1CountryCode, Actor2CountryCode, year).
use "input/GDELT/gov2gov-events-by-month.dta", clear

* The year is read from the first four characters of YearMonth.
gen year = real(substr(YearMonth, 1, 4))

* Yearly totals of every Mentions and Events column, per directed pair.
local counts Mentions3 Events3 Mentions4 Events4 Mentions5 Events5 Mentions6 Events6
collapse (sum) `counts', by(Actor1CountryCode Actor2CountryCode year)

* Undirected pair id: both codes concatenated with the smaller one first,
* so both directions of a pair share the same value.
gen iso3_od = cond(Actor1CountryCode <= Actor2CountryCode, ///
    Actor1CountryCode + Actor2CountryCode, ///
    Actor2CountryCode + Actor1CountryCode)

* Directed pair id: Actor1 code followed by Actor2 code.
gen iso3_od_dir = Actor1CountryCode + Actor2CountryCode

* Spot checks on the HUN/DEU pair in 2018: each direction, then both rows.
summarize Mentions3 if year == 2018 & Actor1CountryCode == "HUN" & Actor2CountryCode == "DEU"
summarize Mentions3 if year == 2018 & Actor1CountryCode == "DEU" & Actor2CountryCode == "HUN"
summarize Mentions3 if year == 2018 & iso3_od == "DEUHUN"

* Disabled alternative: average the two directions of each pair instead.
*collapse (mean) Mentions3 Events3 Mentions4 Events4 Mentions5 Events5 Mentions6 Events6, by(iso3_od year)
*sum Mentions3 if iso3_od == "DEUHUN" & year == 2018
* Stage 2: build the outcome measures from the yearly sums.
*   intent_events, intent_mentions : copies of the code-3 columns
*   visits_events, visits_mentions : row totals of the code-4, 5 and 6 columns
* The new variables are stored as double. The inputs are doubles produced by
* collapse (sum), and the default float type represents integers exactly only
* up to 16,777,216, so large counts would otherwise be rounded silently.
* A restriction to rows with non-missing inputs was considered by the author
* and left disabled; rowtotal treats missing inputs as zero.
gen double intent_events = Events3
egen double visits_events = rowtotal(Events4 Events5 Events6)
gen double intent_mentions = Mentions3
egen double visits_mentions = rowtotal(Mentions4 Mentions5 Mentions6)

* Drop the raw columns; Actor1CountryCode is kept and renamed to actor.
* The directed pair id iso3_od_dir still carries both country codes.
drop Actor2CountryCode Events* Mentions*
rename Actor1CountryCode actor

* Disabled alternatives for the reshape that follows.
*rename Actor2CountryCode actor2
*reshape long actor, i(iso3_od_dir year) j(country)
*reshape wide actor intent_events visits_events intent_mentions visits_mentions, i(iso3_od_dir year) j(country)
* Stage 3: put both directions of a pair on a single row.
* Within each undirected pair and year, number the rows by actor code:
* order is 1 for the alphabetically first actor and 2 for the other one.
sort iso3_od year actor
by iso3_od year: gen order = _n

* Spot checks on the DEU/HUN pair in 2018 before reshaping.
summarize intent_mentions if year == 2018 & iso3_od == "DEUHUN" & actor == "HUN"
summarize intent_mentions if year == 2018 & iso3_od == "DEUHUN" & actor == "DEU"
summarize intent_mentions if year == 2018 & iso3_od == "DEUHUN"

* One row per pair and year; every stub gets suffix 1 or 2 taken from order.
local stubs actor iso3_od_dir intent_events visits_events intent_mentions visits_mentions
reshape wide `stubs', i(iso3_od year) j(order)

* The same spot checks after reshaping, one per suffix.
summarize intent_mentions1 if year == 2018 & iso3_od == "DEUHUN"
summarize intent_mentions2 if year == 2018 & iso3_od == "DEUHUN"
* Stage 4: turn each pair row into two rows, one per direction.
* Every row is duplicated, and the copies are numbered 1 and 2 within
* pair and year.
expand 2
bysort iso3_od year: generate order = _n

* Suffix 1 becomes the exporter side and suffix 2 the importer side.
* On the copy with order 2 the two sides are exchanged, so that each actor
* appears once as exporter and once as importer.
foreach stub in actor iso3_od_dir intent_events visits_events intent_mentions visits_mentions {
    rename `stub'1 `stub'_exporter
    rename `stub'2 `stub'_importer

    * Hold the exporter values while the two sides are exchanged.
    tempvar held
    clonevar `held' = `stub'_exporter
    replace `stub'_exporter = `stub'_importer if order == 2
    replace `stub'_importer = `held' if order == 2
    drop `held'
}
*browse if iso3_od == "DEUHUN"
* Stage 5: keep only rows that have an exporter-side directed pair id and
* complete the importer code.
rename iso3_od_dir_exporter iso3_od_dir

* Rows with an empty directed id have no exporter-side record.
keep if iso3_od_dir != ""

* Diagnostics: row count and number of empty actor codes on each side.
count
count if missing(actor_exporter)
count if missing(actor_importer)

* Where the importer code is empty, take it from characters 4 to 6 of the
* directed pair id, then count again how many are still empty.
replace actor_importer = substr(iso3_od_dir,4,3) if missing(actor_importer)
count if missing(actor_importer)
* Stage 6: flag exporters from the listed set of country codes and average
* their event counts by importer and year.
* eu is 1 when actor_exporter is one of the 28 codes below and 0 otherwise.
* inlist accepts at most nine string values per call, hence the groups.
gen eu = inlist(actor_exporter, "AUT", "BEL", "BGR", "CYP", "CZE", "DEU", "DNK", "ESP", "EST") ///
    | inlist(actor_exporter, "FIN", "FRA", "GBR", "GRC", "HRV", "HUN", "IRL", "ITA", "LTU") ///
    | inlist(actor_exporter, "LUX", "LVA", "MLT", "NLD", "POL", "PRT", "ROM", "SVK", "SVN") ///
    | actor_exporter == "SWE"

* Mean of the exporter-side counts over flagged rows, within importer and
* year. The result is missing on rows where eu is 0.
foreach kind in intent visits {
    bysort actor_importer year: egen `kind'_events_eu = mean(`kind'_events_exporter) if eu
}

* Helper and identifier columns are no longer needed; iso3_od_dir is kept.
drop eu order iso3_od actor_exporter actor_importer iso3_od_dir_importer
* Stage 7: overlay histograms of the two exporter-side event counts,
* restricted to rows where both counts are at most 300, then export the
* figure and save the cleaned data.
twoway ///
    (histogram intent_events_exporter, color(green%30)) ///
    (histogram visits_events_exporter, color(red%30)) ///
    if intent_events_exporter <= 300 & visits_events_exporter <= 300, ///
    legend(order(1 "intent" 2 "visits" )) graphregion(color(white)) ///
    ytitle("") xtitle("number of events")
graph export "output/hist_dependent_before.png", replace

save "temp/gdelt-clean.dta", replace
* Stage 8: extract the rows whose directed pair id starts with EUR and save
* them as a separate file.
* NOTE(review): EUR presumably identifies an EU-level actor, given the agency
* suffix used below; confirm against the source data.
keep if substr(iso3_od_dir,1,3) == "EUR"

* Destination code: characters 4 to 6 of the directed pair id.
gen iso3_d = substr(iso3_od_dir,4,3)

* Exporter-side counts are relabelled with the agency suffix.
rename (intent_events_exporter visits_events_exporter) (intent_events_agency visits_events_agency)

keep year intent_events_agency visits_events_agency iso3_d
save "temp/gdelt-agency-clean.dta", replace