-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsum_enum_by_year.R
29 lines (27 loc) · 2.7 KB
/
sum_enum_by_year.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
DESCRIPTION <- "build a table of year by some enumeration in the data. This is specifically for summing enumeration values rather than counting where the enumeration is true. Extremely useful for stream, stacked bar, and line charts."
load(data)
# motive by year (2009-2014) for Breaches
## TO USE: 0. Change the filters in the first line unless you want no Error actions, only Breaches, and no other filters
## 1. Change the starts_with()'s in the first select statement to the columns you want
## 2. Change the "4" in the separate line to the number of columns. (if you're doing actor.Internal.motive.m, it'd be 4 because there's 4 parts.)
## 3. Change the "4" in the select line below the separate line to whatever number you used in step 2.
## 4. Change the "4" in the rename line to whatever number you used in step 2.
## NOTE 1: This only works if all enumerations are the same level down the tree. (ie no actor.Internal.motive.m joined with actor.unknown)
## NOTE 2: If you remove the separate-select-rename lines you don't join the lower level values
temp <- vz %>%
filter(attribute.confidentiality.data_disclosure.Yes, action.Error != TRUE) %>% # Filter to the data we want to work on.
select(starts_with("attribute.confidentiality.data.amount."),
timeline.incident.year) %>% # Select only the data column(s) we plan to sum & the year we plan to sum by
gather(enum,
value,
-timeline.incident.year) %>% # Gather column name - value pairs into 2 columns: enum - value
filter(!is.na(value)) %>% # Remove the values which weren't filled in
separate(enum, 1:5, "[.]", extra="merge") %>% # separate the sections of the VERIS enumeration
select(timeline.incident.year, `5`, value) %>% # Remove the other columns generated by splitting VERIS enumerations
rename(enum = `5`) %>% # Rename the last column back to "enum" (which it was before)
group_by(timeline.incident.year, enum) %>% # group the data by year and enumeration (the cells in the ultimate table)
summarize(x = sum(value)) %>% # Each cell's value is the sum of values
mutate(n = sum(x)) %>% # Sum the x values to get an n. I don't know why, but this does it by the first group_by grouping (in this case 'year')
mutate(freq = round(100 * x/n, 2), `count/total` = paste(x, n, sep="/")) # Create frequency and count/total columns from x and n
temp %>% acast(enum~timeline.incident.year, value.var="count/total") # turn the enum, year, count/total columns into a table with columns of year, rows of enum, and values of count/total
temp %>% acast(enum~timeline.incident.year, value.var="freq", fill=0) # turn the enum, year, freq columns into a table with columns of year, rows of enum, and values of freq