-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathElections.Rmd
96 lines (86 loc) · 3.47 KB
/
Elections.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
---
title: "Elections"
output: null_document
date: "`r Sys.Date()`"
---
```{r setup, include=FALSE}
if(!require(dplyr)) {install.packages("dplyr"); library(dplyr)}
if(!require(tidyverse)) {install.packages("tidyverse"); library(tidyverse)}
if(!require(stringr)) {install.packages("stringr"); library(stringr)}
if(!require(stringi)) {install.packages("stringi"); library(stringi)}
```
```{r}
# Read the raw survey export. header = FALSE keeps the export's header rows as
# ordinary data rows so the question text can be inspected below.
data <- read.csv("./data/ElectionResults.csv", header = FALSE)

# The question text is looked up in row 2 of the raw file (row 1 is discarded
# further down). as.character() keeps the comparison type-stable whatever
# column type read.csv() inferred.
question_text <- vapply(
  seq_len(ncol(data)),
  function(i) as.character(data[2, i]),
  character(1)
)

# Keep only the ranking questions. A missing cell (e.g. an entirely empty
# column) is treated as "no match" instead of aborting on an NA condition.
is_ranking_col <- !is.na(question_text) &
  str_detect(question_text, "Please rank your preference of candidates for")
cols_to_keep_indices <- which(is_ranking_col)

if (length(cols_to_keep_indices) == 0) {
  warning("No ranking question columns were found in row 2 of the CSV.",
          call. = FALSE)
}

# drop = FALSE keeps `data` a data frame even when exactly one column matches.
data <- data[, cols_to_keep_indices, drop = FALSE]
# Discard the first raw row; the question text is now row 1 of `data`.
data <- data[-1, , drop = FALSE]
# Map each full position title, as it appears in the question text, to the
# short code used in the output column names.
title_mapping <- list(
  "Chief Executive Officer" = "CEO",
  "Chief Financial Officer" = "CFO",
  "Chief Operations Officer" = "COO",
  "Chief Marketing Officer" = "CMO"
)

# Build one "<short title>_Rank_<n>" name per kept column from the question
# text that currently sits in row 1 of `data`.
new_col_names <- character(length = length(cols_to_keep_indices))
for (i in seq_along(cols_to_keep_indices)) {
  header_value <- as.character(data[1, i])
  # Transliterate to ASCII before matching against the (ASCII) titles.
  header_value_ascii <- stri_trans_general(header_value, "Latin-ASCII")

  # str_detect() is vectorised over the titles; which() drops NA, so a missing
  # header simply counts as "no title matched".
  hits <- which(str_detect(header_value_ascii, names(title_mapping)))
  if (length(hits) == 0) {
    # Without a fallback the column would be left with an empty name and could
    # not be looked up by name afterwards.
    warning(
      sprintf("Column %d matches no title in title_mapping: %s",
              i, header_value),
      call. = FALSE
    )
    new_col_names[i] <- sprintf("Unmatched_%d", i)
    next
  }

  # The rank is the run of digits that ends the header; trailing whitespace
  # would otherwise hide it from the "$" anchor.
  rank <- str_extract(str_trim(header_value), "\\d+$")
  # As before, the first matching title in mapping order wins.
  short_title <- title_mapping[[hits[1]]]
  new_col_names[i] <- sprintf("%s_Rank_%s", short_title, rank)
}

# Columns are subsequently looked up by name, so names must be unique (e.g.
# two headers of the same position that both lack a trailing rank number).
new_col_names <- make.unique(new_col_names, sep = "_")

# Apply the new names, then drop the question-text row now that it has been
# consumed. drop = FALSE keeps a single-column result a data frame.
names(data) <- new_col_names
data <- data[-1, , drop = FALSE]
# Tally how often each distinct value occurs in every ranking column.
# `results_list` ends up with one two-column data frame per column name:
# the value ("Candidate") and its frequency, most frequent first.
# NOTE(review): table() counts empty strings as a value of their own, so blank
# responses (if the CSV contains any) would show up as a candidate named "".
# Confirm whether that is intended.
results_list <- list()
for (col_name in names(data)) {
  tally <- as.data.frame(table(data[[col_name]]))

  # Nothing to count for this column: store a single placeholder row.
  if (nrow(tally) == 0) {
    results_list[[col_name]] <- data.frame(Candidate = NA, Count = 0)
    next
  }

  # Store frequencies as doubles, then order from highest to lowest count.
  tally$Freq <- as.numeric(tally$Freq)
  by_count_desc <- order(-tally$Freq)
  tally <- tally[by_count_desc, ]

  # Label the value column and tag the count column with its source column.
  names(tally) <- c("Candidate", paste0(col_name, "_Count"))
  results_list[[col_name]] <- tally
}
# Combine the per-column tallies side by side into one wide table. Shorter
# tallies are padded with NA rows so that every tally has the same row count.
if (length(results_list) == 0) {
  # Nothing was tallied; produce an empty table rather than failing inside
  # max() / cbind().
  results_df <- data.frame()
} else {
  # vapply() always yields an integer vector, whereas sapply() changes its
  # return type depending on the input.
  max_rows <- max(vapply(results_list, nrow, integer(1)))

  adjusted_list <- lapply(results_list, function(df) {
    # Use common column names so the padding rows can be rbind()-ed on.
    names(df) <- c("Candidate", "Count")
    additional_rows <- max_rows - nrow(df)
    if (additional_rows > 0) {
      padding <- data.frame(
        Candidate = rep(NA, additional_rows),
        Count = rep(NA, additional_rows)
      )
      df <- rbind(df, padding)
    }
    df
  })

  # Combine the data frames side by side.
  results_df <- do.call(cbind, adjusted_list)

  # Name each pair of columns after the tally it came from. The names are
  # taken from the list that was actually combined, so their number always
  # matches the number of columns.
  col_names <- paste0(
    rep(names(results_list), each = 2),
    c("_Candidate", "_Count")
  )
  names(results_df) <- col_names
  rownames(results_df) <- NULL
}
results_df
#write.csv(results_df, "./data/elections_results.csv")
```