-
Notifications
You must be signed in to change notification settings - Fork 2
/
ps2014.Rmd
178 lines (140 loc) · 4.26 KB
/
ps2014.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
---
title: "ps2014 Public Speaking 2014 Data"
output: html_notebook
---
This notebook serves the following functions:
- load and parse human rating results on the ps2014 data set
- analyze rating quality
- link human ratings on both PS and IV tasks.
```{r lib}
library(tidyverse)
library(stringr)
library(magrittr)
```
# Public Speaking Data Set
## Data reading
Using tidyverse::readxl to load human ratings stored in Excel
```{r hs_load}
library(readxl)
ps<-read_excel('hsraw/PublicSpeakingSkillsAssessment-data.xlsx')
# Used non-verbal behavior that reinforced the (verbal) message Had a cohesive presentation Was easy to understand Used monotonous speech Used appropriately sophisticated expressions Was nervous Kept my attention throughout the entire presentation Did not have a well-paced presentation
ps <- ps %>%
setNames(.,
c("rater_id",
"video_id",
"rubric_ps1",
"rubric_ps2",
"rubric_ps3",
"rubric_ps4",
"holistic",
"pers_1",
"pers_2",
"pers_3",
"pers_4",
"pers_5",
"char_nonverbal",
"char_cohesion",
"char_understandable",
"char_monotone",
"char_expression",
"char_nervous",
"char_attention",
"char_pace"
))
# remove 1st row with sub-category names
ps <- ps[-1,]
# convert char cols to dbl.
# http://bit.ly/2mXJz7V
ps <- ps %>%
mutate_at(vars(matches("rubric|pers|char")),funs(as.numeric))
```
## Rating Quality
```{r irr_funs}
library(irr)
my.irr <- function(mx){
icc <- icc(mx, model="twoway", type="consistency", unit="average")$value
corr <- meancor(mx)$value
c(icc, corr)
}
```
```{r one_rating}
one_rating <- function(score_name) {
one_tb <- ps %>%
select_(.dots = c('rater_id', 'video_id', score_name))
one_tb <-
one_tb[!duplicated(one_tb[, c("rater_id", "video_id")]),] %>%
spread_(key_col = 'rater_id', value_col = score_name)
return(my.irr(one_tb[2:5]))
}
ratings <- colnames(ps)[3:20]
irr_df <- sapply(ratings, FUN = one_rating)
irr_df <- t(irr_df)
colnames(irr_df) <- c('ICC', 'meanR')
show(irr_df)
```
As for rating quality on interview data, ICC values can be found in vi2014.html.
# Cross-analysis
## Cross two tasks
For each subject, averaged public speaking scores vs. averaged interview scores.
```{r}
# for persubj ratings, get subj and cat
getsubj <- function(name){
subj <- str_sub(name, 2, 3)
subj
}
ps_subj <- ps %>%
mutate(subj=getsubj(video_id)) %>%
group_by(subj) %>%
summarise(hscore_ave = mean(holistic))
```
Using bars_persubj_final (36x4) object, to compute correlations. The very low correlation
between interview and presentation shows that it is important to measure job-related skill during interviews.
```{r}
load(file="vi2014.RData")
twotasks_subj <- merge(ps_subj, bars_persubj_final, by="subj")
cor(twotasks_subj[,2:5])
```
## Within a task, check features' inter-correlations.
### interview
```{r}
# from pers_wide (vid, big 5, holistic)
# bars_indv (videoID, mean)
interview_item <-
merge(pers_wide[, 1:7], bars_indv[c("videoID", "mean", "gp")], by.x = "vid", by.y =
"videoID")
cor(interview_item[, 2:8], use = "complete.obs")
```
### presentation
```{r}
ps_item <- ps %>%
select(video_id:char_pace) %>%
group_by(video_id) %>%
summarise_each(funs(mean))
cor(ps_item[,2:19])
```
Analyze motion features' contributions on predicting human rated scores.
```{r vamp_corr, echo=TRUE}
load("../vamp_postproc/ps2014_vamp.RData")
tidy_id <- function(x){
tks <- str_match(x, "SELTCAWRS_(\\d+)([enpi_]+)")
if(str_length(tks[2]) == 3){
vid = str_sub(tks[2], 2,3)
}
else{
vid = tks[2]
}
new_id <- paste("P", vid, tks[3], sep = "")
new_id <- str_replace(new_id, "(_)$", "") # _ before string end
}
feats_persession <- feats_persession %>%
mutate(video_id = unlist(lapply(labsession, tidy_id))) %>%
select(-(starts_with("symmetry")))
ps_item_vamp <- merge(ps_item, feats_persession[,2:46], by="video_id")
vamp_tb<- cor(ps_item_vamp[,20:63], ps_item_vamp[,2:6], use = "na.or.complete")
```
Only showing correlation absolute value is higher than $0.2$
```{r}
vamp_tb_high <- vamp_tb
vamp_tb_high[abs(vamp_tb_high) < 0.2] <- ""
show(vamp_tb_high)
```