-
Notifications
You must be signed in to change notification settings - Fork 0
/
cog_data_v5
245 lines (167 loc) · 4.68 KB
/
cog_data_v5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
---
title: "cog_data_v5"
output: html_document
date: "2024-02-07"
---
purpose: refine master data into a dataset that I can use for the MICE imputation project
```{r setup, include=FALSE}
# Global chunk option: echo = TRUE shows each chunk's source code in the knitted output.
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
library(readxl)
library(tidyverse)
library(dplyr)
library(magrittr)
library(writexl)
```
# 1. Alltracers data
Import master data file
```{r}
# Absolute path to the master data CSV on the author's machine.
alltracers_path <- "/Users/ella2/Downloads/Alltracers_data_long (2).csv"
# Load the master file into a data frame.
alltracers <- read.csv(alltracers_path)
```
Subset to narrow down to data of interest
```{r}
# Keep only rows where year is "ses-01" (year 1 / baseline) and tracer is
# "Cogtest" (cognitive testing / demographic data).
cog_data <- filter(alltracers, year == "ses-01", tracer == "Cogtest")
# First pass: keep columns 1-7 and 26-54 of the master layout (36 columns).
master_cols <- c(1:7, 26:54)
cog_data <- cog_data[, master_cols]
# Second pass, indexing into the 36 columns kept above: per the author's note
# this removes the z scores so only raw cognitive data remain. Column 36 is
# moved to position 2.
raw_cols <- c(1, 36, 4:7, 8, 10, 12, 13, 15, 16, 18, 19, 21, 23:29, 31, 33, 35)
cog_data <- cog_data[, raw_cols]
# Inspect the result in the data viewer.
View(cog_data)
```
Arrange by sub_num in ascending order
```{r}
# Make sure sub_num is a character vector (it may have been read as a factor).
cog_data[["sub_num"]] <- as.character(cog_data[["sub_num"]])
# Strip the "sub-" prefix so the remainder can be compared as a number.
subject_number <- as.numeric(gsub("sub-", "", cog_data[["sub_num"]]))
# Sort rows by subject number, smallest first (order() is ascending by default).
row_order <- order(subject_number)
cog_data <- cog_data[row_order, ]
# Inspect the sorted result.
View(cog_data)
```
Export
```{r}
#write_xlsx(cog_data, "/Volumes/shr1/Statistics/Cognitive Imputation/cog_data_v4.xlsx")
```
# 2. Add in data from 10 new enrollments
*Re-do once Vanessa returns so we can just pull from alltracers.csv.*
Pulled from REDCap for now because Vanessa is out, so the alltracers data is not updated.
cog_data_v4 goes up to subject 154.
```{r}
# Start from the saved v4 workbook so the steps above do not need to be re-run.
cog_data_v4_path <- "/Volumes/H-LAND/Statistics/Cognitive Imputation/input/cog_data_v4.xlsx"
cog_data_v4 <- read_excel(path = cog_data_v4_path)
```
```{r}
# Cognitive data and basic demographics for subjects after 154, taken from REDCap.
new_enrollments_path <- "/Volumes/H-LAND/Other Projects/working_files_SHB/cog imputation/aands data/new_enrollments.csv"
new_demos_path <- "/Volumes/H-LAND/Other Projects/working_files_SHB/cog imputation/aands data/new_pts_basic_demos.csv"
new_pts <- read.csv(new_enrollments_path)
new_pts_demos <- read.csv(new_demos_path)
```
```{r}
# Attach the cognitive export to the demographics table, keeping every row of
# new_pts_demos (left join on hedden_id).
new_pts <- left_join(new_pts_demos, new_pts, by = "hedden_id")
#View(new_pts)
#View(cog_data_v4)
# Names of the cognitive variables in cog_data_v4: columns 7 through 25.
# 7:25 is a range. The earlier 7-25 evaluated to -18, which dropped column 18
# instead of selecting columns 7 to 25.
cog_variables <- colnames(cog_data_v4)[7:25]
# Print the position of each cognitive variable in new_pts (NA means new_pts
# has no column with that name). Presumably these positions are used to pick
# the columns to keep from new_pts - confirm.
match(cog_variables, colnames(new_pts))
```
```{r}
# Column positions in new_pts to carry over (hand-picked; presumably the
# counterparts of cog_data_v4's columns - confirm against the match() output).
keep_cols <- c(1:6, 14, 19, 22:24, 32, 36, 48, 50, 59, 62, 65, 68, 71, 74, 78, 80, 82)
new <- new_pts[, keep_cols]
View(new)
# Compare widths: new is one column short of cog_data_v4.
ncol(new) # 24
ncol(cog_data_v4) # 25
# b_a is missing from new and needs to be calculated.
# Check the column types.
str(new)
```
## Calculate B - A
A Time
using for loop
```{r}
# Convert a_time from "minutes:seconds" text to total seconds, row by row.
# Assumes values look like "m:ss" - TODO confirm against the REDCap export.
# Previously only the seconds field was kept, so any time of one minute or
# more lost its minutes (e.g. "1:05" became 5 instead of 65).
# seq_len() makes the loop a no-op when new has zero rows, where 1:nrow(new)
# would iterate over c(1, 0).
for (i in seq_len(nrow(new))) {
  # Split the time string into minutes and seconds
  time_parts <- strsplit(new$a_time[i], ":")[[1]]
  # Convert minutes and seconds to numeric values
  minutes <- as.numeric(time_parts[1])
  seconds <- as.numeric(time_parts[2])
  # Update the a_time column with total seconds
  new$a_time[i] <- minutes * 60 + seconds
}
View(new)
```
B Time
using for() loop
```{r}
# Convert b_time from "minutes:seconds" text to total seconds, row by row.
# seq_len() makes the loop a no-op when new has zero rows, where 1:nrow(new)
# would iterate over c(1, 0).
for (i in seq_len(nrow(new))) {
  # Split the time string into minutes and seconds
  time_parts <- strsplit(new$b_time[i], ":")[[1]]
  # Convert minutes and seconds to numeric values
  minutes <- as.numeric(time_parts[1])
  seconds <- as.numeric(time_parts[2])
  # Update the b_time column with total time in seconds
  new$b_time[i] <- minutes * 60 + seconds
}
View(new)
```
B - A
```{r}
# Inspect column types before converting.
str(new)
# a_time and b_time must be numeric for the subtraction.
for (time_col in c("a_time", "b_time")) {
  new[[time_col]] <- as.numeric(new[[time_col]])
}
# b_a is b_time minus a_time.
new[["b_a"]] <- new[["b_time"]] - new[["a_time"]]
# Move the new column (appended as column 25) to position 14.
new <- new[, c(1:13, 25, 14:24)]
View(new)
```
Change hedden_id to sub_num then merge
```{r}
# Build sub_num by prefixing the ID (as text) with "sub-".
# NOTE(review): IDs are pasted without zero-padding - confirm this matches the
# sub_num format used in cog_data_v4.
new[["hedden_id"]] <- as.character(new[["hedden_id"]])
new[["sub_num"]] <- paste0("sub-", new[["hedden_id"]])
# Put sub_num (appended as column 26) first and drop column 1
# (presumably hedden_id - confirm), keeping columns 2-25 in order.
sub_num_first <- c(26, 2:25)
new <- new[, sub_num_first]
View(new)
```
# 3. Rbind
Change data types so they are the same per column in each dataset
```{r}
# Compare the width and column types of the two data frames before binding.
ncol(new)
ncol(cog_data_v4)
str(new)
str(cog_data_v4)
# change all vars in new to numeric EXCEPT sub_num
# (both conversions below are commented out, so no types are changed here)
# Convert all columns to numeric
#new[2:25] <- lapply(new[2:25], as.numeric)
# for loop to accomplish same goal
# Iterate over each col of the data frame
#for (i in 2:ncol(new)) {
# mutate to numeric if not numeric already
#if(!is.numeric(new[[i]])) {
# new[[i]] <- as.numeric(new[[i]])
# }
#}
# Print the structures again for a final side-by-side check.
str(new)
str(cog_data_v4)
```
```{r}
# Append the rows of new beneath the existing cog_data_v4 rows.
# NOTE(review): rbind() on data frames matches columns by name, so new must
# use the same column names as cog_data_v4 - confirm before relying on this.
cog_data_v5 <- rbind(cog_data_v4, new)
View(cog_data_v5)
```
# 2.8.24
1. re-run once sub-165 is added
2. re-run mice
- does the seed preserve values even if new subjects are added?
3. re-run composites/QC