-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtimeBetweenRuns.py
50 lines (41 loc) · 2.18 KB
/
timeBetweenRuns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from projectConstants import *
import pandas as pd
def getTimestampRow(row):
    '''
    Build the timestamp record for a single event row.

    The record carries the row's subject id, assignment id and raw client
    timestamp, plus that timestamp converted to a pandas datetime. The raw
    value is interpreted as milliseconds (unit='ms').
    '''
    timestampInfo = {}
    timestampInfo[SUBJECT_ID_KEY] = row[SUBJECT_ID_KEY]
    timestampInfo[ASSIGNMENT_ID_KEY] = row[ASSIGNMENT_ID_KEY]
    timestampInfo[CLIENT_TIMESTAMP_KEY] = row[CLIENT_TIMESTAMP_KEY]
    # The conversion reads the ClientTimestamp attribute of the row directly.
    timestampInfo[DATE_TIME_KEY] = pd.to_datetime(row.ClientTimestamp, unit='ms')
    return timestampInfo
def getFilteredRunEvents(df):
    '''
    Get run events with execution action and start events only.

    A row is kept when its EventType is 'Run.Program' and its 'X-Metadata'
    value is 'Start'; every other row is skipped. Each kept row is turned
    into a timestamp record by getTimestampRow.

    Returns a DataFrame with one row per kept event, indexed 0..n-1 in the
    order the events appear in df. When nothing matches, the result is an
    empty DataFrame without columns.
    '''
    # Collect the records first and build the frame once: concatenating
    # onto a growing DataFrame inside the loop re-copies all earlier rows
    # on every iteration.
    runRecords = [
        getTimestampRow(row)
        for _, row in df.iterrows()
        if row.EventType == 'Run.Program' and row['X-Metadata'] == 'Start'
    ]
    return pd.DataFrame(runRecords)
def getTimeBetweenRuns(df, student, assignment):
    '''
    Get time between consecutive runs for a student and assignment.

    Rows of df whose SubjectID equals student and whose AssignmentID equals
    assignment are copied and sorted by DATE_TIME_KEY. Each row is then
    paired with the next row's datetime and the gap between them is stored
    in several forms:

    - NEXT_DATE_TIME_KEY: datetime of the following run
    - DIFF_KEY: the gap as a timedelta
    - DAYS_DIFF_KEY: whole days contained in the gap
    - HOURS_DIFF_KEY / MINUTES_DIFF_KEY / SECONDS_DIFF_KEY: the entire gap
      expressed in hours / minutes / seconds, rounded to 2 decimals

    The last row has no following run, so its gap columns are NaT/NaN.

    Returns the filtered, sorted copy with the columns above added; df
    itself is not modified.
    '''
    isTarget = (df.SubjectID == student) & (df.AssignmentID == assignment)
    studentRunsDf = df[isTarget].copy()
    studentRunsDf.sort_values(by=DATE_TIME_KEY, inplace=True)

    # shift(-1) lines every run up with the run that follows it.
    studentRunsDf[NEXT_DATE_TIME_KEY] = studentRunsDf[DATE_TIME_KEY].shift(-1)
    gap = studentRunsDf[NEXT_DATE_TIME_KEY] - studentRunsDf[DATE_TIME_KEY]

    # total_seconds() measures the whole gap. The .dt.seconds accessor only
    # holds the part of the gap below one day, so using it would make gaps
    # of a day or more wrap around.
    gapSeconds = gap.dt.total_seconds()

    studentRunsDf[DIFF_KEY] = gap
    studentRunsDf[DAYS_DIFF_KEY] = round(gap.dt.days)
    studentRunsDf[HOURS_DIFF_KEY] = round(gapSeconds / 3600.0, 2)
    studentRunsDf[MINUTES_DIFF_KEY] = round(gapSeconds / 60.0, 2)
    studentRunsDf[SECONDS_DIFF_KEY] = round(gapSeconds, 2)
    return studentRunsDf
def getTimeBetweenRunsDf(keystroke_df, final_data):
    '''
    Get time between runs for each student.

    keystroke_df is reduced to its run events with getFilteredRunEvents.
    final_data is iterated as 3-item entries whose first two items are the
    student and the assignment; the third item is ignored. For every entry
    the gaps between runs are computed with getTimeBetweenRuns.

    Returns a single DataFrame holding the per-student results stacked in
    the order of final_data with a fresh 0..n-1 index, or an empty
    DataFrame when final_data has no entries.
    '''
    runEvents = getFilteredRunEvents(keystroke_df)

    # Gather the per-student frames and concatenate once; growing the
    # result inside the loop re-copies everything accumulated so far.
    studentFrames = [
        getTimeBetweenRuns(runEvents, student, assignment)
        for student, assignment, _ in final_data
    ]
    if not studentFrames:
        # pd.concat raises on an empty list, so return an empty frame.
        return pd.DataFrame()
    return pd.concat(studentFrames, ignore_index=True)