Skip to content

Commit

Permalink
Merge pull request #38 from XPRIZE/#22-Team-Chimple-Extract-Storybook…
Browse files Browse the repository at this point in the history
…-Events

#22 team chimple extract storybook events
  • Loading branch information
jo-xprize authored Oct 29, 2019
2 parents 4138646 + b3a93fd commit 34c80e0
Show file tree
Hide file tree
Showing 63 changed files with 27,033 additions and 3 deletions.
28 changes: 25 additions & 3 deletions team-CHIMPLE/storybook-events/extract_storybook_events_from_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ def extract_from_week(directory_containing_weekly_data):
continue

# Extract tablet serial number from the parent folder's name
# E.g. "2019-02-08/29/REMOTE/5A27001390/userlog.1548797314282.csv" --> "5A27001390"
# E.g. "2019-02-08/29/REMOTE/5A27001390/userlog.1548797314282.csv"
# --> "2019-02-08/29/REMOTE/5A27001390"
# --> "5A27001390"
tablet_serial = file_path.replace("/" + basename, "")
tablet_serial = tablet_serial[len(tablet_serial)-10:len(tablet_serial)]
print(os.path.basename(__file__), "tablet_serial: \"{}\"".format(tablet_serial))
Expand All @@ -86,11 +88,25 @@ def extract_from_week(directory_containing_weekly_data):
is_valid_tablet_serial_number = serial_number_util.is_valid(tablet_serial)
print(os.path.basename(__file__), "is_valid_tablet_serial_number: {}".format(is_valid_tablet_serial_number))
if not is_valid_tablet_serial_number:
raise ValueError("Invalid tablet_serial: \"{}\"".format(tablet_serial))
# Invalid serial number. Skip file.
warnings.warn("The parent folder does not represent a 10-character serial number: \"{}\"".format(tablet_serial))
continue

# Extract storybook events from CSV
with open(file_path) as csv_file:
csv_data = csv.reader(csv_file)

# Skip if corrupt file content
try:
for storybook_event_row in csv_data:
# Only check if the first line in the file is valid, and then skip iteration of the rest of the file.
break
except csv.Error:
# Handle "_csv.Error: line contains NULL byte"
# Example: 2018-09-07/35/REMOTE/6111001892/userlog.1532930088249.csv contains "^@^@^@^@^@^@^@^@^@^@"
warnings.warn("Skipping file which contains NULL byte")
continue

for storybook_event_row in csv_data:
print(os.path.basename(__file__), "storybook_event_row: {}".format(storybook_event_row))

Expand Down Expand Up @@ -123,7 +139,13 @@ def extract_from_week(directory_containing_weekly_data):
storybook_start_time = arrow.get(userlog_logged_at, "ddd MMM DD HH:mm:ss ZZZZZ YYYY").timestamp
elif len(userlog_logged_at) == 28:
# E.g. "Wed Jan 09 09:55:25 PST 2019"
storybook_start_time = arrow.get(userlog_logged_at, "ddd MMM DD HH:mm:ss ZZZ YYYY").timestamp
try:
storybook_start_time = arrow.get(userlog_logged_at, "ddd MMM DD HH:mm:ss ZZZ YYYY").timestamp
except arrow.parser.ParserError:
# Handle "arrow.parser.ParserError: Could not parse timezone expression "AST"".
# E.g. "Sun Jan 09 21:51:30 AST 2000" or "Thu Aug 29 15:25:29 EDT 2019"
warnings.warn("Skipping invalid timezone expression")
continue
print(os.path.basename(__file__), "storybook_start_time: {}".format(storybook_start_time))

# Storybook end time is not stored, so set to None
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Collects storybook events from multiple weeks of data and combines them into one file.
#
# Example usage:
# cd storybook-events
# python3 extract_storybook_events_from_multiple_weeks.py ../tablet-usage-data
#
# The extracted data will be stored in a file named `storybook-events-CHIMPLE.csv`.

import sys
import os

import extract_storybook_events_from_csv

# Root directory of the tablet usage data.  It is expected to contain one
# subdirectory per week, named in the format "2017-12-22", "2017-12-29", etc.
# The first command-line argument, when given, replaces the default location.
BASE_PATH = sys.argv[1] if len(sys.argv) > 1 else "../tablet-usage-data"
print(os.path.basename(__file__), "BASE_PATH: {}".format(BASE_PATH))

# Week-end dates (YYYY-MM-DD) of the data collection weeks to process, in
# chronological order.  Each row below holds the dates of one calendar month.
# NOTE(review): "2019-02-15" is absent between "2019-02-08" and "2019-02-22" --
# presumably no data exists for that week; confirm before adding it.
data_collection_week_end_dates = [
    # 2017
    "2017-12-22", "2017-12-29",
    # 2018
    "2018-01-05", "2018-01-12", "2018-01-19", "2018-01-26",
    "2018-02-02", "2018-02-09", "2018-02-16", "2018-02-23",
    "2018-03-02", "2018-03-09", "2018-03-16", "2018-03-23", "2018-03-30",
    "2018-04-06", "2018-04-13", "2018-04-20", "2018-04-27",
    "2018-05-04", "2018-05-11", "2018-05-18", "2018-05-25",
    "2018-06-01", "2018-06-08", "2018-06-15", "2018-06-22", "2018-06-29",
    "2018-07-06", "2018-07-13", "2018-07-20", "2018-07-27",
    "2018-08-03", "2018-08-10", "2018-08-17", "2018-08-24", "2018-08-31",
    "2018-09-07", "2018-09-14", "2018-09-21", "2018-09-28",
    "2018-10-05", "2018-10-12", "2018-10-19", "2018-10-26",
    "2018-11-02", "2018-11-09", "2018-11-16", "2018-11-23", "2018-11-30",
    "2018-12-07", "2018-12-14", "2018-12-21", "2018-12-28",
    # 2019
    "2019-01-04", "2019-01-11", "2019-01-18", "2019-01-25",
    "2019-02-01", "2019-02-08", "2019-02-22",
]
print(
    os.path.basename(__file__),
    "len(data_collection_week_end_dates): {}".format(len(data_collection_week_end_dates)),
)

# Run the storybook event extraction once per data collection week.  Each call
# receives the week's directory, i.e. "<BASE_PATH><os.sep><week end date>", and
# stores that week's events in its own CSV file.
for week_end_date in data_collection_week_end_dates:
    directory_containing_weekly_data = os.sep.join((BASE_PATH, week_end_date))
    print(
        os.path.basename(__file__),
        'directory_containing_weekly_data: "{}"'.format(directory_containing_weekly_data),
    )
    extract_storybook_events_from_csv.extract_from_week(directory_containing_weekly_data)

# Combine the per-week CSV files into the single file
# "storybook-events-CHIMPLE.csv".
#
# One input file named "storybook-events-CHIMPLE_<week end date>.csv" is read
# for every entry in data_collection_week_end_dates, in list order.  A missing
# weekly file raises FileNotFoundError, exactly as before.
print(os.path.basename(__file__), "Writing data to \"storybook-events-CHIMPLE.csv\"...")
with open('storybook-events-CHIMPLE.csv', 'w') as outfile:
    # Column headers are included in each weekly file, but must appear only
    # once in the combined output.  Track whether a header row has actually
    # been written (rather than whether this is the first file in the list),
    # so that an empty first weekly file cannot cause the header to be
    # dropped from the output altogether.
    header_written = False
    for week_end_date in data_collection_week_end_dates:
        print(os.path.basename(__file__), "\n\n"
                                          "**********\n")
        csv_filename_weekly = "storybook-events-CHIMPLE_" + week_end_date + ".csv"
        print(os.path.basename(__file__), "csv_filename: \"{}\"".format(csv_filename_weekly))
        with open(csv_filename_weekly) as infile:
            for infile_row_count, line in enumerate(infile):
                print(os.path.basename(__file__), "line: {}".format(line))
                print(os.path.basename(__file__), "infile_row_count: {}".format(infile_row_count))

                # The first row of every weekly file is its column header.
                is_column_header_row = (infile_row_count == 0)
                if is_column_header_row and header_written:
                    # Header already present in the combined file; skip it.
                    continue

                # A weekly file whose last line lacks a trailing newline would
                # otherwise be fused with the first row of the next file.
                if not line.endswith("\n"):
                    line += "\n"

                outfile.write(line)
                if is_column_header_row:
                    header_written = True
print(os.path.basename(__file__), "Writing data to \"storybook-events-CHIMPLE.csv\" complete!")
Loading

0 comments on commit 34c80e0

Please sign in to comment.