Skip to content

Commit

Permalink
#46 Team onebillion: Fix incorrect serial numbers in tablet tracker
Browse files Browse the repository at this point in the history
- Calculate percentage match between serial numbers in tracker and usage data.
  • Loading branch information
jo-xprize committed Nov 11, 2019
1 parent 7c67cd9 commit 55504bd
Show file tree
Hide file tree
Showing 4 changed files with 452 additions and 400 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,23 @@
logging.debug("len(serial_numbers_in_tablet_tracker): {}".format(len(serial_numbers_in_tablet_tracker)))


# Returns a ratio (0.0 - 1.0) indicating how similar two tablet serial numbers are.
# Example: (5A23002711, 5A23002751) --> 0.9
def get_serial_match_ratio(serial1, serial2):
    """Return the fraction of character positions at which two serials agree.

    Characters are compared position by position. The number of matching
    positions is divided by the length of the longer serial, so any extra
    trailing characters in one serial count as mismatches.

    Args:
        serial1: First serial number, as a string.
        serial2: Second serial number, as a string.

    Returns:
        A float between 0.0 (no position matches) and 1.0 (identical
        serials). Two empty serials yield 0.0.
    """
    logging.debug("get_serial_match_ratio")

    # Normalise by the longer serial instead of a hard-coded 10 so that the
    # ratio stays meaningful for serials of any length.
    serial_length = max(len(serial1), len(serial2))
    if serial_length == 0:
        # Nothing to compare; avoid dividing by zero.
        return 0.0

    # Compare *every* position. The previous `range(0, 9)` skipped the last
    # character, so identical 10-character serials only scored 0.9.
    serial_match_count = sum(1 for char1, char2 in zip(serial1, serial2) if char1 == char2)

    serial_match_ratio = serial_match_count / serial_length
    logging.debug("sequence_match({0}, {1}): {2}".format(serial1, serial2, serial_match_ratio))

    return serial_match_ratio


# Iterate the serial numbers in `tablets-uploading-data-ONEBILLION.csv`
with open('../tablets-uploading-data/tablets-uploading-data-ONEBILLION.csv') as in_file:
serial_numbers_not_found_in_tablet_tracker = []
Expand Down Expand Up @@ -84,11 +101,20 @@

# Write results to a CSV file
csv_filename = "serial-numbers-not-found-in-tablet-tracker.csv"
print("Writing list of missing tablet serials to the file \"" + csv_filename + "\"")
logging.debug("Writing list of missing tablet serials to the file \"" + csv_filename + "\"")
with open(csv_filename, mode='w') as csv_file:
csv_fieldnames = ['serial_number']
csv_fieldnames = ['serial_number', 'closest_match', 'closest_match_percentage']
csv_writer = csv.writer(csv_file, csv_fieldnames)
csv_writer.writerow(csv_fieldnames)
for serial_number in serial_numbers_not_found_in_tablet_tracker:
serial_number_as_list = [serial_number]
csv_writer.writerow(serial_number_as_list)
# Find the closest match in the tablet tracker
closest_match = None
closest_match_percentage = None
for serial_number_in_tablet_tracker in serial_numbers_in_tablet_tracker:
serial_match_ratio = get_serial_match_ratio(serial_number, serial_number_in_tablet_tracker)
if (closest_match_percentage is None) or (closest_match_percentage < serial_match_ratio):
closest_match_percentage = serial_match_ratio
closest_match = serial_number_in_tablet_tracker

csv_row = [serial_number, closest_match, closest_match_percentage]
csv_writer.writerow(csv_row)
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,23 @@
logging.debug("len(serial_numbers_in_tablet_usage_data): {}".format(len(serial_numbers_in_tablet_usage_data)))


# Returns a ratio (0.0 - 1.0) indicating how similar two tablet serial numbers are.
# Example: (5A23002711, 5A23002751) --> 0.9
def get_serial_match_ratio(serial1, serial2):
    """Return the fraction of character positions at which two serials agree.

    Characters are compared position by position. The number of matching
    positions is divided by the length of the longer serial, so any extra
    trailing characters in one serial count as mismatches.

    Args:
        serial1: First serial number, as a string.
        serial2: Second serial number, as a string.

    Returns:
        A float between 0.0 (no position matches) and 1.0 (identical
        serials). Two empty serials yield 0.0.
    """
    logging.debug("get_serial_match_ratio")

    # Normalise by the longer serial instead of a hard-coded 10 so that the
    # ratio stays meaningful for serials of any length.
    serial_length = max(len(serial1), len(serial2))
    if serial_length == 0:
        # Nothing to compare; avoid dividing by zero.
        return 0.0

    # Compare *every* position. The previous `range(0, 9)` skipped the last
    # character, so identical 10-character serials only scored 0.9.
    serial_match_count = sum(1 for char1, char2 in zip(serial1, serial2) if char1 == char2)

    serial_match_ratio = serial_match_count / serial_length
    logging.debug("sequence_match({0}, {1}): {2}".format(serial1, serial2, serial_match_ratio))

    return serial_match_ratio


# Iterate the serial numbers in the tablet tracker
with open('tablet-tracker-ONEBILLION.csv') as in_file:
serial_numbers_not_found_in_tablet_usage_data = []
Expand Down Expand Up @@ -83,11 +100,20 @@

# Write results to a CSV file
csv_filename = "serial-numbers-not-found-in-tablet-usage-data.csv"
print("Writing list of missing tablet serials to the file \"" + csv_filename + "\"")
logging.debug("Writing list of missing tablet serials to the file \"" + csv_filename + "\"")
with open(csv_filename, mode='w') as csv_file:
csv_fieldnames = ['serial_number']
csv_fieldnames = ['serial_number', 'closest_match', 'closest_match_percentage']
csv_writer = csv.writer(csv_file, csv_fieldnames)
csv_writer.writerow(csv_fieldnames)
for serial_number in serial_numbers_not_found_in_tablet_usage_data:
serial_number_as_list = [serial_number]
csv_writer.writerow(serial_number_as_list)
# Find the closest match in the tablet usage data
closest_match = None
closest_match_percentage = None
for serial_number_in_tablet_usage_data in serial_numbers_in_tablet_usage_data:
serial_match_ratio = get_serial_match_ratio(serial_number, serial_number_in_tablet_usage_data)
if (closest_match_percentage is None) or (closest_match_percentage < serial_match_ratio):
closest_match_percentage = serial_match_ratio
closest_match = serial_number_in_tablet_usage_data

csv_row = [serial_number, closest_match, closest_match_percentage]
csv_writer.writerow(csv_row)
Original file line number Diff line number Diff line change
@@ -1,42 +1,42 @@
serial_number
5A23002711
5A27000730
5A27000909
5A27001802
5A28000016
5A28000063
5A29001362
5B13001290
5B20002050
5B20002381
5C01000192
5C01000564
6109001517
6111000738
6111000969
6111001065
6111001347
6111001392
6111001615
6111001757
6111001864
6115000433
6115001597
6115002170
6116001145
6116001176
6116001228
6116001401
6116001600
6116001983
6116002691
6116002775
6118002480
6118002670
6118002767
6118002869
6118003278
6118003568
6126000234
6129001189
6130000027
serial_number,closest_match,closest_match_percentage
5A23002711,5A23002751,0.8
5A27000730,5A27000731,0.9
5A27000909,5A27000924,0.8
5A27001802,5A27001806,0.9
5A28000016,5A28000417,0.8
5A28000063,5A28001065,0.8
5A29001362,5A29001357,0.8
5B13001290,5B13001498,0.8
5B20002050,5B20002255,0.8
5B20002381,5B20002286,0.8
5C01000192,5C04000177,0.7
5C01000564,5C04000177,0.6
6109001517,6109001591,0.8
6111000738,6111000756,0.8
6111000969,6111000974,0.8
6111001065,6111001060,0.9
6111001347,6116001343,0.8
6111001392,6111001096,0.8
6111001615,6111001619,0.9
6111001757,6111000756,0.8
6111001864,6111001869,0.9
6115000433,6115000461,0.8
6115001597,6115001496,0.8
6115002170,6111002172,0.8
6116001145,6116001147,0.9
6116001176,6116001178,0.9
6116001228,6116001229,0.9
6116001401,6116001402,0.9
6116001600,6116001606,0.9
6116001983,6116001981,0.9
6116002691,6116002694,0.9
6116002775,6116002578,0.8
6118002480,6118002407,0.8
6118002670,6118002672,0.9
6118002767,6118002764,0.9
6118002869,6118002850,0.8
6118003278,6118003275,0.9
6118003568,6118003567,0.9
6126000234,6126000273,0.8
6129001189,6109001185,0.8
6130000027,6130000023,0.9
Loading

0 comments on commit 55504bd

Please sign in to comment.