-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path5. Knn Grid.py
108 lines (83 loc) · 2.24 KB
/
5. Knn Grid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import datetime
from heapq import nlargest
from operator import itemgetter
import os
import time
import math
from collections import defaultdict
def prep_xy(x, y, grid_size=800, extent=10):
    """Map a continuous (x, y) coordinate to integer grid-cell indices.

    Each axis is scaled by ``grid_size / extent`` and floored, then clamped
    into ``[0, grid_size - 1]`` so that out-of-range coordinates land in the
    nearest border cell instead of producing an invalid index.

    Args:
        x: Coordinate along the first axis.
        y: Coordinate along the second axis.
        grid_size: Number of cells per axis (default 800).
        extent: Coordinate value that corresponds to the far edge of the
            grid (default 10).

    Returns:
        A tuple ``(ix, iy)`` of cell indices.
    """
    def _to_cell(value):
        # Same arithmetic order as the original expression (multiply, then
        # divide) so floating-point rounding is unchanged.
        idx = math.floor(grid_size * value / extent)
        return min(max(idx, 0), grid_size - 1)

    return _to_cell(x), _to_cell(y)
def run_solution():
    """Train a grid-frequency model and write a submission CSV.

    Reads the training CSV, counts how often each place_id occurs in each
    grid cell (cells come from ``prep_xy``), keeps the three most frequent
    place_ids per cell, and then writes one prediction line per test row to
    ``submission_<YYYY-mm-dd-HH-MM>.csv`` in the current directory.

    Training rows are read as ``row_id, x, y, accuracy, time, place_id``
    (only x, y and place_id are used); test rows as ``row_id, x, y, ...``.
    Reading of either file stops at the first blank line or at end of file.

    Output lines have the form ``row_id, p1 p2 p3`` (each place_id is
    preceded by a single space); rows whose cell was never seen in training
    get no place_ids.
    """
    print('Preparing data...')

    # (ix, iy) cell -> place_id -> number of training rows in that cell.
    grid = defaultdict(lambda: defaultdict(int))

    # NOTE(review): training data is read from "../train.csv" while test data
    # comes from "../input/test.csv" -- confirm that this difference in
    # directories is intentional.
    with open("../train.csv", "r") as train_file:
        train_file.readline()  # skip the header row
        for raw_line in train_file:
            line = raw_line.strip()
            if line == '':
                break
            arr = line.split(",")
            cell = prep_xy(float(arr[1]), float(arr[2]))
            grid[cell][arr[5]] += 1

    # Top three place_ids per cell. Sorting the items first makes tie-breaking
    # deterministic (nlargest keeps the input order among equal counts).
    grid_sorted = {
        cell: nlargest(3, sorted(counts.items()), key=itemgetter(1))
        for cell, counts in grid.items()
    }

    print('Generate submission...')
    sub_file = 'submission_' + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") + '.csv'

    with open(sub_file, "w") as out, open("../input/test.csv", "r") as test_file:
        test_file.readline()  # skip the header row
        out.write("row_id,place_id\n")
        for raw_line in test_file:
            line = raw_line.strip()
            if line == '':
                break
            arr = line.split(",")
            cell = prep_xy(float(arr[1]), float(arr[2]))
            # Entries are already unique and capped at three, so they can be
            # written out directly; unseen cells yield an empty prediction.
            places = ''.join(
                ' ' + place_id for place_id, _count in grid_sorted.get(cell, ())
            )
            out.write(arr[0] + ',' + places + "\n")
# Script entry: time the complete run and report the wall-clock duration.
start_time = time.time()
run_solution()
elapsed_seconds = time.time() - start_time
print("Elapsed time overall: %s seconds" % elapsed_seconds)