forked from benjamineverett/re_analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
block_creator.py
123 lines (99 loc) · 3.88 KB
/
block_creator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import numpy as np
import os
'''
-- Get random blocks for the Labeler class --
This file contains two classes: BlockCreator and Randomizer.
BlockCreator is the parent class of Randomizer.
Randomizer is called from label_pics.py.
Randomizer is the parent class of Labeler.
'''
class BlockCreator(object):
    '''
    -- Creates a list of all blocks in a neighborhood --

    Loads the neighborhood dictionary from data/<neighborhood>.txt and
    expands each street's block range into (street, block_number) tuples,
    stepping 100 at a time.

    Attributes
    ----------
    dict : the object read from the neighborhood file. It is indexed as
        dict[neighborhood]['n_s'] and dict[neighborhood]['e_w']; each of
        those maps a street name to a sequence whose first two items are
        the first and last block numbers.
    neighborhood : the name passed to the constructor.
    blocks : list of (street, block_number) tuples. Empty until
        get_blocks() has been called.
    '''

    def __init__(self,neighborhood):
        self.dict = self._read_dict(neighborhood)
        self.neighborhood = neighborhood
        # start empty so sanity_check() works before get_blocks() is called
        self.blocks = []

    def _read_dict(self,neighborhood):
        '''Read data/<neighborhood>.txt and return its evaluated contents.'''
        # NOTE(review): eval() runs whatever code the file contains, so this
        # is only safe for trusted data files. If the files hold nothing but
        # Python literals, ast.literal_eval would be the safer choice --
        # confirm the file format before switching.
        with open('data/{}.txt'.format(neighborhood),'r') as f:
            return eval(f.read())

    def get_blocks(self):
        '''Build the list of blocks and store it on self.blocks.'''
        blk_dict = self._get_dict()
        self.blocks = self._create_blocks(dct = blk_dict)

    def _get_dict(self):
        '''
        Return one dict of street name -> block range for the neighborhood,
        with the 'n_s' streets first and the 'e_w' streets merged on top.
        '''
        streets = self.dict[self.neighborhood]
        # merge into a copy: updating streets['n_s'] in place would leave
        # self.dict permanently altered after the first call
        merged = dict(streets['n_s'])
        merged.update(streets['e_w'])
        return merged

    def _create_blocks(self,dct):
        '''
        Expand every street's range into a list of (street, block) tuples,
        e.g. ('master st', 1800). Both ends of the range are included.
        '''
        blocks = []
        for street, bounds in dct.items():
            current_block = bounds[0]
            end_block = bounds[1]
            while current_block <= end_block:
                blocks.append((street,current_block))
                current_block += 100
        return blocks

    def sanity_check(self):
        '''Print the raw dictionary and the generated blocks for inspection.'''
        print(self.dict)
        print('\n\n')
        print(self.blocks)
class Randomizer(BlockCreator):
    '''
    -- Randomly fetch blocks from the list created in BlockCreator --

    On construction the block list is built and the picture folder
    pics/<neighborhood> is scanned. get_random_pics() then draws blocks in
    random order and collects the pictures found on them.

    Attributes
    ----------
    num_pics : number of pictures to collect before the draw stops.
    folder_path : 'pics/<neighborhood>'.
    set_of_files : set of address strings derived from the file names in
        folder_path.
    pre_labeled_blocks : list of (street, block) tuples read from
        data/sampled_blocks.txt (set by get_random_pics).
    pics_to_label : list of (address, block) tuples (set by get_random_pics).
    labeled_blocks : every block drawn by get_random_pics, including ones
        that were skipped as pre-labeled.
    '''

    def __init__(self,neighborhood,num_pics):
        super(Randomizer, self).__init__(neighborhood)
        self.num_pics = num_pics
        self.folder_path = 'pics/{}'.format(neighborhood)
        self._initialize()

    def get_random_pics(self):
        '''
        Draw blocks at random until at least num_pics pictures are collected
        or no blocks are left.

        Drawn blocks are removed from self.blocks. A block's pictures are
        added all at once, so pics_to_label can end up longer than num_pics.
        '''
        self.pre_labeled_blocks = self._get_set_of_pre_labeled_blocks()
        # local set gives O(1) membership tests; the attribute stays a list
        already_labeled = set(self.pre_labeled_blocks)
        # shuffled twice, as before, so a seeded run still draws the same blocks
        np.random.shuffle(self.blocks)
        np.random.shuffle(self.blocks)
        self.pics_to_label = []
        self.labeled_blocks = []
        # stop when the blocks run out instead of raising IndexError on pop
        while self.blocks and len(self.pics_to_label) < self.num_pics:
            block = self.blocks.pop(0)
            self.labeled_blocks.append(block)
            if block in already_labeled:
                continue
            for num in self._get_addresses(block).intersection(self.set_of_files):
                self.pics_to_label.append((num,block))

    def _initialize(self):
        '''Build the block list and scan the picture folder.'''
        self.get_blocks()
        self.set_of_files = self._get_set_of_pics()

    def _get_set_of_pics(self):
        '''
        Return the set of address strings for the files in folder_path.

        Each file name is split on '_', the last piece is dropped and the
        rest is joined with spaces, e.g.
        1345_n_26th_st_philadelphia_pa_19125.jpg
            -> 1345 n 26th st philadelphia pa
        '''
        return {' '.join(os.fsdecode(name).split('_')[:-1]) for name in os.listdir(self.folder_path)}

    def _get_set_of_pre_labeled_blocks(self):
        '''
        Read data/sampled_blocks.txt and return a list of (street, block)
        tuples, one per non-blank line.

        NOTE(review): assumes each line looks like ('master st', 1800) --
        confirm against the code that writes this file.
        '''
        blocks = []
        with open('data/sampled_blocks.txt','r') as f:
            for line in f:
                # strip() rather than line[:-2]: a last line with no trailing
                # newline would otherwise lose a digit of the block number
                entry = line.strip().replace('(','').replace(')','')
                if not entry:
                    # blank lines carry no block
                    continue
                fields = entry.split(',')
                street = fields[0].replace("'","")
                blocks.append((street,int(fields[1])))
        return blocks

    def _get_addresses(self,block):
        '''
        Return the set of the 100 candidate address strings on a block,
        e.g. ('master st', 1800) -> '1800 master st philadelphia pa'
        through '1899 master st philadelphia pa'.
        '''
        street, first_num = block[0], block[1]
        return {'{} {} philadelphia pa'.format(num,street) for num in range(first_num,first_num+100)}
if __name__ == '__main__':
    # Run as a script: build a Randomizer for the fairmount neighborhood
    # with a target of 1000 pictures.
    fairmount = Randomizer(neighborhood='fairmount', num_pics=1000)