-
Notifications
You must be signed in to change notification settings - Fork 21
/
cd_traj.py
98 lines (83 loc) · 3.41 KB
/
cd_traj.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# link: Please contact the author!
from zipfile import ZipFile
import re
import os
import csv
import json
from util import int_to_isoformat, ensure_dir
# Matches one input row of the form  <field>,<field>,"[<points>]"  followed by
# a newline. Group 3 captures the text between the square brackets.
pattern = re.compile(r"(\S+),(\S+),\"\[(.+)]\"\n")
# Matches one "<a> <b> <c>, " triple inside the bracketed point list; get_dyna
# reads the three groups as longitude, latitude and time, in that order.
detail_pattern = re.compile(r"(\S+) (\S+) (\S+), ")
# Not referenced anywhere else in the visible code.
geo_cnt = 0
def dumpconfig(data_name):
    """Write the dataset description file ``config.json`` into *data_name*.

    The config declares an empty property set for ``usr`` and a single
    ``trajectory`` type for ``dyna``.

    Args:
        data_name: path of an existing directory; ``config.json`` is created
            (or overwritten) inside it.
    """
    config = {
        'usr': {'properties': {}},
        'dyna': {
            'including_types': ['trajectory'],
            'trajectory': {
                'entity_id': 'usr_id',
                'coordinates': 'coordinate',
                'traj_id': 'num',
            },
        },
    }
    config_path = os.path.join(data_name, 'config.json')
    # Use a context manager so the handle is flushed and closed
    # deterministically instead of being left to the garbage collector.
    with open(config_path, 'w', encoding='utf-8') as config_file:
        json.dump(config, config_file, ensure_ascii=False)
def get_dyna(file, name, binary):
    """Convert one raw trajectory file into ``.dyna``/``.usr`` files and a config.

    Everything is written under ``output/<name>/``:

    * ``<name>.dyna`` - one row per trajectory point,
    * ``<name>.usr``  - one row per distinct driver id, in order of first
      appearance,
    * ``config.json`` - written by ``dumpconfig``.

    Lines that do not match ``pattern`` are collected and printed at the end
    rather than aborting the conversion.

    Args:
        file: iterable yielding the input lines (``bytes`` when *binary* is
            true, ``str`` otherwise).
        name: dataset name, used for the output directory and file stems.
        binary: when true, each line is decoded as ASCII before parsing.
    """
    print("starting " + name)
    wrong_columns = []
    output_dir = os.path.join("output", name)
    ensure_dir(output_dir)
    dyna_path = os.path.join(output_dir, name + ".dyna")
    usr_path = os.path.join(output_dir, name + ".usr")

    dyna_cnt = 0
    # driver_id -> (entity_id, last traj_id handed out for that driver)
    ids = {}
    cur_id = 0

    # Context managers guarantee both outputs are flushed and closed even if
    # a malformed value raises part-way through the input.
    with open(dyna_path, "w", newline='') as dyna_file, \
            open(usr_path, "w", newline='') as usr_file:
        dyna_writer = csv.writer(dyna_file)
        dyna_writer.writerow(
            ["dyna_id", "type", "time", "entity_id", "traj_id", "coordinates"])
        usr_writer = csv.writer(usr_file)
        usr_writer.writerow(["usr_id"])

        for line in file:
            if binary:
                line = line.decode('ascii')
            match = pattern.match(line)
            if not match:
                wrong_columns.append(line)
                continue

            # The first captured field is currently unused.
            _, driver_id, point_text = match.groups()
            # detail_pattern expects every triple to end with ", ", so pad
            # the last one.
            position_list = point_text + ", "

            # Resolve entity_id and hand out the next traj_id for the driver.
            if driver_id not in ids:
                ids[driver_id] = (cur_id, -1)
                usr_writer.writerow([cur_id])
                cur_id += 1
            entity_id, last_traj_id = ids[driver_id]
            traj_id = last_traj_id + 1
            ids[driver_id] = (entity_id, traj_id)

            # Emit one dyna row per point of the trajectory.
            for longitude, latitude, time in detail_pattern.findall(position_list):
                cur_time = int_to_isoformat(int(time))
                coords = [float(longitude), float(latitude)]
                dyna_writer.writerow([dyna_cnt, "trajectory", cur_time,
                                      entity_id, traj_id, coords])
                dyna_cnt += 1

    dumpconfig(output_dir)
    if wrong_columns:
        print("wrong_columns in " + name + ":")
        print(wrong_columns)
    print("finished " + name)
    print()
def get_dynas(filenames, DATA_NAME="cd_traj"):
    """Run ``get_dyna`` on every named input found under ``input/<DATA_NAME>/``.

    For each name, ``<name>.zip`` (containing ``<name>.csv``) is preferred;
    otherwise a plain ``<name>.csv`` is used. The output dataset name is
    *DATA_NAME* followed by the file name with its leading lowercase ASCII
    letters stripped.

    Args:
        filenames: iterable of input file stems (no extension).
        DATA_NAME: sub-directory of ``input`` to read from and prefix of the
            output dataset names.
    """
    input_dir = os.path.join("input", DATA_NAME)
    for filename in filenames:
        suffix = filename.lstrip("abcdefghijklmnopqrstuvwxyz")
        zip_path = os.path.join(input_dir, filename + ".zip")
        csv_path = os.path.join(input_dir, filename + ".csv")
        if os.path.exists(zip_path):
            # Close both the archive and the member stream when done.
            with ZipFile(zip_path) as archive, \
                    archive.open(filename + ".csv") as f:
                get_dyna(f, DATA_NAME + suffix, binary=True)
        elif os.path.exists(csv_path):
            with open(csv_path) as f:
                get_dyna(f, DATA_NAME + suffix, binary=False)
        else:
            # Previously a missing input was skipped without any trace.
            print("no input found for " + filename + " in " + input_dir)
if __name__ == "__main__":
    # Script entry point: convert the three input files of the dataset.
    DATA_NAME = "cd_traj"
    get_dynas(
        [
            "chengdushi_1101_1110",
            "chengdushi_1110_1120",
            "chengdushi_1120_1130",
        ],
        DATA_NAME,
    )