-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions.py
85 lines (64 loc) · 3.01 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# -*- coding: utf-8 -*-
import os
from process_data import *
# Root of the data tree: two directory levels above this file, plus '/HR-LP'.
BASE_DIR = path.dirname(path.dirname(path.abspath(__file__))) + '/HR-LP'

# Station identifiers; they are the keys of station_dict below.
station_set = [
    'DLA3', 'DLA9', 'DLA7', 'DLA4', 'DLA8', 'DLA5',
    'DSE4', 'DSE5', 'DSE2',
    'DCH4', 'DCH3', 'DCH2', 'DCH1',
    'DBO2', 'DBO3', 'DBO1',
    'DAU1',
]
# Every station starts with its own (unshared) empty list.
station_dict = {code: [] for code in station_set}

# Directories below BASE_DIR for the apply inputs and the score inputs.
path1 = path.join(BASE_DIR, 'data/model_apply_inputs/')
path2 = path.join(BASE_DIR, 'data/model_score_inputs/')
def split_routeID(routes, train_ratio=0.7):
    """Split ``routes`` into a leading training part and a trailing test part.

    The input order is kept (nothing is shuffled): the first
    ``ceil(train_ratio * len(routes))`` items form the training part and the
    remaining items form the test part.

    Args:
        routes: Sliceable sequence of route identifiers.
        train_ratio: Fraction of ``routes`` assigned to the training part,
            between 0 and 1 inclusive. Defaults to 0.7.

    Returns:
        A ``(train, test)`` tuple of slices of ``routes``.

    Raises:
        ValueError: If ``train_ratio`` lies outside ``[0, 1]``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError('train_ratio must be between 0 and 1, got %r' % (train_ratio,))
    # Compute the cut once so that both slices always use the same index.
    cut = ceil(train_ratio * len(routes))
    return routes[:cut], routes[cut:]
def process_dict(id, data):
    """Return a new dict holding ``data[key]`` for every key yielded by ``id``.

    Keys appear in the order ``id`` yields them; a key that is missing from
    ``data`` raises ``KeyError``.
    """
    return {key: data[key] for key in id}
def load_json(file):
    """Open ``file`` in binary mode and return the object decoded from its JSON content."""
    with open(file, "rb") as handle:
        return json.load(handle)
def dump_json(file, target):
    """Write ``target`` as JSON to ``path.join(BASE_DIR, file)``.

    The destination is opened in 'w' mode, so existing content is replaced.
    ``json.dump`` runs with its default options (no ``indent`` or
    ``ensure_ascii`` override).
    """
    with open(path.join(BASE_DIR, file), 'w') as sink:
        json.dump(target, sink)
def mkdir(path):
    """Create directory ``path``, including missing parents, unless it already exists.

    When something already exists at ``path`` nothing is created and the
    message "there exists folder" is printed instead of raising.

    Args:
        path: Directory path to create.
    """
    # Try the creation directly instead of testing os.path.exists() first: the
    # check-then-create sequence could still fail if the directory appeared
    # between the two calls.
    try:
        os.makedirs(path)
    except FileExistsError:
        print("there exists folder")
def split_data(index, route_id, BASE_DIR,
               route_data, package_data, actual_sequences, travel_times, invalid_sequence_scores, type='train'):
    """Split the routes into train/test parts and write each part's data as JSON.

    ``route_id`` is divided by ``split_routeID``. Each data mapping is then
    restricted with ``process_dict`` to the train ids (build inputs) or to the
    test ids (apply/score inputs) and written with ``dump_json``.

    Args:
        index: String concatenated into the output directory and file names.
        route_id: Route ids handed to ``split_routeID``.
        BASE_DIR: String prefix of every output path; inside this function it
            shadows the module-level ``BASE_DIR``.
        route_data: Mapping indexed by route id.
        package_data: Mapping indexed by route id.
        actual_sequences: Mapping indexed by route id.
        travel_times: Mapping indexed by route id.
        invalid_sequence_scores: Mapping indexed by route id.
        type: The build-input files are written only when this equals 'train'.
    """
    train_id, test_id = split_routeID(route_id)
    print('train data num:', len(train_id))
    print('test data num:', len(test_id))

    # The train-side subsets are always computed, in this order, before any
    # file is written.
    build_outputs = [
        (file_name, process_dict(train_id, source))
        for file_name, source in (
            ('/route_data.json', route_data),
            ('/package_data.json', package_data),
            ('/actual_sequences.json', actual_sequences),
            ('/travel_times.json', travel_times),
            ('/invalid_sequence_scores.json', invalid_sequence_scores),
        )
    ]

    # Build inputs: one directory per index, written for type == 'train' only.
    if type == 'train':
        build_dir = BASE_DIR + '/data/model_build_inputs_' + index
        mkdir(build_dir)
        for file_name, subset in build_outputs:
            dump_json(build_dir + file_name, subset)

    # NOTE(review): the original indentation was lost, so it is not certain
    # whether the writes below belong inside the `if` above. They are treated
    # as unconditional here; both readings agree for the default type='train'.
    test_outputs = (
        ('/data/model_apply_inputs/new_route_data_', route_data),
        ('/data/model_apply_inputs/new_package_data_', package_data),
        ('/data/model_score_inputs/new_actual_sequences_', actual_sequences),
        ('/data/model_apply_inputs/new_travel_times_', travel_times),
        ('/data/model_score_inputs/new_invalid_sequence_scores_', invalid_sequence_scores),
    )
    for prefix, source in test_outputs:
        dump_json(BASE_DIR + prefix + index + '.json', process_dict(test_id, source))