-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfrejus_dataset.py
97 lines (70 loc) · 2.77 KB
/
frejus_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Project hiatus
# script to load the frejus dataset
# 12/10/2020
# Cedric BARON
# loading required packages
import torch
from numpy import load
import os
# importing used functions
import utils as fun
def get_datasets(years):
    """
    Loads the data (rasters and year vectors) of the frejus dataset and
    builds the training samples as pytorch tensors.

    Args:
        years: iterable of year identifiers. Each one is compared with
            characters 13 to 16 of the paths found under "data/np_data" and
            searched as a substring of the paths found under "data/GT_np",
            so they are presumably 4-character strings such as "1954"
            (TODO confirm against the callers).

    Returns:
        A tuple (train_data, gt_change, data) where:
        - train_data is a list of [sample, label] pairs of tensors, built
          from the "train"/"gt_train" entries returned by
          fun.train_val_dataset followed by its "val"/"gt_val" entries;
        - gt_change maps each year to the list of objects loaded from the
          files of "data/GT_np" whose path contains both the year and
          "class";
        - data maps each year to the list of objects loaded from the
          non-"gt" files of "data/np_data" matching that year.
    """
    # loading the files, sorted in reverse order to have a similar order
    list_files = fun.get_files("data/np_data")
    list_files.sort(reverse=True)

    # storing our rasters per year in a dictionary
    data = {year: [] for year in years}

    # list for the year vectors (files whose path contains "gt")
    year_vect = []

    # loading the rasters and the year vectors in a single pass.
    # file[13:17] presumably is the year at the start of the file name,
    # i.e. right after the 13-character prefix "data/np_data/" -- confirm
    # against what fun.get_files returns.
    for year in data:
        for file in list_files:
            if file[13:17] != year:
                continue
            if "gt" in file:
                year_vect.append(load(file))
            else:
                data[year].append(load(file))

    # dict to store our GT rasters (change and classes)
    gt_change = {year: [] for year in years}

    # getting the list of GT files
    list_files_gt = fun.get_files("data/GT_np")
    list_files_gt.sort()

    # loading the matrixes in the dict per year, only "class" files are kept
    for file in list_files_gt:
        if "class" not in file:
            continue
        for year in gt_change:
            if year in file:
                gt_change[year].append(load(file))

    # we now build our dataset as a list of tensors

    # stacking up the samples of every year into a single list
    data_list = []
    for rasters in data.values():
        data_list.extend(rasters)

    # loading the torch data without batch
    datasets = fun.train_val_dataset(data_list, year_vect)

    # converting every split into pytorch tensors
    for split in ("val", "gt_val", "train", "gt_train"):
        datasets[split] = [torch.from_numpy(obs) for obs in datasets[split]]

    # merging val and train because we want more samples
    datasets["train"] = datasets["train"] + datasets["val"]
    datasets["gt_train"] = datasets["gt_train"] + datasets["gt_val"]

    # we need to combine images and labels for the discriminator;
    # indexing the labels keeps the IndexError if there are fewer labels
    # than samples instead of silently dropping samples
    labels = datasets["gt_train"]
    train_data = [
        [sample, labels[i]] for i, sample in enumerate(datasets["train"])
    ]

    return train_data, gt_change, data