From cfe164511d24c4b64dff12dc1fa086ac67f0ac71 Mon Sep 17 00:00:00 2001 From: lx Date: Tue, 3 Dec 2024 13:44:01 +0800 Subject: [PATCH] 'addBigST' --- baselines/BigST/PEMS04.py | 162 ------------- baselines/BigST/arch/__init__.py | 3 - baselines/BigST/arch/bigst_arch.py | 139 ----------- baselines/BigST/arch/linear_conv.py | 99 -------- baselines/BigST/arch/pipeline.py | 67 ------ baselines/BigST/arch/preprocess/metrics.py | 53 ----- baselines/BigST/arch/preprocess/model.py | 206 ---------------- baselines/BigST/arch/preprocess/pipeline.py | 38 --- baselines/BigST/arch/preprocess/preprocess.py | 127 ---------- baselines/BigST/arch/preprocess/util.py | 147 ------------ baselines/BigST/arch/random_map.py | 81 ------- baselines/BigST/loss/__init__.py | 1 - baselines/BigST/loss/loss.py | 35 --- baselines/BigSTPreprocess/PEMS08.py | 153 ------------ baselines/BigSTPreprocess/arch/__init__.py | 3 - .../arch/bigst_preprocess_arch.py | 220 ------------------ baselines/BigSTPreprocess/runner/__init__.py | 1 - .../runner/bigstpreprocess_runner.py | 49 ---- 18 files changed, 1584 deletions(-) delete mode 100644 baselines/BigST/PEMS04.py delete mode 100644 baselines/BigST/arch/__init__.py delete mode 100644 baselines/BigST/arch/bigst_arch.py delete mode 100644 baselines/BigST/arch/linear_conv.py delete mode 100644 baselines/BigST/arch/pipeline.py delete mode 100644 baselines/BigST/arch/preprocess/metrics.py delete mode 100644 baselines/BigST/arch/preprocess/model.py delete mode 100644 baselines/BigST/arch/preprocess/pipeline.py delete mode 100644 baselines/BigST/arch/preprocess/preprocess.py delete mode 100644 baselines/BigST/arch/preprocess/util.py delete mode 100644 baselines/BigST/arch/random_map.py delete mode 100644 baselines/BigST/loss/__init__.py delete mode 100644 baselines/BigST/loss/loss.py delete mode 100644 baselines/BigSTPreprocess/PEMS08.py delete mode 100644 baselines/BigSTPreprocess/arch/__init__.py delete mode 100644 baselines/BigSTPreprocess/arch/bigst_preprocess_arch.py delete mode 100644 baselines/BigSTPreprocess/runner/__init__.py delete mode 100644 baselines/BigSTPreprocess/runner/bigstpreprocess_runner.py diff --git a/baselines/BigST/PEMS04.py b/baselines/BigST/PEMS04.py deleted file mode 100644 index 6dcc698a..00000000 --- a/baselines/BigST/PEMS04.py +++ /dev/null @@ -1,162 +0,0 @@ -import os -import sys -import torch -from easydict import EasyDict -sys.path.append(os.path.abspath(__file__ + '/../../..')) - -from basicts.metrics import masked_mae, masked_mape, masked_rmse -from basicts.data import TimeSeriesForecastingDataset -from basicts.runners import SimpleTimeSeriesForecastingRunner -from basicts.scaler import ZScoreScaler -from basicts.utils import get_regular_settings, load_adj - -from .arch import BigST -from .loss import bigst_loss - -############################## Hot Parameters ############################## -# Dataset & Metrics configuration -DATA_NAME = 'PEMS04' # Dataset name -regular_settings = get_regular_settings(DATA_NAME) -INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence -OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence -TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios -NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data -RESCALE = regular_settings['RESCALE'] # Whether to rescale the data -NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data -# Model architecture and parameters -MODEL_ARCH = BigST -adj_mx, _ = load_adj("datasets/" + DATA_NAME + - "/adj_mx.pkl", "doubletransition") -MODEL_PARAM = { - "num_nodes": 307, - "seq_num": INPUT_LEN, - "in_dim": 3, - "out_dim": OUTPUT_LEN, - "hid_dim": 32, - "tau" : 0.25, - "random_feature_dim": 64, - "node_emb_dim": 32, - "time_emb_dim": 32, - "use_residual": True, - "use_bn": True, - "use_spatial": True, - "use_long": False, - "dropout": 0.3, - "supports": [torch.tensor(i) for i in adj_mx], - "time_of_day_size": 288, - "day_of_week_size": 7, -} - -NUM_EPOCHS = 100 - -############################## General Configuration ############################## -CFG = EasyDict() -# General settings -CFG.DESCRIPTION = 'An Example Config' -CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode) -# Runner -CFG.RUNNER = SimpleTimeSeriesForecastingRunner - -############################## Environment Configuration ############################## - -CFG.ENV = EasyDict() # Environment settings. Default: None -CFG.ENV.SEED = 0 # Random seed. Default: None - -############################## Dataset Configuration ############################## -CFG.DATASET = EasyDict() -# Dataset settings -CFG.DATASET.NAME = DATA_NAME -CFG.DATASET.TYPE = TimeSeriesForecastingDataset -CFG.DATASET.PARAM = EasyDict({ - 'dataset_name': DATA_NAME, - 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO, - 'input_len': INPUT_LEN, - 'output_len': OUTPUT_LEN, - # 'mode' is automatically set by the runner -}) - -############################## Scaler Configuration ############################## -CFG.SCALER = EasyDict() -# Scaler settings -CFG.SCALER.TYPE = ZScoreScaler # Scaler class -CFG.SCALER.PARAM = EasyDict({ - 'dataset_name': DATA_NAME, - 'train_ratio': TRAIN_VAL_TEST_RATIO[0], - 'norm_each_channel': NORM_EACH_CHANNEL, - 'rescale': RESCALE, -}) - -############################## Model Configuration ############################## -CFG.MODEL = EasyDict() -# Model settings -CFG.MODEL.NAME = MODEL_ARCH.__name__ -CFG.MODEL.ARCH = MODEL_ARCH -CFG.MODEL.PARAM = MODEL_PARAM -CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] -CFG.MODEL.TARGET_FEATURES = [0] - -############################## Metrics Configuration ############################## - -CFG.METRICS = EasyDict() -# Metrics settings -CFG.METRICS.FUNCS = EasyDict({ - 'MAE': masked_mae, - 'MAPE': masked_mape, - 'RMSE': masked_rmse, - }) -CFG.METRICS.TARGET = 'MAE' -CFG.METRICS.NULL_VAL = NULL_VAL - -############################## Training Configuration ############################## -CFG.TRAIN = EasyDict() -CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS -CFG.TRAIN.CKPT_SAVE_DIR = os.path.join( - 'checkpoints', - MODEL_ARCH.__name__, - '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)]) -) -CFG.TRAIN.LOSS = bigst_loss -# Optimizer settings -CFG.TRAIN.OPTIM = EasyDict() -CFG.TRAIN.OPTIM.TYPE = "AdamW" -CFG.TRAIN.OPTIM.PARAM = { - "lr": 0.002, - "weight_decay": 0.0001, -} -# Learning rate scheduler settings -CFG.TRAIN.LR_SCHEDULER = EasyDict() -CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR" -CFG.TRAIN.LR_SCHEDULER.PARAM = { - "milestones": [1, 50], - "gamma": 0.5 -} -# Train data loader settings -CFG.TRAIN.DATA = EasyDict() -CFG.TRAIN.DATA.BATCH_SIZE = 64 -CFG.TRAIN.DATA.SHUFFLE = True -# Gradient clipping settings -CFG.TRAIN.CLIP_GRAD_PARAM = { - "max_norm": 5.0 -} - -############################## Validation Configuration ############################## -CFG.VAL = EasyDict() -CFG.VAL.INTERVAL = 1 -CFG.VAL.DATA = EasyDict() -CFG.VAL.DATA.BATCH_SIZE = 64 - -############################## Test Configuration ############################## -CFG.TEST = EasyDict() -CFG.TEST.INTERVAL = 1 -CFG.TEST.DATA = EasyDict() -CFG.TEST.DATA.BATCH_SIZE = 64 - -############################## Evaluation Configuration ############################## - -CFG.EVAL = EasyDict() - -# Evaluation parameters -CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: [] -CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True - - diff --git a/baselines/BigST/arch/__init__.py b/baselines/BigST/arch/__init__.py deleted file mode 100644 index 7cb17069..00000000 --- a/baselines/BigST/arch/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .bigst_arch import BigST - -__all__ = ["BigST"] diff --git a/baselines/BigST/arch/bigst_arch.py b/baselines/BigST/arch/bigst_arch.py deleted file mode 100644 index 5e8c6034..00000000 --- a/baselines/BigST/arch/bigst_arch.py +++ /dev/null @@ -1,139 +0,0 @@ -import math -import torch -import torch.nn as nn -import torch.nn.functional as F -from .linear_conv import * -from torch.autograd import Variable -import pdb - -class BigST(nn.Module): - """ - Paper: BigST: Linear Complexity Spatio-Temporal Graph Neural Network for Traffic Forecasting on Large-Scale Road Networks - Link: https://dl.acm.org/doi/10.14778/3641204.3641217 - Official Code: https://github.com/usail-hkust/BigST?tab=readme-ov-file - Venue: VLDB 2024 - Task: Spatial-Temporal Forecasting - """ - def __init__(self, seq_num, in_dim, out_dim, hid_dim, num_nodes, tau, random_feature_dim, node_emb_dim, time_emb_dim, \ - use_residual, use_bn, use_spatial, use_long, dropout, time_of_day_size, day_of_week_size, supports=None, edge_indices=None): - super(BigST, self).__init__() - self.tau = tau - self.layer_num = 3 - self.in_dim = in_dim - self.random_feature_dim = random_feature_dim - - self.use_residual = use_residual - self.use_bn = use_bn - self.use_spatial = use_spatial - self.use_long = use_long - - self.dropout = dropout - self.activation = nn.ReLU() - self.supports = supports - - self.time_num = time_of_day_size - self.week_num = day_of_week_size - - # node embedding layer - self.node_emb_layer = nn.Parameter(torch.empty(num_nodes, node_emb_dim)) - nn.init.xavier_uniform_(self.node_emb_layer) - - # time embedding layer - self.time_emb_layer = nn.Parameter(torch.empty(self.time_num, time_emb_dim)) - nn.init.xavier_uniform_(self.time_emb_layer) - self.week_emb_layer = nn.Parameter(torch.empty(self.week_num, time_emb_dim)) - nn.init.xavier_uniform_(self.week_emb_layer) - - # embedding layer - self.input_emb_layer = nn.Conv2d(seq_num*in_dim, hid_dim, kernel_size=(1, 1), bias=True) - - self.W_1 = nn.Conv2d(node_emb_dim+time_emb_dim*2, hid_dim, kernel_size=(1, 1), bias=True) - self.W_2 = nn.Conv2d(node_emb_dim+time_emb_dim*2, hid_dim, kernel_size=(1, 1), bias=True) - - self.linear_conv = nn.ModuleList() - self.bn = nn.ModuleList() - - self.supports_len = 0 - if supports is not None: - self.supports_len += len(supports) - - for i in range(self.layer_num): - self.linear_conv.append(linearized_conv(hid_dim*4, hid_dim*4, self.dropout, self.tau, self.random_feature_dim)) - self.bn.append(nn.LayerNorm(hid_dim*4)) - - if self.use_long: - self.regression_layer = nn.Conv2d(hid_dim*4*2+hid_dim+seq_num, out_dim, kernel_size=(1, 1), bias=True) - else: - self.regression_layer = nn.Conv2d(hid_dim*4*2, out_dim, kernel_size=(1, 1), bias=True) - - # def forward(self, x, feat=None): - def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor: - x = history_data[:, :, :, range(self.in_dim)] # (batch_size, in_len, data_dim) - x = x.transpose(1,2) - # input: (B, N, T, D) - B, N, T, D = x.size() - - time_emb = self.time_emb_layer[(x[:, :, -1, 1]*self.time_num).type(torch.LongTensor)] - week_emb = self.week_emb_layer[(x[:, :, -1, 2]).type(torch.LongTensor)] - - # input embedding - x = x.contiguous().view(B, N, -1).transpose(1, 2).unsqueeze(-1) # (B, D*T, N, 1) - input_emb = self.input_emb_layer(x) - - # node embeddings - node_emb = self.node_emb_layer.unsqueeze(0).expand(B, -1, -1).transpose(1, 2).unsqueeze(-1) # (B, dim, N, 1) - - # time embeddings - time_emb = time_emb.transpose(1, 2).unsqueeze(-1) # (B, dim, N, 1) - week_emb = week_emb.transpose(1, 2).unsqueeze(-1) # (B, dim, N, 1) - - x_g = torch.cat([node_emb, time_emb, week_emb], dim=1) # (B, dim*4, N, 1) - x = torch.cat([input_emb, node_emb, time_emb, week_emb], dim=1) # (B, dim*4, N, 1) - - # linearized spatial convolution - x_pool = [x] # (B, dim*4, N, 1) - node_vec1 = self.W_1(x_g) # (B, dim, N, 1) - node_vec2 = self.W_2(x_g) # (B, dim, N, 1) - node_vec1 = node_vec1.permute(0, 2, 3, 1) # (B, N, 1, dim) - node_vec2 = node_vec2.permute(0, 2, 3, 1) # (B, N, 1, dim) - for i in range(self.layer_num): - if self.use_residual: - residual = x - x, node_vec1_prime, node_vec2_prime = self.linear_conv[i](x, node_vec1, node_vec2) - - if self.use_residual: - x = x+residual - - if self.use_bn: - x = x.permute(0, 2, 3, 1) # (B, N, 1, dim*4) - x = self.bn[i](x) - x = x.permute(0, 3, 1, 2) - - x_pool.append(x) - x = torch.cat(x_pool, dim=1) # (B, dim*4, N, 1) - - x = self.activation(x) # (B, dim*4, N, 1) - - if self.use_long: - feat = feat.permute(0, 2, 1).unsqueeze(-1) # (B, F, N, 1) - x = torch.cat([x, feat], dim=1) - x = self.regression_layer(x) # (B, N, T) - x = x.squeeze(-1).permute(0, 2, 1) - else: - x = self.regression_layer(x) # (B, N, T) - x = x.squeeze(-1).permute(0, 2, 1) - - # if self.use_spatial: - - # supports = [support.to(x.device) for support in self.supports] - # edge_indices = torch.nonzero(supports[0] > 0) - - # # s_loss = spatial_loss(node_vec1_prime, node_vec2_prime, supports, edge_indices) - # return x.transpose(1,2).unsqueeze(-1), s_loss - # else: - # return x.transpose(1,2).unsqueeze(-1), 0 - return {"prediction": x.transpose(1,2).unsqueeze(-1) - , "node_vec1": node_vec1_prime - , "node_vec2": node_vec2_prime - , "supports": self.supports - , 'use_spatial': self.use_spatial} \ No newline at end of file diff --git a/baselines/BigST/arch/linear_conv.py b/baselines/BigST/arch/linear_conv.py deleted file mode 100644 index 34d84eab..00000000 --- a/baselines/BigST/arch/linear_conv.py +++ /dev/null @@ -1,99 +0,0 @@ -import math -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable - -from .random_map import * - -def linear_kernel(x, node_vec1, node_vec2): - # x: [B, N, 1, nhid] node_vec1: [B, N, 1, r], node_vec2: [B, N, 1, r] - node_vec1 = node_vec1.permute(1, 0, 2, 3) # [N, B, 1, r] - node_vec2 = node_vec2.permute(1, 0, 2, 3) # [N, B, 1, r] - x = x.permute(1, 0, 2, 3) # [N, B, 1, nhid] - - v2x = torch.einsum("nbhm,nbhd->bhmd", node_vec2, x) - out1 = torch.einsum("nbhm,bhmd->nbhd", node_vec1, v2x) # [N, B, 1, nhid] - - one_matrix = torch.ones([node_vec2.shape[0]]).to(node_vec1.device) - node_vec2_sum = torch.einsum("nbhm,n->bhm", node_vec2, one_matrix) - out2 = torch.einsum("nbhm,bhm->nbh", node_vec1, node_vec2_sum) # [N, 1] - - out1 = out1.permute(1, 0, 2, 3) # [B, N, 1, nhid] - out2 = out2.permute(1, 0, 2) - out2 = torch.unsqueeze(out2, len(out2.shape)) - out = out1 / out2 # [B, N, 1, nhid] - - return out - -# def spatial_loss(node_vec1, node_vec2, supports, edge_indices): -# B = node_vec1.size(0) -# node_vec1 = node_vec1.permute(1, 0, 2, 3) # [N, B, 1, r] -# node_vec2 = node_vec2.permute(1, 0, 2, 3) # [N, B, 1, r] - -# node_vec1_end, node_vec2_start = node_vec1[edge_indices[:, 0]], node_vec2[edge_indices[:, 1]] # [E, B, 1, r] -# attn1 = torch.einsum("ebhm,ebhm->ebh", node_vec1_end, node_vec2_start) # [E, B, 1] -# attn1 = attn1.permute(1, 0, 2) # [B, E, 1] - -# one_matrix = torch.ones([node_vec2.shape[0]]).to(node_vec1.device) -# node_vec2_sum = torch.einsum("nbhm,n->bhm", node_vec2, one_matrix) -# attn_norm = torch.einsum("nbhm,bhm->nbh", node_vec1, node_vec2_sum) - -# attn2 = attn_norm[edge_indices[:, 0]] # [E, B, 1] -# attn2 = attn2.permute(1, 0, 2) # [B, E, 1] -# attn_score = attn1 / attn2 # [B, E, 1] - -# d_norm = supports[0][edge_indices[:, 0], edge_indices[:, 1]] -# d_norm = d_norm.reshape(1, -1, 1).repeat(B, 1, attn_score.shape[-1]) -# spatial_loss = torch.mean(attn_score.log() * d_norm) - -# return spatial_loss - -class conv_approximation(nn.Module): - def __init__(self, dropout, tau, random_feature_dim): - super(conv_approximation, self).__init__() - self.tau = tau - self.random_feature_dim = random_feature_dim - self.activation = nn.ReLU() - self.dropout = dropout - - def forward(self, x, node_vec1, node_vec2): - B = x.size(0) # (B, N, 1, nhid) - dim = node_vec1.shape[-1] # (N, 1, d) - - random_seed = torch.ceil(torch.abs(torch.sum(node_vec1) * 1e8)).to(torch.int32) - random_matrix = create_random_matrix(self.random_feature_dim, dim, seed=random_seed).to(node_vec1.device) # (d, r) - - node_vec1 = node_vec1 / math.sqrt(self.tau) - node_vec2 = node_vec2 / math.sqrt(self.tau) - node_vec1_prime = random_feature_map(node_vec1, True, random_matrix) # [B, N, 1, r] - node_vec2_prime = random_feature_map(node_vec2, False, random_matrix) # [B, N, 1, r] - - x = linear_kernel(x, node_vec1_prime, node_vec2_prime) - - return x, node_vec1_prime, node_vec2_prime - -class linearized_conv(nn.Module): - def __init__(self, in_dim, hid_dim, dropout, tau=1.0, random_feature_dim=64): - super(linearized_conv, self).__init__() - - self.dropout = dropout - self.tau = tau - self.random_feature_dim = random_feature_dim - - self.input_fc = nn.Conv2d(in_channels=in_dim, out_channels=hid_dim, kernel_size=(1, 1), bias=True) - self.activation = nn.ReLU() - self.dropout_layer = nn.Dropout(p=dropout) - - self.conv_app_layer = conv_approximation(self.dropout, self.tau, self.random_feature_dim) - - def forward(self, input_data, node_vec1, node_vec2): - x = self.input_fc(input_data) - x = self.activation(x) - x = self.dropout_layer(x) - - x = x.permute(0, 2, 3, 1) # (B, N, 1, dim*4) - x, node_vec1_prime, node_vec2_prime = self.conv_app_layer(x, node_vec1, node_vec2) - x = x.permute(0, 3, 1, 2) # (B, dim*4, N, 1) - - return x, node_vec1_prime, node_vec2_prime diff --git a/baselines/BigST/arch/pipeline.py b/baselines/BigST/arch/pipeline.py deleted file mode 100644 index fd4122cb..00000000 --- a/baselines/BigST/arch/pipeline.py +++ /dev/null @@ -1,67 +0,0 @@ -import torch -import torch.nn as nn -import torch.optim as optim -import torch.nn.functional as F -from torch.autograd import Variable - -import metrics -from bigst import bigst - -class train_pipeline(): - def __init__(self, scaler, seq_num, in_dim, hid_dim, num_nodes, tau, random_feature_dim, node_emb_dim, time_emb_dim, \ - use_residual, use_bn, use_spatial, use_long, dropout, lrate, wdecay, device, supports, edge_indices): - self.model = bigst(device, seq_num, in_dim, hid_dim, num_nodes, tau, random_feature_dim, node_emb_dim, time_emb_dim, \ - use_residual, use_bn, use_spatial, use_long, dropout, supports=supports, edge_indices=edge_indices) - self.model.to(device) - self.optimizer = optim.Adam(self.model.parameters(), lr=lrate, weight_decay=wdecay) - self.loss = metrics.masked_mae - self.scaler = scaler - self.use_spatial = use_spatial - self.clip = 5 - - def train(self, input, real_val, feat=None): - self.model.train() - self.optimizer.zero_grad() - - if self.use_spatial: - output, spatial_loss = self.model(input, feat) - real = self.scaler.inverse_transform(real_val) - predict = self.scaler.inverse_transform(output) - loss = self.loss(predict, real, 0.0)-0.3*spatial_loss - else: - output, _ = self.model(input, feat) - real = self.scaler.inverse_transform(real_val) - predict = self.scaler.inverse_transform(output) - loss = self.loss(predict, real, 0.0) - - loss.backward() - if self.clip is not None: - torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip) - self.optimizer.step() - mape = metrics.masked_mape(predict,real,0.0).item() - rmse = metrics.masked_rmse(predict,real,0.0).item() - return loss.item(), mape, rmse - - def eval(self, input, real_val, feat=None, flag='overall'): - if flag=='overall': - self.model.eval() - output, _ = self.model(input, feat) - real = self.scaler.inverse_transform(real_val) - predict = self.scaler.inverse_transform(output) - loss = self.loss(predict, real, 0.0) - mape = metrics.masked_mape(predict,real,0.0).item() - rmse = metrics.masked_rmse(predict,real,0.0).item() - return loss.item(), mape, rmse - elif flag=='horizon': - self.model.eval() - output, _ = self.model(input, feat) - real = self.scaler.inverse_transform(real_val) - predict = self.scaler.inverse_transform(output) - loss = [] - mape = [] - rmse = [] - for i in range(12): - loss.append(self.loss(predict[..., i], real[..., i], 0.0).item()) - mape.append(metrics.masked_mape(predict[..., i], real[..., i], 0.0).item()) - rmse.append(metrics.masked_rmse(predict[..., i], real[..., i], 0.0).item()) - return loss, mape, rmse diff --git a/baselines/BigST/arch/preprocess/metrics.py b/baselines/BigST/arch/preprocess/metrics.py deleted file mode 100644 index aac0af60..00000000 --- a/baselines/BigST/arch/preprocess/metrics.py +++ /dev/null @@ -1,53 +0,0 @@ -import torch -import numpy as np - -def masked_mse(preds, labels, null_val=np.nan): - if np.isnan(null_val): - mask = ~torch.isnan(labels) - else: - mask = (labels!=null_val) - mask = mask.float() - mask /= torch.mean((mask)) - mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) - loss = (preds-labels)**2 - loss = loss * mask - loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) - return torch.mean(loss) - -def masked_rmse(preds, labels, null_val=np.nan): - return torch.sqrt(masked_mse(preds=preds, labels=labels, null_val=null_val)) - -def masked_mae(preds, labels, null_val=np.nan): - if np.isnan(null_val): - mask = ~torch.isnan(labels) - else: - mask = (labels!=null_val) - mask = mask.float() - mask /= torch.mean((mask)) - mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) - loss = torch.abs(preds-labels) - loss = loss * mask - loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) - return torch.mean(loss) - - -def masked_mape(preds, labels, null_val=np.nan): - labels = torch.where(labels<0.01, torch.zeros_like(labels), labels) - if np.isnan(null_val): - mask = ~torch.isnan(labels) - else: - mask = (labels!=null_val) - mask = mask.float() - mask /= torch.mean((mask)) - mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) - loss = torch.abs(preds-labels)/labels - loss = loss * mask - loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) - return torch.mean(loss) - - -def metric(pred, real): - mae = masked_mae(pred,real,0.0).item() - mape = masked_mape(pred,real,0.0).item() - rmse = masked_rmse(pred,real,0.0).item() - return mae,mape,rmse \ No newline at end of file diff --git a/baselines/BigST/arch/preprocess/model.py b/baselines/BigST/arch/preprocess/model.py deleted file mode 100644 index 44bd07c1..00000000 --- a/baselines/BigST/arch/preprocess/model.py +++ /dev/null @@ -1,206 +0,0 @@ -import math -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable -import sys - -def create_projection_matrix(m, d, seed=0, scaling=0, struct_mode=False): - nb_full_blocks = int(m/d) - block_list = [] - current_seed = seed - for _ in range(nb_full_blocks): - torch.manual_seed(current_seed) - if struct_mode: - q = create_products_of_givens_rotations(d, current_seed) - else: - unstructured_block = torch.randn((d, d)) - q, _ = torch.qr(unstructured_block) - q = torch.t(q) - block_list.append(q) - current_seed += 1 - remaining_rows = m - nb_full_blocks * d - if remaining_rows > 0: - torch.manual_seed(current_seed) - if struct_mode: - q = create_products_of_givens_rotations(d, current_seed) - else: - unstructured_block = torch.randn((d, d)) - q, _ = torch.qr(unstructured_block) - q = torch.t(q) - block_list.append(q[0:remaining_rows]) - final_matrix = torch.vstack(block_list) - - current_seed += 1 - torch.manual_seed(current_seed) - if scaling == 0: - multiplier = torch.norm(torch.randn((m, d)), dim=1) - elif scaling == 1: - multiplier = torch.sqrt(torch.tensor(float(d))) * torch.ones(m) - else: - raise ValueError("Scaling must be one of {0, 1}. Was %s" % scaling) - - return torch.matmul(torch.diag(multiplier), final_matrix) - -def create_products_of_givens_rotations(dim, seed): - nb_givens_rotations = dim * int(math.ceil(math.log(float(dim)))) - q = np.eye(dim, dim) - np.random.seed(seed) - for _ in range(nb_givens_rotations): - random_angle = math.pi * np.random.uniform() - random_indices = np.random.choice(dim, 2) - index_i = min(random_indices[0], random_indices[1]) - index_j = max(random_indices[0], random_indices[1]) - slice_i = q[index_i] - slice_j = q[index_j] - new_slice_i = math.cos(random_angle) * slice_i + math.cos(random_angle) * slice_j - new_slice_j = -math.sin(random_angle) * slice_i + math.cos(random_angle) * slice_j - q[index_i] = new_slice_i - q[index_j] = new_slice_j - return torch.tensor(q, dtype=torch.float32) - -def softmax_kernel_transformation(data, is_query, projection_matrix=None, numerical_stabilizer=0.000001): - data_normalizer = 1.0 / torch.sqrt(torch.sqrt(torch.tensor(data.shape[-1], dtype=torch.float32))) - data = data_normalizer * data - ratio = 1.0 / torch.sqrt(torch.tensor(projection_matrix.shape[0], dtype=torch.float32)) - data_dash = torch.einsum("bnhd,md->bnhm", data, projection_matrix) - diag_data = torch.square(data) - diag_data = torch.sum(diag_data, dim=len(data.shape)-1) - diag_data = diag_data / 2.0 - diag_data = torch.unsqueeze(diag_data, dim=len(data.shape)-1) - last_dims_t = len(data_dash.shape) - 1 - attention_dims_t = len(data_dash.shape) - 3 - if is_query: - data_dash = ratio * ( - torch.exp(data_dash - diag_data - torch.max(data_dash, dim=last_dims_t, keepdim=True)[0]) + numerical_stabilizer - ) - else: - data_dash = ratio * ( - torch.exp(data_dash - diag_data - torch.max(torch.max(data_dash, dim=last_dims_t, keepdim=True)[0], - dim=attention_dims_t, keepdim=True)[0]) + numerical_stabilizer - ) - return data_dash - -def numerator(qs, ks, vs): - kvs = torch.einsum("nbhm,nbhd->bhmd", ks, vs) # kvs refers to U_k in the paper - return torch.einsum("nbhm,bhmd->nbhd", qs, kvs) - -def denominator(qs, ks): - all_ones = torch.ones([ks.shape[0]]).to(qs.device) - ks_sum = torch.einsum("nbhm,n->bhm", ks, all_ones) # ks_sum refers to O_k in the paper - return torch.einsum("nbhm,bhm->nbh", qs, ks_sum) - -def linearized_softmax(x, query, key): - # x: [B, N, H, D] query: [B, N, H, m], key: [B, N, H, m] - query = query.permute(1, 0, 2, 3) # [N, B, H, m] - key = key.permute(1, 0, 2, 3) # [N, B, H, m] - x = x.permute(1, 0, 2, 3) # [N, B, H, D] - - z_num = numerator(query, key, x) # [N, B, H, D] - z_den = denominator(query, key) # [N, H] - - z_num = z_num.permute(1, 0, 2, 3) # [B, N, H, D] - z_den = z_den.permute(1, 0, 2) - z_den = torch.unsqueeze(z_den, len(z_den.shape)) - z_output = z_num / z_den # # [B, N, H, D] - - return z_output - -class linearized_attention(nn.Module): - def __init__(self, c_in, c_out, dropout, random_feature_dim=30, tau=1.0, num_heads=4): - super(linearized_attention, self).__init__() - self.Wk = nn.Linear(c_in, c_out * num_heads) - self.Wq = nn.Linear(c_in, c_out * num_heads) - self.Wv = nn.Linear(c_in, c_out * num_heads) - self.Wo = nn.Linear(c_out * num_heads, c_out) - self.c_in = c_in - self.c_out = c_out - self.num_heads = num_heads - self.tau = tau - self.random_feature_dim = random_feature_dim - self.activation = nn.ReLU - self.dropout = dropout - - def reset_parameters(self): - self.Wk.reset_parameters() - self.Wq.reset_parameters() - self.Wv.reset_parameters() - self.Wo.reset_parameters() - - def forward(self, x): - B, T = x.size(0), x.size(1) # (B, T, D) - query = self.Wq(x).reshape(-1, T, self.num_heads, self.c_out) # (B, T, H, D) - key = self.Wk(x).reshape(-1, T, self.num_heads, self.c_out) # (B, T, H, D) - x = self.Wv(x).reshape(-1, T, self.num_heads, self.c_out) # (B, T, H, D) - - dim = query.shape[-1] # (B, T, H, D) - seed = torch.ceil(torch.abs(torch.sum(query) * 1e8)).to(torch.int32) - projection_matrix = create_projection_matrix(self.random_feature_dim, dim, seed=seed).to(query.device) # (d, m) - query = query / math.sqrt(self.tau) - key = key / math.sqrt(self.tau) - query = softmax_kernel_transformation(query, True, projection_matrix) # [B, T, H, m] - key = softmax_kernel_transformation(key, False, projection_matrix) # [B, T, H, m] - - x = linearized_softmax(x, query, key) - - x = self.Wo(x.flatten(-2, -1)) # (B, T, D) - - return x - -class linear_transformer(nn.Module): - def __init__(self, input_length, output_length, in_dim, num_nodes, nhid, dropout=0.3): - super(linear_transformer, self).__init__() - self.tau = 1.0 - self.layer_num = 3 - self.random_feature_dim = nhid*2 - - self.use_residual = True - self.use_bn = False - self.use_act = True - - self.dropout = dropout - self.activation = nn.ReLU() - - self.fc_convs = nn.ModuleList() - self.transformer_layer = nn.ModuleList() - self.bn = nn.ModuleList() - self.context_conv = nn.Conv2d(in_channels=in_dim, out_channels=nhid, kernel_size=(12, 1), stride=(12, 1)) - - self.temporal_embedding = nn.Parameter(torch.empty(int(input_length/12), nhid), requires_grad=True) # (C, nhid) - nn.init.xavier_uniform_(self.temporal_embedding) - - for i in range(self.layer_num): - self.transformer_layer.append(linearized_attention(nhid, nhid, self.dropout, self.random_feature_dim, self.tau)) - self.bn.append(nn.LayerNorm(nhid)) - - self.regression_layer = nn.Linear(nhid, output_length) - - def forward(self, x): - # input: (1, 9638, 2016, 3) (B, N, T, D) - B, N, T, D = x.size() - pe = self.temporal_embedding.unsqueeze(0).expand(B*N, -1, -1) # (B*N, T/12, nhid) - - x = x.reshape(B*N, T, D) - x = x.permute(0, 2, 1).unsqueeze(-1) # (B*N, T, D) -> (B*N, D, T, 1) - - # convolution layer - x = self.context_conv(x) # (B*N, D, T, 1) -> (B*N, nhid, T/12, 1) - x = x.squeeze(-1) # (B*N, nhid, T/12) - - # temporal embedding layer - x = x.permute(0, 2, 1) # (B*N, T/12, nhid) - x = x+pe # (B*N, T/12, nhid) - - # linearized attention - for num in range(self.layer_num): - residual = x # (B*N, T/12, nhid) - x = self.transformer_layer[num](x) # (B*N, T/12, nhid) - x = self.bn[num](x) - x = x+residual # (B*N, T/12, nhid) - - x = self.activation(x) # (B*N, T/12, nhid) - x = x[:, -1, :] - # x = torch.sum(x, dim=1) # (B*N, nhid) - feat = x.view(B, N, -1) # (B, N, nhid) - x = self.regression_layer(feat) # (B, N, output_length) - return x, feat diff --git a/baselines/BigST/arch/preprocess/pipeline.py b/baselines/BigST/arch/preprocess/pipeline.py deleted file mode 100644 index 46499b73..00000000 --- a/baselines/BigST/arch/preprocess/pipeline.py +++ /dev/null @@ -1,38 +0,0 @@ -import torch.optim as optim -from model import * -import metrics - -class train_pipeline(): - def __init__(self, scaler, input_length, output_length, in_dim, num_nodes, nhid, dropout, lrate, wdecay, device): - self.model = linear_transformer(input_length, output_length, in_dim, num_nodes, nhid, dropout) - self.model.to(device) - self.optimizer = optim.Adam(self.model.parameters(), lr=lrate, weight_decay=wdecay) - self.loss = metrics.masked_mae - self.scaler = scaler - self.clip = 5 - - def train(self, input, real_val): - self.model.train() - self.optimizer.zero_grad() - output, _ = self.model(input) - real = self.scaler.inverse_transform(real_val) - predict = self.scaler.inverse_transform(output) - - loss = self.loss(predict, real, 0.0) - loss.backward() - if self.clip is not None: - torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip) - self.optimizer.step() - mape = metrics.masked_mape(predict,real,0.0).item() - rmse = metrics.masked_rmse(predict,real,0.0).item() - return loss.item(), mape, rmse - - def eval(self, input, real_val): - self.model.eval() - output, _ = self.model(input) - real = self.scaler.inverse_transform(real_val) - predict = self.scaler.inverse_transform(output) - loss = self.loss(predict, real, 0.0) - mape = metrics.masked_mape(predict,real,0.0).item() - rmse = metrics.masked_rmse(predict,real,0.0).item() - return loss.item(), mape, rmse diff --git a/baselines/BigST/arch/preprocess/preprocess.py b/baselines/BigST/arch/preprocess/preprocess.py deleted file mode 100644 index feb795e3..00000000 --- a/baselines/BigST/arch/preprocess/preprocess.py +++ /dev/null @@ -1,127 +0,0 @@ -import torch -import numpy as np -import argparse -import time -import util -from pipeline import train_pipeline - -parser = argparse.ArgumentParser() -parser.add_argument('--device',type=str,default='cuda:0',help='') -parser.add_argument('--data',type=str,default='/data/pems_data/pems_vldb/long_term',help='data path') -parser.add_argument('--input_length',type=int,default=2016,help='') -parser.add_argument('--output_length',type=int,default=12,help='') -parser.add_argument('--nhid',type=int,default=32,help='') -parser.add_argument('--in_dim',type=int,default=3,help='inputs dimension') -parser.add_argument('--num_nodes',type=int,default=9638,help='number of nodes') -parser.add_argument('--batch_size',type=int,default=1,help='batch size') -parser.add_argument('--tiny_batch_size',type=int,default=256,help='tiny batch size') -parser.add_argument('--learning_rate',type=float,default=0.001,help='learning rate') -parser.add_argument('--dropout',type=float,default=0.3,help='dropout rate') -parser.add_argument('--weight_decay',type=float,default=0.0001,help='weight decay rate') -parser.add_argument('--epochs',type=int,default=100,help='') -parser.add_argument('--print_every',type=int,default=1,help='') -#parser.add_argument('--seed',type=int,default=99,help='random seed') -parser.add_argument('--save',type=str,default='checkpoint/',help='save path') -parser.add_argument('--expid',type=int,default=1,help='experiment id') - -args = parser.parse_args() - -def main(): - # set seed - # torch.manual_seed(args.seed) - # np.random.seed(args.seed) - # load data - device = torch.device(args.device) - dataloader = util.load_dataset(args.data, args.batch_size, args.batch_size, args.batch_size, - args.input_length, args.output_length) - scaler = dataloader['scaler'] - tiny_batch_size = args.tiny_batch_size - - print(args) - - trainer = train_pipeline(scaler, args.input_length, args.output_length, args.in_dim, args.num_nodes, - args.nhid, args.dropout, args.learning_rate, args.weight_decay, device) - - print("start training...",flush=True) - his_loss =[] - train_time = [] - val_time = [] - - for i in range(1, args.epochs+1): - # train - train_loss = [] - train_mape = [] - train_rmse = [] - t1 = time.time() - dataloader['train_loader'].shuffle() - for iter, (x, y) in enumerate(dataloader['train_loader'].get_iterator()): - B, T, N, F = x.shape - batch_num = int(B * N / tiny_batch_size) - idx_perm = np.random.permutation([i for i in range(B*N)]) - for j in range(batch_num): - if j==batch_num-1: - x_ = x[:, :, idx_perm[(j+1)*tiny_batch_size:], :] - y_ = y[:, :, idx_perm[(j+1)*tiny_batch_size:], :] - else: - x_ = x[:, :, idx_perm[j*tiny_batch_size:(j+1)*tiny_batch_size], :] - y_ = y[:, :, idx_perm[j*tiny_batch_size:(j+1)*tiny_batch_size], :] - - trainx = torch.Tensor(x_).to(device) # (B, T, N, F) - trainx = trainx.transpose(1, 2) # (B, N, T, F) - trainy = torch.Tensor(y_).to(device) # (B, T, N, F) - trainy = trainy.transpose(1, 2) # (B, N, T, F) - metrics = trainer.train(trainx, trainy[:,:,:,0]) - train_loss.append(metrics[0]) - train_mape.append(metrics[1]) - train_rmse.append(metrics[2]) - t2 = time.time() - train_time.append(t2-t1) - - if iter % args.print_every == 0: - log = 'Iter: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}' - print(log.format(iter, train_loss[-1], train_mape[-1], train_rmse[-1]),flush=True) - # Save the model parameters for subsequent preprocessing - torch.save(trainer.model.state_dict(), args.save+"linear_transformer.pth") - - # validation - valid_loss = [] - valid_mape = [] - valid_rmse = [] - - s1 = time.time() - for iter, (x, y) in enumerate(dataloader['val_loader'].get_iterator()): - B, T, N, F = x.shape - batch_num = int(B*N/tiny_batch_size) - for k in range(batch_num): - if k==batch_num-1: - x_ = x[:, :, (k+1)*tiny_batch_size:, :] - y_ = y[:, :, (k+1)*tiny_batch_size:, :] - else: - x_ = x[:, :, k*tiny_batch_size:(k+1)*tiny_batch_size, :] - y_ = y[:, :, k*tiny_batch_size:(k+1)*tiny_batch_size, :] - testx = torch.Tensor(x).to(device) - testx = testx.transpose(1, 2) - testy = torch.Tensor(y).to(device) - testy = testy.transpose(1, 2) - metrics = trainer.eval(testx, testy[:,:,:,0]) - valid_loss.append(metrics[0]) - valid_mape.append(metrics[1]) - valid_rmse.append(metrics[2]) - s2 = time.time() - mvalid_loss = np.mean(valid_loss) - mvalid_mape = np.mean(valid_mape) - mvalid_rmse = np.mean(valid_rmse) - log = 'Epoch: {:03d}, Validation Inference Time: {:.4f} secs' - print(log.format(i,(s2-s1))) - log = 'Valid MAE: {:.4f}, Valid MAPE: {:.4f}, Valid RMSE: {:.4f}' - print(log.format(mvalid_loss, mvalid_mape, mvalid_rmse), flush=True) - val_time.append(s2-s1) - - print("Average Training Time: {:.4f} secs/epoch".format(np.mean(train_time))) - print("Average Inference Time: {:.4f} secs".format(np.mean(val_time))) - -if __name__ == "__main__": - t1 = time.time() - main() - t2 = time.time() - print("Total time spent: {:.4f}".format(t2-t1)) diff --git a/baselines/BigST/arch/preprocess/util.py b/baselines/BigST/arch/preprocess/util.py deleted file mode 100644 index 81bf2cd7..00000000 --- a/baselines/BigST/arch/preprocess/util.py +++ /dev/null @@ -1,147 +0,0 @@ -import pickle -import numpy as np -import os -import scipy.sparse as sp -import torch -from scipy.sparse import linalg - -class DataLoader(object): - def __init__(self, data, batch_size, input_length, output_length): - self.seq_length_x = input_length - self.seq_length_y = output_length - self.y_start = 1 - self.batch_size = batch_size - self.current_ind = 0 - self.x_offsets = np.sort(np.concatenate((np.arange(-(self.seq_length_x - 1), 1, 1),))) - self.y_offsets = np.sort(np.arange(self.y_start, (self.seq_length_y + 1), 1)) - self.min_t = abs(min(self.x_offsets)) - self.max_t = abs(data.shape[0] - abs(max(self.y_offsets))) - mod = (self.max_t-self.min_t) % batch_size - if mod != 0: - self.data = data[:-mod] - else: - self.data = data - self.max_t = abs(self.data.shape[0] - abs(max(self.y_offsets))) - self.permutation = [i for i in range(self.min_t, self.max_t)] - - def shuffle(self): - self.permutation = np.random.permutation([i for i in range(self.min_t, self.max_t)]) - - def get_iterator(self): - self.current_ind = 0 - - def _wrapper(): - while self.current_ind < len(self.permutation): - if self.batch_size > 1: - x_batch = [] - y_batch = [] - for i in range(self.batch_size): - x_i = self.data[self.permutation[self.current_ind+i] + self.x_offsets, ...] - y_i = self.data[self.permutation[self.current_ind+i] + self.y_offsets, ...] - x_batch.append(x_i) - y_batch.append(y_i) - - x_batch = np.stack(x_batch, axis=0) - y_batch = np.stack(y_batch, axis=0) - else: - x_batch = self.data[self.permutation[self.current_ind] + self.x_offsets, ...] - y_batch = self.data[self.permutation[self.current_ind] + self.y_offsets, ...] - x_batch = np.expand_dims(x_batch, axis=0) - y_batch = np.expand_dims(y_batch, axis=0) - yield (x_batch, y_batch) - self.current_ind += self.batch_size - - return _wrapper() - -class StandardScaler(): - """ - Standard the input - """ - - def __init__(self, mean, std): - self.mean = mean - self.std = std - - def transform(self, data): - return (data - self.mean) / self.std - - def inverse_transform(self, data): - return (data * self.std) + self.mean - -def sym_adj(adj): - """Symmetrically normalize adjacency matrix.""" - adj = sp.coo_matrix(adj) - rowsum = np.array(adj.sum(1)) - d_inv_sqrt = np.power(rowsum, -0.5).flatten() - d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. - d_mat_inv_sqrt = sp.diags(d_inv_sqrt) - return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).astype(np.float32).todense() - -def asym_adj(adj): - adj = sp.coo_matrix(adj) - rowsum = np.array(adj.sum(1)).flatten() - d_inv = np.power(rowsum, -1).flatten() - d_inv[np.isinf(d_inv)] = 0. - d_mat= sp.diags(d_inv) - return d_mat.dot(adj).astype(np.float32).todense() - -def calculate_normalized_laplacian(adj): - """ - # L = D^-1/2 (D-A) D^-1/2 = I - D^-1/2 A D^-1/2 - # D = diag(A 1) - :param adj: - :return: - """ - adj = sp.coo_matrix(adj) - d = np.array(adj.sum(1)) - d_inv_sqrt = np.power(d, -0.5).flatten() - d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. - d_mat_inv_sqrt = sp.diags(d_inv_sqrt) - normalized_laplacian = sp.eye(adj.shape[0]) - adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() - return normalized_laplacian - -def calculate_scaled_laplacian(adj_mx, lambda_max=2, undirected=True): - if undirected: - adj_mx = np.maximum.reduce([adj_mx, adj_mx.T]) - L = calculate_normalized_laplacian(adj_mx) - if lambda_max is None: - lambda_max, _ = linalg.eigsh(L, 1, which='LM') - lambda_max = lambda_max[0] - L = sp.csr_matrix(L) - M, _ = L.shape - I = sp.identity(M, format='csr', dtype=L.dtype) - L = (2 / lambda_max * L) - I - return L.astype(np.float32).todense() - -def load_pickle(pickle_file): - try: - with open(pickle_file, 'rb') as f: - pickle_data = pickle.load(f) - except UnicodeDecodeError as e: - with open(pickle_file, 'rb') as f: - pickle_data = pickle.load(f, encoding='latin1') - except Exception as e: - print('Unable to load data ', pickle_file, ':', e) - raise - return pickle_data - -def load_adj(adj_filename, adjtype): - adj_mx = np.load(adj_filename) - print('adj_mx: ', adj_mx.shape) - adj = [asym_adj(adj_mx)] - return adj - -def load_dataset(dataset_dir, batch_size, valid_batch_size, test_batch_size, input_length, output_length): - data = {} - for category in ['train', 'val', 'test']: - data[category] = np.load(os.path.join(dataset_dir, category + '.npy')) - print('*'*10, category, data[category].shape, '*'*10) - scaler = StandardScaler(mean=data['train'][..., 0].mean(), std=data['train'][..., 0].std()) - # Data format - for category in ['train', 'val', 'test']: - data[category][..., 0] = scaler.transform(data[category][..., 0]) - data['train_loader'] = DataLoader(data['train'], batch_size, input_length, output_length) - data['val_loader'] = DataLoader(data['val'], valid_batch_size, input_length, output_length) - data['test_loader'] = DataLoader(data['test'], test_batch_size, input_length, output_length) - data['scaler'] = scaler - return data diff --git a/baselines/BigST/arch/random_map.py b/baselines/BigST/arch/random_map.py deleted file mode 100644 index ea7e49d4..00000000 --- a/baselines/BigST/arch/random_map.py +++ /dev/null @@ -1,81 +0,0 @@ -import math -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable - -def create_products_of_givens_rotations(dim, seed): - nb_givens_rotations = dim * int(math.ceil(math.log(float(dim)))) - q = np.eye(dim, dim) - np.random.seed(seed) - for _ in range(nb_givens_rotations): - random_angle = math.pi * np.random.uniform() - random_indices = np.random.choice(dim, 2) - index_i = min(random_indices[0], random_indices[1]) - index_j = max(random_indices[0], random_indices[1]) - slice_i = q[index_i] - slice_j = q[index_j] - new_slice_i = math.cos(random_angle) * slice_i + math.cos(random_angle) * slice_j - new_slice_j = -math.sin(random_angle) * slice_i + math.cos(random_angle) * slice_j - q[index_i] = new_slice_i - q[index_j] = new_slice_j - return torch.tensor(q, dtype=torch.float32) - -def create_random_matrix(m, d, seed=0, scaling=0, struct_mode=False): - nb_full_blocks = int(m/d) - block_list = [] - current_seed = seed - for _ in range(nb_full_blocks): - torch.manual_seed(current_seed) - if struct_mode: - q = create_products_of_givens_rotations(d, current_seed) - else: - unstructured_block = torch.randn((d, d)) - q, _ = torch.qr(unstructured_block) - q = torch.t(q) - block_list.append(q) - current_seed += 1 - remaining_rows = m - nb_full_blocks * d - if remaining_rows > 0: - torch.manual_seed(current_seed) - if struct_mode: - q = create_products_of_givens_rotations(d, current_seed) - else: - unstructured_block = torch.randn((d, d)) - q, _ = torch.qr(unstructured_block) - q = torch.t(q) - block_list.append(q[0:remaining_rows]) - final_matrix = torch.vstack(block_list) - - current_seed += 1 - torch.manual_seed(current_seed) - if scaling == 0: - multiplier = torch.norm(torch.randn((m, d)), dim=1) - elif scaling == 1: - multiplier = torch.sqrt(torch.tensor(float(d))) * torch.ones(m) - else: - raise ValueError("Scaling must be one of {0, 1}. Was %s" % scaling) - - return torch.matmul(torch.diag(multiplier), final_matrix) - -def random_feature_map(data, is_query, projection_matrix=None, numerical_stabilizer=0.000001): - data_normalizer = 1.0 / torch.sqrt(torch.sqrt(torch.tensor(data.shape[-1], dtype=torch.float32))) - data = data_normalizer * data - ratio = 1.0 / torch.sqrt(torch.tensor(projection_matrix.shape[0], dtype=torch.float32)) - data_dash = torch.einsum("bnhd,md->bnhm", data, projection_matrix) - diag_data = torch.square(data) - diag_data = torch.sum(diag_data, dim=len(data.shape)-1) - diag_data = diag_data / 2.0 - diag_data = torch.unsqueeze(diag_data, dim=len(data.shape)-1) - last_dims_t = len(data_dash.shape) - 1 - attention_dims_t = len(data_dash.shape) - 3 - if is_query: - data_dash = ratio * ( - torch.exp(data_dash - diag_data - torch.max(data_dash, dim=last_dims_t, keepdim=True)[0]) + numerical_stabilizer - ) - else: - data_dash = ratio * ( - torch.exp(data_dash - diag_data - torch.max(torch.max(data_dash, dim=last_dims_t, keepdim=True)[0], - dim=attention_dims_t, keepdim=True)[0]) + numerical_stabilizer - ) - return data_dash diff --git a/baselines/BigST/loss/__init__.py b/baselines/BigST/loss/__init__.py deleted file mode 100644 index c22530d7..00000000 --- a/baselines/BigST/loss/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .loss import bigst_loss \ No newline at end of file diff --git a/baselines/BigST/loss/loss.py b/baselines/BigST/loss/loss.py deleted file mode 100644 index 831f541e..00000000 --- a/baselines/BigST/loss/loss.py +++ /dev/null @@ -1,35 +0,0 @@ -import torch -import numpy as np -from basicts.metrics import masked_mae - -def spatial_loss(node_vec1, node_vec2, supports, edge_indices): - B = node_vec1.size(0) - node_vec1 = node_vec1.permute(1, 0, 2, 3) # [N, B, 1, r] - node_vec2 = node_vec2.permute(1, 0, 2, 3) # [N, B, 1, r] - - node_vec1_end, node_vec2_start = node_vec1[edge_indices[:, 0]], node_vec2[edge_indices[:, 1]] # [E, B, 1, r] - attn1 = torch.einsum("ebhm,ebhm->ebh", node_vec1_end, node_vec2_start) # [E, B, 1] - attn1 = attn1.permute(1, 0, 2) # [B, E, 1] - - one_matrix = torch.ones([node_vec2.shape[0]]).to(node_vec1.device) - node_vec2_sum = torch.einsum("nbhm,n->bhm", node_vec2, one_matrix) - attn_norm = torch.einsum("nbhm,bhm->nbh", node_vec1, node_vec2_sum) - - attn2 = attn_norm[edge_indices[:, 0]] # [E, B, 1] - attn2 = attn2.permute(1, 0, 2) # [B, E, 1] - attn_score = attn1 / attn2 # [B, E, 1] - - d_norm = supports[0][edge_indices[:, 0], edge_indices[:, 1]] - d_norm = d_norm.reshape(1, -1, 1).repeat(B, 1, attn_score.shape[-1]) - spatial_loss = torch.mean(attn_score.log() * d_norm) - - return spatial_loss - -def bigst_loss(prediction, target, node_vec1, node_vec2, supports, use_spatial): - if use_spatial: - supports = [support.to(prediction.device) for support in supports] - edge_indices = torch.nonzero(supports[0] > 0) - s_loss = spatial_loss(node_vec1, node_vec2, supports, edge_indices) - return masked_mae(prediction, target, 0.0) - 0.3 * s_loss # 源代码:pipline.py line30 - else: - return masked_mae(prediction, target, 0.0) \ No newline at end of file diff --git a/baselines/BigSTPreprocess/PEMS08.py b/baselines/BigSTPreprocess/PEMS08.py deleted file mode 100644 index 39d7f4b9..00000000 --- a/baselines/BigSTPreprocess/PEMS08.py +++ /dev/null @@ -1,153 +0,0 @@ -import os -import sys -import torch -from easydict import EasyDict -sys.path.append(os.path.abspath(__file__ + '/../../..')) - -from basicts.metrics import masked_mae, masked_mape, masked_rmse -from basicts.data import TimeSeriesForecastingDataset -from basicts.runners import SimpleTimeSeriesForecastingRunner -from basicts.scaler import ZScoreScaler -from basicts.utils import get_regular_settings, load_adj - -from .arch import BigSTPreprocess -from .runner import BigSTPreprocessRunner - -############################## Hot Parameters ############################## -# Dataset & Metrics configuration -DATA_NAME = 'PEMS08' # Dataset name -regular_settings = get_regular_settings(DATA_NAME) -INPUT_LEN = 2016 -OUTPUT_LEN = 12 -TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios -NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data -RESCALE = regular_settings['RESCALE'] # Whether to rescale the data -NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data -# Model architecture and parameters -MODEL_ARCH = BigSTPreprocess -adj_mx, _ = load_adj("datasets/" + DATA_NAME + - "/adj_mx.pkl", "doubletransition") -MODEL_PARAM = { - "num_nodes": 170, - "in_dim": 3, - "dropout": 0.3, - "input_length": INPUT_LEN, - "output_length": OUTPUT_LEN, - "nhid": 32, - "tiny_batch_size": 64, - -} - -NUM_EPOCHS = 100 - -############################## General Configuration ############################## -CFG = EasyDict() -# General settings -CFG.DESCRIPTION = 'An Example Config' -CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode) -# Runner -CFG.RUNNER = BigSTPreprocessRunner - -############################## Environment Configuration ############################## - -CFG.ENV = EasyDict() # Environment settings. Default: None -CFG.ENV.SEED = 0 # Random seed. Default: None - -############################## Dataset Configuration ############################## -CFG.DATASET = EasyDict() -# Dataset settings -CFG.DATASET.NAME = DATA_NAME -CFG.DATASET.TYPE = TimeSeriesForecastingDataset -CFG.DATASET.PARAM = EasyDict({ - 'dataset_name': DATA_NAME, - 'train_val_test_ratio': TRAIN_VAL_TEST_RATIO, - 'input_len': INPUT_LEN, - 'output_len': OUTPUT_LEN, - # 'mode' is automatically set by the runner -}) - -############################## Scaler Configuration ############################## -CFG.SCALER = EasyDict() -# Scaler settings -CFG.SCALER.TYPE = ZScoreScaler # Scaler class -CFG.SCALER.PARAM = EasyDict({ - 'dataset_name': DATA_NAME, - 'train_ratio': TRAIN_VAL_TEST_RATIO[0], - 'norm_each_channel': NORM_EACH_CHANNEL, - 'rescale': RESCALE, -}) - -############################## Model Configuration ############################## -CFG.MODEL = EasyDict() -# Model settings -CFG.MODEL.NAME = MODEL_ARCH.__name__ -CFG.MODEL.ARCH = MODEL_ARCH -CFG.MODEL.PARAM = MODEL_PARAM -CFG.MODEL.FORWARD_FEATURES = [0, 1, 2] -CFG.MODEL.TARGET_FEATURES = [0] - -############################## Metrics Configuration ############################## - -CFG.METRICS = EasyDict() -# Metrics settings -CFG.METRICS.FUNCS = EasyDict({ - 'MAE': masked_mae, - 'MAPE': masked_mape, - 'RMSE': masked_rmse, - }) -CFG.METRICS.TARGET = 'MAE' -CFG.METRICS.NULL_VAL = NULL_VAL - -############################## Training Configuration ############################## -CFG.TRAIN = EasyDict() -CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS -CFG.TRAIN.CKPT_SAVE_DIR = os.path.join( - 'checkpoints', - MODEL_ARCH.__name__, - '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)]) -) -CFG.TRAIN.LOSS = masked_mae -# Optimizer settings -CFG.TRAIN.OPTIM = EasyDict() -CFG.TRAIN.OPTIM.TYPE = "AdamW" -CFG.TRAIN.OPTIM.PARAM = { - "lr": 0.002, - "weight_decay": 0.0001, -} -# Learning rate scheduler settings -CFG.TRAIN.LR_SCHEDULER = EasyDict() -CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR" -CFG.TRAIN.LR_SCHEDULER.PARAM = { - "milestones": [1, 50], - "gamma": 0.5 -} -# Train data loader settings -CFG.TRAIN.DATA = EasyDict() -CFG.TRAIN.DATA.BATCH_SIZE = 1 -CFG.TRAIN.DATA.SHUFFLE = True -# Gradient clipping settings -CFG.TRAIN.CLIP_GRAD_PARAM = { - "max_norm": 5.0 -} - -############################## Validation Configuration ############################## -CFG.VAL = EasyDict() -CFG.VAL.INTERVAL = 1 -CFG.VAL.DATA = EasyDict() -CFG.VAL.DATA.BATCH_SIZE = 1 - -############################## Test Configuration ############################## -CFG.TEST = EasyDict() -CFG.TEST.INTERVAL = 1 -CFG.TEST.DATA = EasyDict() -CFG.TEST.DATA.BATCH_SIZE = 1 - -############################## Evaluation Configuration ############################## - -CFG.EVAL = EasyDict() - -# Evaluation parameters -CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: [] -CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True - - diff --git a/baselines/BigSTPreprocess/arch/__init__.py b/baselines/BigSTPreprocess/arch/__init__.py deleted file mode 100644 index b56180dd..00000000 --- a/baselines/BigSTPreprocess/arch/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .bigst_preprocess_arch import BigSTPreprocess - -__all__ = ["BigSTPreprocess"] diff --git a/baselines/BigSTPreprocess/arch/bigst_preprocess_arch.py b/baselines/BigSTPreprocess/arch/bigst_preprocess_arch.py deleted file mode 100644 index 4a38990a..00000000 --- a/baselines/BigSTPreprocess/arch/bigst_preprocess_arch.py +++ /dev/null @@ -1,220 +0,0 @@ -import math -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable -import sys -import numpy as np -import pdb - -def create_projection_matrix(m, d, seed=0, scaling=0, struct_mode=False): - nb_full_blocks = int(m/d) - block_list = [] - current_seed = seed - for _ in range(nb_full_blocks): - torch.manual_seed(current_seed) - if struct_mode: - q = create_products_of_givens_rotations(d, current_seed) - else: - unstructured_block = torch.randn((d, d)) - q, _ = torch.qr(unstructured_block) - q = torch.t(q) - block_list.append(q) - current_seed += 1 - remaining_rows = m - nb_full_blocks * d - if remaining_rows > 0: - torch.manual_seed(current_seed) - if struct_mode: - q = create_products_of_givens_rotations(d, current_seed) - else: - unstructured_block = torch.randn((d, d)) - q, _ = torch.qr(unstructured_block) - q = torch.t(q) - block_list.append(q[0:remaining_rows]) - final_matrix = torch.vstack(block_list) - - current_seed += 1 - torch.manual_seed(current_seed) - if scaling == 0: - multiplier = torch.norm(torch.randn((m, d)), dim=1) - elif scaling == 1: - multiplier = torch.sqrt(torch.tensor(float(d))) * torch.ones(m) - else: - raise ValueError("Scaling must be one of {0, 1}. Was %s" % scaling) - - return torch.matmul(torch.diag(multiplier), final_matrix) - -def create_products_of_givens_rotations(dim, seed): - nb_givens_rotations = dim * int(math.ceil(math.log(float(dim)))) - q = np.eye(dim, dim) - np.random.seed(seed) - for _ in range(nb_givens_rotations): - random_angle = math.pi * np.random.uniform() - random_indices = np.random.choice(dim, 2) - index_i = min(random_indices[0], random_indices[1]) - index_j = max(random_indices[0], random_indices[1]) - slice_i = q[index_i] - slice_j = q[index_j] - new_slice_i = math.cos(random_angle) * slice_i + math.cos(random_angle) * slice_j - new_slice_j = -math.sin(random_angle) * slice_i + math.cos(random_angle) * slice_j - q[index_i] = new_slice_i - q[index_j] = new_slice_j - return torch.tensor(q, dtype=torch.float32) - -def softmax_kernel_transformation(data, is_query, projection_matrix=None, numerical_stabilizer=0.000001): - data_normalizer = 1.0 / torch.sqrt(torch.sqrt(torch.tensor(data.shape[-1], dtype=torch.float32))) - data = data_normalizer * data - ratio = 1.0 / torch.sqrt(torch.tensor(projection_matrix.shape[0], dtype=torch.float32)) - data_dash = torch.einsum("bnhd,md->bnhm", data, projection_matrix) - diag_data = torch.square(data) - diag_data = torch.sum(diag_data, dim=len(data.shape)-1) - diag_data = diag_data / 2.0 - diag_data = torch.unsqueeze(diag_data, dim=len(data.shape)-1) - last_dims_t = len(data_dash.shape) - 1 - attention_dims_t = len(data_dash.shape) - 3 - if is_query: - data_dash = ratio * ( - torch.exp(data_dash - diag_data - torch.max(data_dash, dim=last_dims_t, keepdim=True)[0]) + numerical_stabilizer - ) - else: - data_dash = ratio * ( - torch.exp(data_dash - diag_data - torch.max(torch.max(data_dash, dim=last_dims_t, keepdim=True)[0], - dim=attention_dims_t, keepdim=True)[0]) + numerical_stabilizer - ) - return data_dash - -def numerator(qs, ks, vs): - kvs = torch.einsum("nbhm,nbhd->bhmd", ks, vs) # kvs refers to U_k in the paper - return torch.einsum("nbhm,bhmd->nbhd", qs, kvs) - -def denominator(qs, ks): - all_ones = torch.ones([ks.shape[0]]).to(qs.device) - ks_sum = torch.einsum("nbhm,n->bhm", ks, all_ones) # ks_sum refers to O_k in the paper - return torch.einsum("nbhm,bhm->nbh", qs, ks_sum) - -def linearized_softmax(x, query, key): - # x: [B, N, H, D] query: [B, N, H, m], key: [B, N, H, m] - query = query.permute(1, 0, 2, 3) # [N, B, H, m] - key = key.permute(1, 0, 2, 3) # [N, B, H, m] - x = x.permute(1, 0, 2, 3) # [N, B, H, D] - - z_num = numerator(query, key, x) # [N, B, H, D] - z_den = denominator(query, key) # [N, H] - - z_num = z_num.permute(1, 0, 2, 3) # [B, N, H, D] - z_den = z_den.permute(1, 0, 2) - z_den = torch.unsqueeze(z_den, len(z_den.shape)) - z_output = z_num / z_den # # [B, N, H, D] - - return z_output - -class linearized_attention(nn.Module): - def __init__(self, c_in, c_out, dropout, random_feature_dim=30, tau=1.0, num_heads=4): - super(linearized_attention, self).__init__() - self.Wk = nn.Linear(c_in, c_out * num_heads) - self.Wq = nn.Linear(c_in, c_out * num_heads) - self.Wv = nn.Linear(c_in, c_out * num_heads) - self.Wo = nn.Linear(c_out * num_heads, c_out) - self.c_in = c_in - self.c_out = c_out - self.num_heads = num_heads - self.tau = tau - self.random_feature_dim = random_feature_dim - self.activation = nn.ReLU - self.dropout = dropout - - def reset_parameters(self): - self.Wk.reset_parameters() - self.Wq.reset_parameters() - self.Wv.reset_parameters() - self.Wo.reset_parameters() - - def forward(self, x): - B, T = x.size(0), x.size(1) # (B, T, D) - query = self.Wq(x).reshape(-1, T, self.num_heads, self.c_out) # (B, T, H, D) - key = self.Wk(x).reshape(-1, T, self.num_heads, self.c_out) # (B, T, H, D) - x = self.Wv(x).reshape(-1, T, self.num_heads, self.c_out) # (B, T, H, D) - - dim = query.shape[-1] # (B, T, H, D) - seed = torch.ceil(torch.abs(torch.sum(query) * 1e8)).to(torch.int32) - projection_matrix = create_projection_matrix(self.random_feature_dim, dim, seed=seed).to(query.device) # (d, m) - query = query / math.sqrt(self.tau) - key = key / math.sqrt(self.tau) - query = softmax_kernel_transformation(query, True, projection_matrix) # [B, T, H, m] - key = softmax_kernel_transformation(key, False, projection_matrix) # [B, T, H, m] - - x = linearized_softmax(x, query, key) - - x = self.Wo(x.flatten(-2, -1)) # (B, T, D) - - return x - - -class BigSTPreprocess(nn.Module): - """ - Paper: BigST: Linear Complexity Spatio-Temporal Graph Neural Network for Traffic Forecasting on Large-Scale Road Networks - Link: https://dl.acm.org/doi/10.14778/3641204.3641217 - Official Code: https://github.com/usail-hkust/BigST?tab=readme-ov-file - Venue: VLDB 2024 - Task: Spatial-Temporal Forecasting - """ - def __init__(self, input_length, output_length, in_dim, num_nodes, nhid, tiny_batch_size, dropout=0.3): - # def __init__(self, **model_kwargs): - super(BigSTPreprocess, self).__init__() - self.tau = 1.0 - self.layer_num = 3 - self.random_feature_dim = nhid*2 - - self.use_residual = True - self.use_bn = False - self.use_act = True - - self.dropout = dropout - self.activation = nn.ReLU() - - self.fc_convs = nn.ModuleList() - self.transformer_layer = nn.ModuleList() - self.bn = nn.ModuleList() - self.context_conv = nn.Conv2d(in_channels=in_dim, out_channels=nhid, kernel_size=(12, 1), stride=(12, 1)) - - self.temporal_embedding = nn.Parameter(torch.empty(int(input_length/12), nhid), requires_grad=True) # (C, nhid) - nn.init.xavier_uniform_(self.temporal_embedding) - - for i in range(self.layer_num): - self.transformer_layer.append(linearized_attention(nhid, nhid, self.dropout, self.random_feature_dim, self.tau)) - self.bn.append(nn.LayerNorm(nhid)) - - self.regression_layer = nn.Linear(nhid, output_length) - - self.tiny_batch_size = tiny_batch_size - - def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor: - x = history_data.transpose(1,2) - # input: (1, 9638, 2016, 3) (B, N, T, D) - B, N, T, D = x.size() - pe = self.temporal_embedding.unsqueeze(0).expand(B*N, -1, -1) # (B*N, T/12, nhid) - - x = x.reshape(B*N, T, D) - x = x.permute(0, 2, 1).unsqueeze(-1) # (B*N, T, D) -> (B*N, D, T, 1) - - # convolution layer - x = self.context_conv(x) # (B*N, D, T, 1) -> (B*N, nhid, T/12, 1) - x = x.squeeze(-1) # (B*N, nhid, T/12) - - # temporal embedding layer - x = x.permute(0, 2, 1) # (B*N, T/12, nhid) - x = x+pe # (B*N, T/12, nhid) - - # linearized attention - for num in range(self.layer_num): - residual = x # (B*N, T/12, nhid) - x = self.transformer_layer[num](x) # (B*N, T/12, nhid) - x = self.bn[num](x) - x = x+residual # (B*N, T/12, nhid) - - x = self.activation(x) # (B*N, T/12, nhid) - x = x[:, -1, :] - # x = torch.sum(x, dim=1) # (B*N, nhid) - feat = x.view(B, N, -1) # (B, N, nhid) - x = self.regression_layer(feat) # (B, N, output_length) - return {'prediction': x.transpose(1,2).unsqueeze(-1), 'feat':feat} \ No newline at end of file diff --git a/baselines/BigSTPreprocess/runner/__init__.py b/baselines/BigSTPreprocess/runner/__init__.py deleted file mode 100644 index 2a0ecce8..00000000 --- a/baselines/BigSTPreprocess/runner/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .bigstpreprocess_runner import BigSTPreprocessRunner \ No newline at end of file diff --git a/baselines/BigSTPreprocess/runner/bigstpreprocess_runner.py b/baselines/BigSTPreprocess/runner/bigstpreprocess_runner.py deleted file mode 100644 index 74b9a607..00000000 --- a/baselines/BigSTPreprocess/runner/bigstpreprocess_runner.py +++ /dev/null @@ -1,49 +0,0 @@ -from typing import Tuple, Union, Dict -import torch -import numpy as np -import wandb -import pdb -import os - -from basicts.runners import SimpleTimeSeriesForecastingRunner - - -class BigSTPreprocessRunner(SimpleTimeSeriesForecastingRunner): - def __init__(self, cfg: dict): - super().__init__(cfg) - - self.tiny_batch_size = cfg.MODEL.PARAM.tiny_batch_size - - def preprocessing(self, input_data: Dict) -> Dict: - """Preprocess data. - - Args: - input_data (Dict): Dictionary containing data to be processed. - - Returns: - Dict: Processed data. - """ - - input_data = super().preprocessing(input_data) - - x = input_data['inputs'] - y = input_data['target'] - - - B, T, N, F = x.shape - batch_num = int(B * N / self.tiny_batch_size) # 似乎要确保不能等于0 - idx_perm = np.random.permutation([i for i in range(B*N)]) - - for j in range(batch_num): - if j==batch_num-1: - x_ = x[:, :, idx_perm[(j+1)*self.tiny_batch_size:], :] - y_ = y[:, :, idx_perm[(j+1)*self.tiny_batch_size:], :] - else: - x_ = x[:, :, idx_perm[j*self.tiny_batch_size:(j+1)*self.tiny_batch_size], :] - y_ = y[:, :, idx_perm[j*self.tiny_batch_size:(j+1)*self.tiny_batch_size], :] - - input_data['inputs'] = x_ - input_data['target'] = y_ - return input_data - - \ No newline at end of file