diff --git a/baselines/BigST/PEMS08.py b/baselines/BigST/PEMS08.py
new file mode 100644
index 00000000..9c9a4b2e
--- /dev/null
+++ b/baselines/BigST/PEMS08.py
@@ -0,0 +1,182 @@
+import os
+import sys
+import torch
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings, load_adj
+
+from .arch import BigST
+from .loss import bigst_loss
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = 2016  # Length of input sequence; deliberately overrides regular_settings['INPUT_LEN']
+OUTPUT_LEN = 12  # Length of output sequence; overrides regular_settings['OUTPUT_LEN']
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL']  # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE']  # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL']  # Null value in the data
+# Model architecture and parameters
+# Checkpoint produced by the BigSTPreprocess stage (see PreprocessPEMS08.py);
+# the hash segment in the path differs per run.
+PREPROCESSED_FILE = "checkpoints/BigSTPreprocess/PEMS08_100_2016_12/db8308a2c87de35e5f3db6177c5714ff/BigSTPreprocess_best_val_MAE.pt"
+MODEL_ARCH = BigST
+
+adj_mx, _ = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "doubletransition")
+MODEL_PARAM = {
+    "bigst_args": {
+        "num_nodes": 170,
+        "seq_num": 12,
+        "in_dim": 3,
+        "out_dim": OUTPUT_LEN,  # the original code hard-codes this to 12
+        "hid_dim": 32,
+        "tau": 0.25,
+        "random_feature_dim": 64,
+        "node_emb_dim": 32,
+        "time_emb_dim": 32,
+        "use_residual": True,
+        "use_bn": True,
+        "use_long": True,
+        "use_spatial": True,
+        "dropout": 0.3,
+        "supports": [torch.tensor(i) for i in adj_mx],
+        "time_of_day_size": 288,
+        "day_of_week_size": 7
+    },
+    "preprocess_path": PREPROCESSED_FILE,
+    "preprocess_args": {
+        "num_nodes": 170,
+        "in_dim": 3,
+        "dropout": 0.3,
+        "input_length": 2016,
+        "output_length": 12,
+        "nhid": 32,
+        "tiny_batch_size": 64,
+    }
+}
+
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1  # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = SimpleTimeSeriesForecastingRunner
+
+############################## Environment Configuration ##############################
+
+CFG.ENV = EasyDict()  # Environment settings. Default: None
+CFG.ENV.SEED = 0  # Random seed. Default: None
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler  # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+# The spatial regularization term only exists when 'use_spatial' is enabled
+CFG.TRAIN.LOSS = bigst_loss if MODEL_PARAM['bigst_args']['use_spatial'] else masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "AdamW"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.002,
+    "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 50],
+    "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 64
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    "max_norm": 5.0
+}
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 64
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 64
+
+############################## Evaluation Configuration ##############################
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12]  # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True  # Whether to use GPU for evaluation. Default: True
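
BigST is trained in two stages: the long-sequence feature extractor (BigSTPreprocess, configured in PreprocessPEMS08.py below) is trained first, and the BigST config above then points PREPROCESSED_FILE at the resulting checkpoint. A minimal launch sketch, assuming BasicTS's usual launch_training entry point (the hash segment in the checkpoint path differs per run, so PREPROCESSED_FILE must be updated accordingly):

from basicts import launch_training

# Stage 1: pre-train the long-term feature extractor.
launch_training('baselines/BigST/PreprocessPEMS08.py', gpus='0')

# Stage 2: train BigST itself; PREPROCESSED_FILE above must point at the
# best checkpoint written by stage 1.
launch_training('baselines/BigST/PEMS08.py', gpus='0')
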
diff --git a/baselines/BigST/PreprocessPEMS08.py b/baselines/BigST/PreprocessPEMS08.py
new file mode 100644
index 00000000..39d7f4b9
--- /dev/null
+++ b/baselines/BigST/PreprocessPEMS08.py
@@ -0,0 +1,153 @@
+import os
+import sys
+from easydict import EasyDict
+sys.path.append(os.path.abspath(__file__ + '/../../..'))
+
+from basicts.metrics import masked_mae, masked_mape, masked_rmse
+from basicts.data import TimeSeriesForecastingDataset
+from basicts.scaler import ZScoreScaler
+from basicts.utils import get_regular_settings
+
+from .arch import BigSTPreprocess
+from .runner import BigSTPreprocessRunner
+
+############################## Hot Parameters ##############################
+# Dataset & Metrics configuration
+DATA_NAME = 'PEMS08'  # Dataset name
+regular_settings = get_regular_settings(DATA_NAME)
+INPUT_LEN = 2016  # Length of input sequence; overrides regular_settings['INPUT_LEN']
+OUTPUT_LEN = 12  # Length of output sequence
+TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
+NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL']  # Whether to normalize each channel of the data
+RESCALE = regular_settings['RESCALE']  # Whether to rescale the data
+NULL_VAL = regular_settings['NULL_VAL']  # Null value in the data
+# Model architecture and parameters
+MODEL_ARCH = BigSTPreprocess
+MODEL_PARAM = {
+    "num_nodes": 170,
+    "in_dim": 3,
+    "dropout": 0.3,
+    "input_length": INPUT_LEN,
+    "output_length": OUTPUT_LEN,
+    "nhid": 32,
+    "tiny_batch_size": 64,
+}
+
+NUM_EPOCHS = 100
+
+############################## General Configuration ##############################
+CFG = EasyDict()
+# General settings
+CFG.DESCRIPTION = 'An Example Config'
+CFG.GPU_NUM = 1  # Number of GPUs to use (0 for CPU mode)
+# Runner
+CFG.RUNNER = BigSTPreprocessRunner
+
+############################## Environment Configuration ##############################
+
+CFG.ENV = EasyDict()  # Environment settings. Default: None
+CFG.ENV.SEED = 0  # Random seed. Default: None
+
+############################## Dataset Configuration ##############################
+CFG.DATASET = EasyDict()
+# Dataset settings
+CFG.DATASET.NAME = DATA_NAME
+CFG.DATASET.TYPE = TimeSeriesForecastingDataset
+CFG.DATASET.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
+    'input_len': INPUT_LEN,
+    'output_len': OUTPUT_LEN,
+    # 'mode' is automatically set by the runner
+})
+
+############################## Scaler Configuration ##############################
+CFG.SCALER = EasyDict()
+# Scaler settings
+CFG.SCALER.TYPE = ZScoreScaler  # Scaler class
+CFG.SCALER.PARAM = EasyDict({
+    'dataset_name': DATA_NAME,
+    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
+    'norm_each_channel': NORM_EACH_CHANNEL,
+    'rescale': RESCALE,
+})
+
+############################## Model Configuration ##############################
+CFG.MODEL = EasyDict()
+# Model settings
+CFG.MODEL.NAME = MODEL_ARCH.__name__
+CFG.MODEL.ARCH = MODEL_ARCH
+CFG.MODEL.PARAM = MODEL_PARAM
+CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
+CFG.MODEL.TARGET_FEATURES = [0]
+
+############################## Metrics Configuration ##############################
+
+CFG.METRICS = EasyDict()
+# Metrics settings
+CFG.METRICS.FUNCS = EasyDict({
+    'MAE': masked_mae,
+    'MAPE': masked_mape,
+    'RMSE': masked_rmse,
+})
+CFG.METRICS.TARGET = 'MAE'
+CFG.METRICS.NULL_VAL = NULL_VAL
+
+############################## Training Configuration ##############################
+CFG.TRAIN = EasyDict()
+CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
+CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
+    'checkpoints',
+    MODEL_ARCH.__name__,
+    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
+)
+CFG.TRAIN.LOSS = masked_mae
+# Optimizer settings
+CFG.TRAIN.OPTIM = EasyDict()
+CFG.TRAIN.OPTIM.TYPE = "AdamW"
+CFG.TRAIN.OPTIM.PARAM = {
+    "lr": 0.002,
+    "weight_decay": 0.0001,
+}
+# Learning rate scheduler settings
+CFG.TRAIN.LR_SCHEDULER = EasyDict()
+CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
+CFG.TRAIN.LR_SCHEDULER.PARAM = {
+    "milestones": [1, 50],
+    "gamma": 0.5
+}
+# Train data loader settings
+CFG.TRAIN.DATA = EasyDict()
+CFG.TRAIN.DATA.BATCH_SIZE = 1
+CFG.TRAIN.DATA.SHUFFLE = True
+# Gradient clipping settings
+CFG.TRAIN.CLIP_GRAD_PARAM = {
+    "max_norm": 5.0
+}
+
+############################## Validation Configuration ##############################
+CFG.VAL = EasyDict()
+CFG.VAL.INTERVAL = 1
+CFG.VAL.DATA = EasyDict()
+CFG.VAL.DATA.BATCH_SIZE = 1
+
+############################## Test Configuration ##############################
+CFG.TEST = EasyDict()
+CFG.TEST.INTERVAL = 1
+CFG.TEST.DATA = EasyDict()
+CFG.TEST.DATA.BATCH_SIZE = 1
+
+############################## Evaluation Configuration ##############################
+
+CFG.EVAL = EasyDict()
+
+# Evaluation parameters
+CFG.EVAL.HORIZONS = [3, 6, 12]  # Prediction horizons for evaluation. Default: []
+CFG.EVAL.USE_GPU = True  # Whether to use GPU for evaluation. Default: True
diff --git a/baselines/BigST/arch/__init__.py b/baselines/BigST/arch/__init__.py
new file mode 100644
index 00000000..e2d419fd
--- /dev/null
+++ b/baselines/BigST/arch/__init__.py
@@ -0,0 +1,5 @@
+from .bigst_arch import BigST
+from .preprocess import BigSTPreprocess
+
+
+__all__ = ["BigST", "BigSTPreprocess"]
diff --git a/baselines/BigST/arch/bigst_arch.py b/baselines/BigST/arch/bigst_arch.py
new file mode 100644
index 00000000..dd3d0342
--- /dev/null
+++ b/baselines/BigST/arch/bigst_arch.py
@@ -0,0 +1,78 @@
+import torch
+import torch.nn as nn
+
+from .preprocess import BigSTPreprocess
+from .model import Model
+
+
+def sample_period(x, time_num):
+    # x: (B, N, T, F); average the first 12 steps of every period (e.g. every day)
+    history_length = x.shape[-2]
+    idx_list = [i for i in range(history_length)]
+    period_list = [idx_list[i:i+12] for i in range(0, history_length, time_num)]
+    period_feat = [x[:, :, sublist, 0] for sublist in period_list]
+    period_feat = torch.stack(period_feat)  # (num_periods, B, N, 12)
+    period_feat = torch.mean(period_feat, dim=0)  # (B, N, 12)
+
+    return period_feat
+
+
+class BigST(nn.Module):
+    """
+    Paper: BigST: Linear Complexity Spatio-Temporal Graph Neural Network for Traffic Forecasting on Large-Scale Road Networks
+    Link: https://dl.acm.org/doi/10.14778/3641204.3641217
+    Official Code: https://github.com/usail-hkust/BigST?tab=readme-ov-file
+    Venue: VLDB 2024
+    Task: Spatial-Temporal Forecasting
+    """
+
+    def __init__(self, bigst_args, preprocess_path, preprocess_args):
+        super(BigST, self).__init__()
+
+        self.use_long = bigst_args['use_long']
+        self.in_dim = bigst_args['in_dim']
+        self.out_dim = bigst_args['out_dim']
+        self.time_num = bigst_args['time_of_day_size']
+        self.bigst = Model(**bigst_args)
+
+        if self.use_long:
+            self.feat_extractor = BigSTPreprocess(**preprocess_args)
+            self.load_pre_trained_model(preprocess_path)
+
+    def load_pre_trained_model(self, preprocess_path):
+        """Load the pre-trained long-term feature extractor."""
+
+        # load parameters
+        checkpoint_dict = torch.load(preprocess_path, map_location='cpu')
+        self.feat_extractor.load_state_dict(checkpoint_dict["model_state_dict"])
+        # freeze parameters
+        for param in self.feat_extractor.parameters():
+            param.requires_grad = False
+
+        self.feat_extractor.eval()
+
+    def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor:
+        history_data = history_data.transpose(1, 2)  # (B, N, T, D)
+        x = history_data[:, :, -self.out_dim:]  # (B, N, out_dim, D): the last out_dim steps as short-term input
+
+        if self.use_long:
+            # extract the long-term feature sample by sample to bound memory usage
+            feat = []
+            for i in range(history_data.shape[0]):
+                with torch.no_grad():
+                    feat_sample = self.feat_extractor(history_data[[i], :, :, :], future_data, batch_seen, epoch, train)
+                    feat.append(feat_sample['feat'])
+
+            feat = torch.cat(feat, dim=0)
+            feat_period = sample_period(history_data, self.time_num)
+            feat = torch.cat([feat, feat_period], dim=2)
+
+            return self.bigst(x, feat)
+
+        else:
+            return self.bigst(x)
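
With INPUT_LEN = 2016 and time_of_day_size = 288 (one week of 5-minute steps), sample_period slices out the first 12 steps of each day and averages them across the seven days. A small shape check, assuming the module path used in this diff:

import torch
from baselines.BigST.arch.bigst_arch import sample_period

B, N, T = 2, 4, 2016
x = torch.randn(B, N, T, 3)       # (B, N, T, F)
out = sample_period(x, time_num=288)
print(out.shape)                  # torch.Size([2, 4, 12]): the same 12-step
                                  # window averaged over the 7 days
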
diff --git a/baselines/BigST/arch/linear_conv.py b/baselines/BigST/arch/linear_conv.py
new file mode 100644
index 00000000..34d84eab
--- /dev/null
+++ b/baselines/BigST/arch/linear_conv.py
@@ -0,0 +1,99 @@
+import math
+
+import torch
+import torch.nn as nn
+
+from .random_map import create_random_matrix, random_feature_map
+
+
+def linear_kernel(x, node_vec1, node_vec2):
+    # x: [B, N, 1, nhid], node_vec1: [B, N, 1, r], node_vec2: [B, N, 1, r]
+    node_vec1 = node_vec1.permute(1, 0, 2, 3)  # [N, B, 1, r]
+    node_vec2 = node_vec2.permute(1, 0, 2, 3)  # [N, B, 1, r]
+    x = x.permute(1, 0, 2, 3)  # [N, B, 1, nhid]
+
+    # numerator: aggregate x with the implicit attention matrix node_vec1 @ node_vec2^T
+    v2x = torch.einsum("nbhm,nbhd->bhmd", node_vec2, x)
+    out1 = torch.einsum("nbhm,bhmd->nbhd", node_vec1, v2x)  # [N, B, 1, nhid]
+
+    # denominator: row sums of the implicit attention matrix
+    one_matrix = torch.ones([node_vec2.shape[0]]).to(node_vec1.device)
+    node_vec2_sum = torch.einsum("nbhm,n->bhm", node_vec2, one_matrix)
+    out2 = torch.einsum("nbhm,bhm->nbh", node_vec1, node_vec2_sum)  # [N, B, 1]
+
+    out1 = out1.permute(1, 0, 2, 3)  # [B, N, 1, nhid]
+    out2 = out2.permute(1, 0, 2).unsqueeze(-1)  # [B, N, 1, 1]
+    out = out1 / out2  # [B, N, 1, nhid]
+
+    return out
+
+
+class conv_approximation(nn.Module):
+    def __init__(self, dropout, tau, random_feature_dim):
+        super(conv_approximation, self).__init__()
+        self.tau = tau
+        self.random_feature_dim = random_feature_dim
+        self.activation = nn.ReLU()
+        self.dropout = dropout
+
+    def forward(self, x, node_vec1, node_vec2):
+        B = x.size(0)  # (B, N, 1, nhid)
+        dim = node_vec1.shape[-1]  # feature dimension d of the node vectors
+
+        # derive a data-dependent seed so both feature maps share one random matrix
+        random_seed = torch.ceil(torch.abs(torch.sum(node_vec1) * 1e8)).to(torch.int32)
+        random_matrix = create_random_matrix(self.random_feature_dim, dim, seed=random_seed).to(node_vec1.device)  # (r, d)
+
+        node_vec1 = node_vec1 / math.sqrt(self.tau)
+        node_vec2 = node_vec2 / math.sqrt(self.tau)
+        node_vec1_prime = random_feature_map(node_vec1, True, random_matrix)  # [B, N, 1, r]
+        node_vec2_prime = random_feature_map(node_vec2, False, random_matrix)  # [B, N, 1, r]
+
+        x = linear_kernel(x, node_vec1_prime, node_vec2_prime)
+
+        return x, node_vec1_prime, node_vec2_prime
+
+
+class linearized_conv(nn.Module):
+    def __init__(self, in_dim, hid_dim, dropout, tau=1.0, random_feature_dim=64):
+        super(linearized_conv, self).__init__()
+
+        self.dropout = dropout
+        self.tau = tau
+        self.random_feature_dim = random_feature_dim
+
+        self.input_fc = nn.Conv2d(in_channels=in_dim, out_channels=hid_dim, kernel_size=(1, 1), bias=True)
+        self.activation = nn.ReLU()
+        self.dropout_layer = nn.Dropout(p=dropout)
+
+        self.conv_app_layer = conv_approximation(self.dropout, self.tau, self.random_feature_dim)
+
+    def forward(self, input_data, node_vec1, node_vec2):
+        x = self.input_fc(input_data)
+        x = self.activation(x)
+        x = self.dropout_layer(x)
+
+        x = x.permute(0, 2, 3, 1)  # (B, N, 1, hid_dim)
+        x, node_vec1_prime, node_vec2_prime = self.conv_app_layer(x, node_vec1, node_vec2)
+        x = x.permute(0, 3, 1, 2)  # (B, hid_dim, N, 1)
+
+        return x, node_vec1_prime, node_vec2_prime
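
The two einsums in linear_kernel avoid ever materializing the N x N attention matrix, so the aggregation is linear in N. A quick self-check (a sketch, assuming the module path in this diff) compares it against the dense row-normalized attention it computes implicitly:

import torch
from baselines.BigST.arch.linear_conv import linear_kernel

B, N, H, r, d = 2, 5, 1, 4, 8
x = torch.randn(B, N, H, d)
v1 = torch.rand(B, N, H, r)   # stands in for node_vec1_prime (positive features)
v2 = torch.rand(B, N, H, r)   # stands in for node_vec2_prime

A = torch.einsum("bnhr,bmhr->bhnm", v1, v2)   # dense attention, [B, H, N, N]
ref = torch.einsum("bhnm,bmhd->bnhd", A / A.sum(-1, keepdim=True), x)

print(torch.allclose(linear_kernel(x, v1, v2), ref, atol=1e-5))  # True
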
diff --git a/baselines/BigST/arch/model.py b/baselines/BigST/arch/model.py
new file mode 100644
index 00000000..31063da6
--- /dev/null
+++ b/baselines/BigST/arch/model.py
@@ -0,0 +1,122 @@
+import torch
+import torch.nn as nn
+
+from .linear_conv import linearized_conv
+
+
+class Model(nn.Module):
+    def __init__(self, seq_num, in_dim, out_dim, hid_dim, num_nodes, tau, random_feature_dim, node_emb_dim, time_emb_dim,
+                 use_residual, use_bn, use_spatial, use_long, dropout, time_of_day_size, day_of_week_size, supports=None, edge_indices=None):
+        super(Model, self).__init__()
+
+        self.tau = tau
+        self.layer_num = 3
+        self.in_dim = in_dim
+        self.random_feature_dim = random_feature_dim
+
+        self.use_residual = use_residual
+        self.use_bn = use_bn
+        self.use_spatial = use_spatial
+        self.use_long = use_long
+
+        self.dropout = dropout
+        self.activation = nn.ReLU()
+        self.supports = supports
+
+        self.time_num = time_of_day_size
+        self.week_num = day_of_week_size
+
+        # node embedding layer
+        self.node_emb_layer = nn.Parameter(torch.empty(num_nodes, node_emb_dim))
+        nn.init.xavier_uniform_(self.node_emb_layer)
+
+        # time embedding layers
+        self.time_emb_layer = nn.Parameter(torch.empty(self.time_num, time_emb_dim))
+        nn.init.xavier_uniform_(self.time_emb_layer)
+        self.week_emb_layer = nn.Parameter(torch.empty(self.week_num, time_emb_dim))
+        nn.init.xavier_uniform_(self.week_emb_layer)
+
+        # input embedding layer
+        self.input_emb_layer = nn.Conv2d(seq_num*in_dim, hid_dim, kernel_size=(1, 1), bias=True)
+
+        self.W_1 = nn.Conv2d(node_emb_dim+time_emb_dim*2, hid_dim, kernel_size=(1, 1), bias=True)
+        self.W_2 = nn.Conv2d(node_emb_dim+time_emb_dim*2, hid_dim, kernel_size=(1, 1), bias=True)
+
+        self.linear_conv = nn.ModuleList()
+        self.bn = nn.ModuleList()
+
+        for i in range(self.layer_num):
+            self.linear_conv.append(linearized_conv(hid_dim*4, hid_dim*4, self.dropout, self.tau, self.random_feature_dim))
+            self.bn.append(nn.LayerNorm(hid_dim*4))
+
+        # the regression head sees the first and the last block of the residual pool
+        if self.use_long:
+            self.regression_layer = nn.Conv2d(hid_dim*4*2+hid_dim+seq_num, out_dim, kernel_size=(1, 1), bias=True)
+        else:
+            self.regression_layer = nn.Conv2d(hid_dim*4*2, out_dim, kernel_size=(1, 1), bias=True)
+
+    def forward(self, x, feat=None):
+        # x: (B, N, T, D)
+        B, N, T, D = x.size()
+
+        time_emb = self.time_emb_layer[(x[:, :, -1, 1] * self.time_num).long()]
+        week_emb = self.week_emb_layer[(x[:, :, -1, 2]).long()]
+
+        # input embedding
+        x = x.contiguous().view(B, N, -1).transpose(1, 2).unsqueeze(-1)  # (B, T*D, N, 1)
+        input_emb = self.input_emb_layer(x)  # (B, hid_dim, N, 1)
+
+        # node embeddings
+        node_emb = self.node_emb_layer.unsqueeze(0).expand(B, -1, -1).transpose(1, 2).unsqueeze(-1)  # (B, node_emb_dim, N, 1)
+
+        # time embeddings
+        time_emb = time_emb.transpose(1, 2).unsqueeze(-1)  # (B, time_emb_dim, N, 1)
+        week_emb = week_emb.transpose(1, 2).unsqueeze(-1)  # (B, time_emb_dim, N, 1)
+
+        x_g = torch.cat([node_emb, time_emb, week_emb], dim=1)  # (B, node_emb_dim+2*time_emb_dim, N, 1)
+        x = torch.cat([input_emb, node_emb, time_emb, week_emb], dim=1)  # (B, hid_dim*4, N, 1)
+
+        # linearized spatial convolution
+        x_pool = [x]  # (B, hid_dim*4, N, 1)
+        node_vec1 = self.W_1(x_g)  # (B, hid_dim, N, 1)
+        node_vec2 = self.W_2(x_g)  # (B, hid_dim, N, 1)
+        node_vec1 = node_vec1.permute(0, 2, 3, 1)  # (B, N, 1, hid_dim)
+        node_vec2 = node_vec2.permute(0, 2, 3, 1)  # (B, N, 1, hid_dim)
+        for i in range(self.layer_num):
+            if self.use_residual:
+                residual = x
+            x, node_vec1_prime, node_vec2_prime = self.linear_conv[i](x, node_vec1, node_vec2)
+
+            if self.use_residual:
+                x = x + residual
+
+            if self.use_bn:
+                x = x.permute(0, 2, 3, 1)  # (B, N, 1, hid_dim*4)
+                x = self.bn[i](x)
+                x = x.permute(0, 3, 1, 2)
+
+        # pool the input block and the final block; this matches the
+        # regression head's hid_dim*4*2 input channels
+        x_pool.append(x)
+        x = torch.cat(x_pool, dim=1)  # (B, hid_dim*4*2, N, 1)
+
+        x = self.activation(x)
+
+        if self.use_long:
+            feat = feat.permute(0, 2, 1).unsqueeze(-1)  # (B, nhid+seq_num, N, 1)
+            x = torch.cat([x, feat], dim=1)
+            x = self.regression_layer(x)  # (B, out_dim, N, 1)
+            x = x.squeeze(-1).permute(0, 2, 1)  # (B, N, out_dim)
+        else:
+            x = self.regression_layer(x)  # (B, out_dim, N, 1)
+            x = x.squeeze(-1).permute(0, 2, 1)  # (B, N, out_dim)
+
+        return {'prediction': x.transpose(1, 2).unsqueeze(-1),  # (B, out_dim, N, 1)
+                'node_vec1': node_vec1_prime,
+                'node_vec2': node_vec2_prime,
+                'supports': self.supports,
+                'use_spatial': self.use_spatial}
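
A minimal smoke test with toy dimensions (a sketch, assuming the module path in this diff; use_long=False so no pre-trained features are needed) confirms the output contract expected by BasicTS:

import torch
from baselines.BigST.arch.model import Model

model = Model(seq_num=12, in_dim=3, out_dim=12, hid_dim=8, num_nodes=5,
              tau=0.25, random_feature_dim=16, node_emb_dim=8, time_emb_dim=8,
              use_residual=True, use_bn=True, use_spatial=False, use_long=False,
              dropout=0.1, time_of_day_size=288, day_of_week_size=7)
x = torch.rand(2, 5, 12, 3)     # (B, N, T, D); feature 1: time of day in [0, 1),
                                # feature 2: day-of-week index
out = model(x)
print(out['prediction'].shape)  # torch.Size([2, 12, 5, 1])
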
diff --git a/baselines/BigST/arch/pipeline.py b/baselines/BigST/arch/pipeline.py
new file mode 100644
index 00000000..fd4122cb
--- /dev/null
+++ b/baselines/BigST/arch/pipeline.py
@@ -0,0 +1,67 @@
+# Stand-alone training pipeline kept from the official BigST implementation for
+# reference; it is not imported by this baseline (BasicTS drives training through
+# the configs and the runner instead), so its `metrics` and `bigst` imports only
+# resolve inside the official repository.
+import torch
+import torch.optim as optim
+
+import metrics
+from bigst import bigst
+
+
+class train_pipeline():
+    def __init__(self, scaler, seq_num, in_dim, hid_dim, num_nodes, tau, random_feature_dim, node_emb_dim, time_emb_dim,
+                 use_residual, use_bn, use_spatial, use_long, dropout, lrate, wdecay, device, supports, edge_indices):
+        self.model = bigst(device, seq_num, in_dim, hid_dim, num_nodes, tau, random_feature_dim, node_emb_dim, time_emb_dim,
+                           use_residual, use_bn, use_spatial, use_long, dropout, supports=supports, edge_indices=edge_indices)
+        self.model.to(device)
+        self.optimizer = optim.Adam(self.model.parameters(), lr=lrate, weight_decay=wdecay)
+        self.loss = metrics.masked_mae
+        self.scaler = scaler
+        self.use_spatial = use_spatial
+        self.clip = 5
+
+    def train(self, input, real_val, feat=None):
+        self.model.train()
+        self.optimizer.zero_grad()
+
+        if self.use_spatial:
+            output, spatial_loss = self.model(input, feat)
+            real = self.scaler.inverse_transform(real_val)
+            predict = self.scaler.inverse_transform(output)
+            loss = self.loss(predict, real, 0.0) - 0.3 * spatial_loss
+        else:
+            output, _ = self.model(input, feat)
+            real = self.scaler.inverse_transform(real_val)
+            predict = self.scaler.inverse_transform(output)
+            loss = self.loss(predict, real, 0.0)
+
+        loss.backward()
+        if self.clip is not None:
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
+        self.optimizer.step()
+        mape = metrics.masked_mape(predict, real, 0.0).item()
+        rmse = metrics.masked_rmse(predict, real, 0.0).item()
+        return loss.item(), mape, rmse
+
+    def eval(self, input, real_val, feat=None, flag='overall'):
+        self.model.eval()
+        with torch.no_grad():
+            output, _ = self.model(input, feat)
+        real = self.scaler.inverse_transform(real_val)
+        predict = self.scaler.inverse_transform(output)
+        if flag == 'overall':
+            loss = self.loss(predict, real, 0.0).item()
+            mape = metrics.masked_mape(predict, real, 0.0).item()
+            rmse = metrics.masked_rmse(predict, real, 0.0).item()
+            return loss, mape, rmse
+        elif flag == 'horizon':
+            loss, mape, rmse = [], [], []
+            for i in range(12):
+                loss.append(self.loss(predict[..., i], real[..., i], 0.0).item())
+                mape.append(metrics.masked_mape(predict[..., i], real[..., i], 0.0).item())
+                rmse.append(metrics.masked_rmse(predict[..., i], real[..., i], 0.0).item())
+            return loss, mape, rmse
diff --git a/baselines/BigST/arch/preprocess.py b/baselines/BigST/arch/preprocess.py
new file mode 100644
index 00000000..02926a47
--- /dev/null
+++ b/baselines/BigST/arch/preprocess.py
@@ -0,0 +1,219 @@
+import math
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+
+def create_projection_matrix(m, d, seed=0, scaling=0, struct_mode=False):
+    # stack of orthogonal random blocks, as in Performer-style random features
+    nb_full_blocks = int(m/d)
+    block_list = []
+    current_seed = seed
+    for _ in range(nb_full_blocks):
+        torch.manual_seed(current_seed)
+        if struct_mode:
+            q = create_products_of_givens_rotations(d, current_seed)
+        else:
+            unstructured_block = torch.randn((d, d))
+            q, _ = torch.linalg.qr(unstructured_block)  # torch.qr is deprecated
+            q = torch.t(q)
+        block_list.append(q)
+        current_seed += 1
+    remaining_rows = m - nb_full_blocks * d
+    if remaining_rows > 0:
+        torch.manual_seed(current_seed)
+        if struct_mode:
+            q = create_products_of_givens_rotations(d, current_seed)
+        else:
+            unstructured_block = torch.randn((d, d))
+            q, _ = torch.linalg.qr(unstructured_block)
+            q = torch.t(q)
+        block_list.append(q[0:remaining_rows])
+    final_matrix = torch.vstack(block_list)
+
+    current_seed += 1
+    torch.manual_seed(current_seed)
+    if scaling == 0:
+        multiplier = torch.norm(torch.randn((m, d)), dim=1)
+    elif scaling == 1:
+        multiplier = torch.sqrt(torch.tensor(float(d))) * torch.ones(m)
+    else:
+        raise ValueError("Scaling must be one of {0, 1}. Was %s" % scaling)
+
+    return torch.matmul(torch.diag(multiplier), final_matrix)
+
+
+def create_products_of_givens_rotations(dim, seed):
+    nb_givens_rotations = dim * int(math.ceil(math.log(float(dim))))
+    q = np.eye(dim, dim)
+    np.random.seed(seed)
+    for _ in range(nb_givens_rotations):
+        random_angle = math.pi * np.random.uniform()
+        random_indices = np.random.choice(dim, 2)
+        index_i = min(random_indices[0], random_indices[1])
+        index_j = max(random_indices[0], random_indices[1])
+        slice_i = q[index_i]
+        slice_j = q[index_j]
+        # Givens rotation of the (i, j) plane
+        new_slice_i = math.cos(random_angle) * slice_i + math.sin(random_angle) * slice_j
+        new_slice_j = -math.sin(random_angle) * slice_i + math.cos(random_angle) * slice_j
+        q[index_i] = new_slice_i
+        q[index_j] = new_slice_j
+    return torch.tensor(q, dtype=torch.float32)
+
+
+def softmax_kernel_transformation(data, is_query, projection_matrix=None, numerical_stabilizer=0.000001):
+    # positive random-feature map approximating the softmax kernel
+    data_normalizer = 1.0 / torch.sqrt(torch.sqrt(torch.tensor(data.shape[-1], dtype=torch.float32)))
+    data = data_normalizer * data
+    ratio = 1.0 / torch.sqrt(torch.tensor(projection_matrix.shape[0], dtype=torch.float32))
+    data_dash = torch.einsum("bnhd,md->bnhm", data, projection_matrix)
+    diag_data = torch.square(data)
+    diag_data = torch.sum(diag_data, dim=len(data.shape)-1)
+    diag_data = diag_data / 2.0
+    diag_data = torch.unsqueeze(diag_data, dim=len(data.shape)-1)
+    last_dims_t = len(data_dash.shape) - 1
+    attention_dims_t = len(data_dash.shape) - 3
+    if is_query:
+        data_dash = ratio * (
+            torch.exp(data_dash - diag_data - torch.max(data_dash, dim=last_dims_t, keepdim=True)[0]) + numerical_stabilizer
+        )
+    else:
+        data_dash = ratio * (
+            torch.exp(data_dash - diag_data - torch.max(torch.max(data_dash, dim=last_dims_t, keepdim=True)[0],
+                                                        dim=attention_dims_t, keepdim=True)[0]) + numerical_stabilizer
+        )
+    return data_dash
+
+
+def numerator(qs, ks, vs):
+    kvs = torch.einsum("nbhm,nbhd->bhmd", ks, vs)  # kvs refers to U_k in the paper
+    return torch.einsum("nbhm,bhmd->nbhd", qs, kvs)
+
+
+def denominator(qs, ks):
+    all_ones = torch.ones([ks.shape[0]]).to(qs.device)
+    ks_sum = torch.einsum("nbhm,n->bhm", ks, all_ones)  # ks_sum refers to O_k in the paper
+    return torch.einsum("nbhm,bhm->nbh", qs, ks_sum)
+
+
+def linearized_softmax(x, query, key):
+    # x: [B, N, H, D], query: [B, N, H, m], key: [B, N, H, m]
+    query = query.permute(1, 0, 2, 3)  # [N, B, H, m]
+    key = key.permute(1, 0, 2, 3)  # [N, B, H, m]
+    x = x.permute(1, 0, 2, 3)  # [N, B, H, D]
+
+    z_num = numerator(query, key, x)  # [N, B, H, D]
+    z_den = denominator(query, key)  # [N, B, H]
+
+    z_num = z_num.permute(1, 0, 2, 3)  # [B, N, H, D]
+    z_den = z_den.permute(1, 0, 2)
+    z_den = torch.unsqueeze(z_den, len(z_den.shape))
+    z_output = z_num / z_den  # [B, N, H, D]
+
+    return z_output
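
softmax_kernel_transformation together with numerator/denominator implements Performer-style positive random features: with enough random features m, the linearized result approaches exact softmax attention over scores q.k/sqrt(d). A rough numerical sketch (assuming the module path in this diff; the gap shrinks as m grows):

import torch
from baselines.BigST.arch.preprocess import (
    create_projection_matrix, softmax_kernel_transformation, linearized_softmax)

B, T, H, d, m = 1, 8, 1, 4, 512
q, k, v = (torch.randn(B, T, H, d) for _ in range(3))

proj = create_projection_matrix(m, d, seed=0)
approx = linearized_softmax(v,
                            softmax_kernel_transformation(q, True, proj),
                            softmax_kernel_transformation(k, False, proj))

scores = torch.einsum("bthd,bshd->bhts", q, k) / d ** 0.5
exact = torch.einsum("bhts,bshd->bthd", torch.softmax(scores, dim=-1), v)
print((approx - exact).abs().max())  # small, and shrinking as m grows
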
torch.einsum("nbhm,bhmd->nbhd", qs, kvs) + +def denominator(qs, ks): + all_ones = torch.ones([ks.shape[0]]).to(qs.device) + ks_sum = torch.einsum("nbhm,n->bhm", ks, all_ones) # ks_sum refers to O_k in the paper + return torch.einsum("nbhm,bhm->nbh", qs, ks_sum) + +def linearized_softmax(x, query, key): + # x: [B, N, H, D] query: [B, N, H, m], key: [B, N, H, m] + query = query.permute(1, 0, 2, 3) # [N, B, H, m] + key = key.permute(1, 0, 2, 3) # [N, B, H, m] + x = x.permute(1, 0, 2, 3) # [N, B, H, D] + + z_num = numerator(query, key, x) # [N, B, H, D] + z_den = denominator(query, key) # [N, H] + + z_num = z_num.permute(1, 0, 2, 3) # [B, N, H, D] + z_den = z_den.permute(1, 0, 2) + z_den = torch.unsqueeze(z_den, len(z_den.shape)) + z_output = z_num / z_den # # [B, N, H, D] + + return z_output + +class linearized_attention(nn.Module): + def __init__(self, c_in, c_out, dropout, random_feature_dim=30, tau=1.0, num_heads=4): + super(linearized_attention, self).__init__() + self.Wk = nn.Linear(c_in, c_out * num_heads) + self.Wq = nn.Linear(c_in, c_out * num_heads) + self.Wv = nn.Linear(c_in, c_out * num_heads) + self.Wo = nn.Linear(c_out * num_heads, c_out) + self.c_in = c_in + self.c_out = c_out + self.num_heads = num_heads + self.tau = tau + self.random_feature_dim = random_feature_dim + self.activation = nn.ReLU + self.dropout = dropout + + def reset_parameters(self): + self.Wk.reset_parameters() + self.Wq.reset_parameters() + self.Wv.reset_parameters() + self.Wo.reset_parameters() + + def forward(self, x): + B, T = x.size(0), x.size(1) # (B, T, D) + query = self.Wq(x).reshape(-1, T, self.num_heads, self.c_out) # (B, T, H, D) + key = self.Wk(x).reshape(-1, T, self.num_heads, self.c_out) # (B, T, H, D) + x = self.Wv(x).reshape(-1, T, self.num_heads, self.c_out) # (B, T, H, D) + + dim = query.shape[-1] # (B, T, H, D) + seed = torch.ceil(torch.abs(torch.sum(query) * 1e8)).to(torch.int32) + projection_matrix = create_projection_matrix(self.random_feature_dim, dim, seed=seed).to(query.device) # (d, m) + query = query / math.sqrt(self.tau) + key = key / math.sqrt(self.tau) + query = softmax_kernel_transformation(query, True, projection_matrix) # [B, T, H, m] + key = softmax_kernel_transformation(key, False, projection_matrix) # [B, T, H, m] + + x = linearized_softmax(x, query, key) + + x = self.Wo(x.flatten(-2, -1)) # (B, T, D) + + return x + + +class BigSTPreprocess(nn.Module): + """ + Paper: BigST: Linear Complexity Spatio-Temporal Graph Neural Network for Traffic Forecasting on Large-Scale Road Networks + Link: https://dl.acm.org/doi/10.14778/3641204.3641217 + Official Code: https://github.com/usail-hkust/BigST?tab=readme-ov-file + Venue: VLDB 2024 + Task: Spatial-Temporal Forecasting + """ + def __init__(self, input_length, output_length, in_dim, num_nodes, nhid, tiny_batch_size, dropout=0.3): + super(BigSTPreprocess, self).__init__() + self.tau = 1.0 + self.layer_num = 3 + self.random_feature_dim = nhid*2 + + self.use_residual = True + self.use_bn = False + self.use_act = True + + self.dropout = dropout + self.activation = nn.ReLU() + + self.fc_convs = nn.ModuleList() + self.transformer_layer = nn.ModuleList() + self.bn = nn.ModuleList() + self.context_conv = nn.Conv2d(in_channels=in_dim, out_channels=nhid, kernel_size=(12, 1), stride=(12, 1)) + + self.temporal_embedding = nn.Parameter(torch.empty(int(input_length/12), nhid), requires_grad=True) # (C, nhid) + nn.init.xavier_uniform_(self.temporal_embedding) + + for i in range(self.layer_num): + 
diff --git a/baselines/BigST/arch/random_map.py b/baselines/BigST/arch/random_map.py
new file mode 100644
index 00000000..ea7e49d4
--- /dev/null
+++ b/baselines/BigST/arch/random_map.py
@@ -0,0 +1,81 @@
+import math
+
+import numpy as np
+import torch
+
+
+def create_products_of_givens_rotations(dim, seed):
+    nb_givens_rotations = dim * int(math.ceil(math.log(float(dim))))
+    q = np.eye(dim, dim)
+    np.random.seed(seed)
+    for _ in range(nb_givens_rotations):
+        random_angle = math.pi * np.random.uniform()
+        random_indices = np.random.choice(dim, 2)
+        index_i = min(random_indices[0], random_indices[1])
+        index_j = max(random_indices[0], random_indices[1])
+        slice_i = q[index_i]
+        slice_j = q[index_j]
+        # Givens rotation of the (i, j) plane
+        new_slice_i = math.cos(random_angle) * slice_i + math.sin(random_angle) * slice_j
+        new_slice_j = -math.sin(random_angle) * slice_i + math.cos(random_angle) * slice_j
+        q[index_i] = new_slice_i
+        q[index_j] = new_slice_j
+    return torch.tensor(q, dtype=torch.float32)
+
+
+def create_random_matrix(m, d, seed=0, scaling=0, struct_mode=False):
+    nb_full_blocks = int(m/d)
+    block_list = []
+    current_seed = seed
+    for _ in range(nb_full_blocks):
+        torch.manual_seed(current_seed)
+        if struct_mode:
+            q = create_products_of_givens_rotations(d, current_seed)
+        else:
+            unstructured_block = torch.randn((d, d))
+            q, _ = torch.linalg.qr(unstructured_block)  # torch.qr is deprecated
+            q = torch.t(q)
+        block_list.append(q)
+        current_seed += 1
+    remaining_rows = m - nb_full_blocks * d
+    if remaining_rows > 0:
+        torch.manual_seed(current_seed)
+        if struct_mode:
+            q = create_products_of_givens_rotations(d, current_seed)
+        else:
+            unstructured_block = torch.randn((d, d))
+            q, _ = torch.linalg.qr(unstructured_block)
+            q = torch.t(q)
+        block_list.append(q[0:remaining_rows])
+    final_matrix = torch.vstack(block_list)
+
+    current_seed += 1
+    torch.manual_seed(current_seed)
+    if scaling == 0:
+        multiplier = torch.norm(torch.randn((m, d)), dim=1)
+    elif scaling == 1:
+        multiplier = torch.sqrt(torch.tensor(float(d))) * torch.ones(m)
+    else:
+        raise ValueError("Scaling must be one of {0, 1}. Was %s" % scaling)
+
+    return torch.matmul(torch.diag(multiplier), final_matrix)
+
+
+def random_feature_map(data, is_query, projection_matrix=None, numerical_stabilizer=0.000001):
+    # positive random-feature map shared by queries and keys (Performer-style)
+    data_normalizer = 1.0 / torch.sqrt(torch.sqrt(torch.tensor(data.shape[-1], dtype=torch.float32)))
+    data = data_normalizer * data
+    ratio = 1.0 / torch.sqrt(torch.tensor(projection_matrix.shape[0], dtype=torch.float32))
+    data_dash = torch.einsum("bnhd,md->bnhm", data, projection_matrix)
+    diag_data = torch.square(data)
+    diag_data = torch.sum(diag_data, dim=len(data.shape)-1)
+    diag_data = diag_data / 2.0
+    diag_data = torch.unsqueeze(diag_data, dim=len(data.shape)-1)
+    last_dims_t = len(data_dash.shape) - 1
+    attention_dims_t = len(data_dash.shape) - 3
+    if is_query:
+        data_dash = ratio * (
+            torch.exp(data_dash - diag_data - torch.max(data_dash, dim=last_dims_t, keepdim=True)[0]) + numerical_stabilizer
+        )
+    else:
+        data_dash = ratio * (
+            torch.exp(data_dash - diag_data - torch.max(torch.max(data_dash, dim=last_dims_t, keepdim=True)[0],
+                                                        dim=attention_dims_t, keepdim=True)[0]) + numerical_stabilizer
+        )
+    return data_dash
Was %s" % scaling) + + return torch.matmul(torch.diag(multiplier), final_matrix) + +def random_feature_map(data, is_query, projection_matrix=None, numerical_stabilizer=0.000001): + data_normalizer = 1.0 / torch.sqrt(torch.sqrt(torch.tensor(data.shape[-1], dtype=torch.float32))) + data = data_normalizer * data + ratio = 1.0 / torch.sqrt(torch.tensor(projection_matrix.shape[0], dtype=torch.float32)) + data_dash = torch.einsum("bnhd,md->bnhm", data, projection_matrix) + diag_data = torch.square(data) + diag_data = torch.sum(diag_data, dim=len(data.shape)-1) + diag_data = diag_data / 2.0 + diag_data = torch.unsqueeze(diag_data, dim=len(data.shape)-1) + last_dims_t = len(data_dash.shape) - 1 + attention_dims_t = len(data_dash.shape) - 3 + if is_query: + data_dash = ratio * ( + torch.exp(data_dash - diag_data - torch.max(data_dash, dim=last_dims_t, keepdim=True)[0]) + numerical_stabilizer + ) + else: + data_dash = ratio * ( + torch.exp(data_dash - diag_data - torch.max(torch.max(data_dash, dim=last_dims_t, keepdim=True)[0], + dim=attention_dims_t, keepdim=True)[0]) + numerical_stabilizer + ) + return data_dash diff --git a/baselines/BigST/loss/__init__.py b/baselines/BigST/loss/__init__.py new file mode 100644 index 00000000..c22530d7 --- /dev/null +++ b/baselines/BigST/loss/__init__.py @@ -0,0 +1 @@ +from .loss import bigst_loss \ No newline at end of file diff --git a/baselines/BigST/loss/loss.py b/baselines/BigST/loss/loss.py new file mode 100644 index 00000000..831f541e --- /dev/null +++ b/baselines/BigST/loss/loss.py @@ -0,0 +1,35 @@ +import torch +import numpy as np +from basicts.metrics import masked_mae + +def spatial_loss(node_vec1, node_vec2, supports, edge_indices): + B = node_vec1.size(0) + node_vec1 = node_vec1.permute(1, 0, 2, 3) # [N, B, 1, r] + node_vec2 = node_vec2.permute(1, 0, 2, 3) # [N, B, 1, r] + + node_vec1_end, node_vec2_start = node_vec1[edge_indices[:, 0]], node_vec2[edge_indices[:, 1]] # [E, B, 1, r] + attn1 = torch.einsum("ebhm,ebhm->ebh", node_vec1_end, node_vec2_start) # [E, B, 1] + attn1 = attn1.permute(1, 0, 2) # [B, E, 1] + + one_matrix = torch.ones([node_vec2.shape[0]]).to(node_vec1.device) + node_vec2_sum = torch.einsum("nbhm,n->bhm", node_vec2, one_matrix) + attn_norm = torch.einsum("nbhm,bhm->nbh", node_vec1, node_vec2_sum) + + attn2 = attn_norm[edge_indices[:, 0]] # [E, B, 1] + attn2 = attn2.permute(1, 0, 2) # [B, E, 1] + attn_score = attn1 / attn2 # [B, E, 1] + + d_norm = supports[0][edge_indices[:, 0], edge_indices[:, 1]] + d_norm = d_norm.reshape(1, -1, 1).repeat(B, 1, attn_score.shape[-1]) + spatial_loss = torch.mean(attn_score.log() * d_norm) + + return spatial_loss + +def bigst_loss(prediction, target, node_vec1, node_vec2, supports, use_spatial): + if use_spatial: + supports = [support.to(prediction.device) for support in supports] + edge_indices = torch.nonzero(supports[0] > 0) + s_loss = spatial_loss(node_vec1, node_vec2, supports, edge_indices) + return masked_mae(prediction, target, 0.0) - 0.3 * s_loss # 源代码:pipline.py line30 + else: + return masked_mae(prediction, target, 0.0) \ No newline at end of file diff --git a/baselines/BigST/runner/__init__.py b/baselines/BigST/runner/__init__.py new file mode 100644 index 00000000..2a0ecce8 --- /dev/null +++ b/baselines/BigST/runner/__init__.py @@ -0,0 +1 @@ +from .bigstpreprocess_runner import BigSTPreprocessRunner \ No newline at end of file diff --git a/baselines/BigST/runner/bigstpreprocess_runner.py b/baselines/BigST/runner/bigstpreprocess_runner.py new file mode 100644 index 00000000..fbff8c45 --- 
diff --git a/baselines/BigST/runner/__init__.py b/baselines/BigST/runner/__init__.py
new file mode 100644
index 00000000..2a0ecce8
--- /dev/null
+++ b/baselines/BigST/runner/__init__.py
@@ -0,0 +1 @@
+from .bigstpreprocess_runner import BigSTPreprocessRunner
diff --git a/baselines/BigST/runner/bigstpreprocess_runner.py b/baselines/BigST/runner/bigstpreprocess_runner.py
new file mode 100644
index 00000000..fbff8c45
--- /dev/null
+++ b/baselines/BigST/runner/bigstpreprocess_runner.py
@@ -0,0 +1,48 @@
+from typing import Dict
+
+import numpy as np
+
+from basicts.runners import SimpleTimeSeriesForecastingRunner
+
+
+class BigSTPreprocessRunner(SimpleTimeSeriesForecastingRunner):
+    def __init__(self, cfg: dict):
+        super().__init__(cfg)
+
+        self.tiny_batch_size = cfg.MODEL.PARAM['tiny_batch_size']
+
+    def preprocessing(self, input_data: Dict) -> Dict:
+        """Preprocess data: sub-sample a random tiny batch of nodes.
+
+        Args:
+            input_data (Dict): Dictionary containing data to be processed.
+
+        Returns:
+            Dict: Processed data.
+        """
+
+        input_data = super().preprocessing(input_data)
+
+        x = input_data['inputs']
+        y = input_data['target']
+
+        B, T, N, _ = x.shape
+        # The permutation spans B * N entries, so this assumes B == 1 (as
+        # configured in PreprocessPEMS08.py). Only one tiny batch can be
+        # returned per call; a fresh permutation on every call covers the
+        # remaining nodes over the course of training.
+        idx_perm = np.random.permutation(B * N)
+        idx = idx_perm[:self.tiny_batch_size]
+
+        x_ = x[:, :, idx, :]
+        y_ = y[:, :, idx, :]
+
+        input_data['inputs'] = x_.transpose(1, 2)  # (B, N_tiny, T, D)
+        input_data['target'] = y_
+        return input_data
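
For reference, the node sub-sampling above reduces each full-graph sample to a tiny batch before it reaches BigSTPreprocess. A shape walk-through with the PEMS08 numbers (a sketch, assuming B == 1 as configured):

import numpy as np
import torch

B, T, N, D, tiny = 1, 2016, 170, 3, 64
x = torch.randn(B, T, N, D)                # one dataloader sample
idx = np.random.permutation(B * N)[:tiny]  # random node subset
x_tiny = x[:, :, idx, :].transpose(1, 2)
print(x_tiny.shape)                        # torch.Size([1, 64, 2016, 3])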