# Masked SMAPE, R^2 and Pearson-correlation metrics for ``basicts.metrics``.
#
# The originating patch places these functions in ``basicts/metrics/smape.py``,
# ``basicts/metrics/r_square.py`` and ``basicts/metrics/corr.py`` and registers
# them in ``basicts/metrics/__init__.py``:
#
#     from .smape import masked_smape
#     from .r_square import masked_r2
#     from .corr import masked_corr
#
#     ALL_METRICS = {..., 'SMAPE': masked_smape, 'R2': masked_r2, 'CORR': masked_corr}
#     __all__ = [..., 'masked_smape', 'masked_r2', 'masked_corr', 'ALL_METRICS']

import numpy as np
import torch

# Absolute tolerance used when comparing target values against zero / null_val.
_NULL_ATOL = 5e-5


def _valid_mask(target: torch.Tensor, null_val: float, rtol: float = 0.0) -> torch.Tensor:
    """Return a boolean tensor that is True where `target` is not the null value.

    The comparison tensor is built with `torch.full_like`, so it always shares
    the dtype and device of `target` (a bare `torch.tensor(null_val)` is float32
    and makes `torch.isclose` fail on float64 targets).
    """
    if np.isnan(null_val):
        return ~torch.isnan(target)
    null = torch.full_like(target, null_val)
    return ~torch.isclose(target, null, atol=_NULL_ATOL, rtol=rtol)


def masked_corr(prediction: torch.Tensor, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
    """
    Calculate the masked Pearson correlation coefficient between prediction and target.

    The correlation is computed independently for every series along `dim=1`,
    using only the entries whose target value does not match `null_val`. Masked
    entries are excluded from the means, the covariance and the variances, so a
    single missing value no longer invalidates the whole series.

    The per-series coefficients are then averaged, each weighted by its number of
    valid entries. When nothing is masked this is the plain mean over all series.

    Args:
        prediction (torch.Tensor): The predicted values; must have at least 2 dimensions.
        target (torch.Tensor): The ground truth values, same shape as `prediction`.
        null_val (float, optional): The value considered as null or missing in `target`.
            Default is `np.nan`, which masks all `NaN` values in the target.

    Returns:
        torch.Tensor: A scalar tensor with the masked Pearson correlation
        (0 if every entry is masked). Series with zero variance contribute 0.
    """

    valid = _valid_mask(target, null_val)
    weights = valid.float()

    # Zero out masked positions so NaNs / sentinels cannot leak into the sums.
    target = torch.where(valid, target, torch.zeros_like(target))
    prediction = torch.where(valid, prediction, torch.zeros_like(prediction))

    count = weights.sum(dim=1, keepdim=True)
    safe_count = count.clamp(min=1.0)  # avoid 0/0 for fully masked series

    # Deviations from the masked means: (X - mean_X) and (Y - mean_Y).
    prediction_dev = (prediction - prediction.sum(dim=1, keepdim=True) / safe_count) * weights
    target_dev = (target - target.sum(dim=1, keepdim=True) / safe_count) * weights

    # Pearson correlation coefficient: covariance over the product of std devs.
    numerator = torch.sum(prediction_dev * target_dev, dim=1, keepdim=True)
    denominator = torch.sqrt(
        torch.sum(prediction_dev ** 2, dim=1, keepdim=True)
        * torch.sum(target_dev ** 2, dim=1, keepdim=True)
    )
    # A zero denominator (constant or empty series) yields NaN; report it as 0.
    corr = torch.nan_to_num(numerator / denominator, nan=0.0, posinf=0.0, neginf=0.0)

    return torch.sum(corr * count) / count.sum().clamp(min=1.0)


def masked_r2(prediction: torch.Tensor, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
    """
    Calculate the masked coefficient of determination (R^2) between prediction and target.

    R^2 is computed independently for every series along `dim=1` as
    `1 - ss_res / (ss_tot + eps)`, where both sums of squares and the target mean
    use only the entries whose target value does not match `null_val`. The result
    is the mean over all series that contain at least one valid entry.

    Args:
        prediction (torch.Tensor): The predicted values; must have at least 2 dimensions.
        target (torch.Tensor): The ground truth values, same shape as `prediction`.
        null_val (float, optional): The value considered as null or missing in `target`.
            Default is `np.nan`, which masks all `NaN` values in the target.

    Returns:
        torch.Tensor: A scalar tensor with the masked R^2 (0 if every entry is masked).
    """

    eps = _NULL_ATOL  # keeps the division finite when a series is constant
    valid = _valid_mask(target, null_val)
    weights = valid.float()

    # Masked positions become 0 in both tensors, so they add nothing to ss_res.
    target = torch.nan_to_num(torch.where(valid, target, torch.zeros_like(target)))
    prediction = torch.nan_to_num(torch.where(valid, prediction, torch.zeros_like(prediction)))

    count = weights.sum(dim=1)
    target_mean = target.sum(dim=1, keepdim=True) / count.clamp(min=1.0).unsqueeze(1)

    # Residual sum of squares.
    ss_res = torch.sum(torch.pow(target - prediction, 2), dim=1)
    # Total sum of squares, restricted to valid entries.
    ss_tot = torch.sum(torch.pow((target - target_mean) * weights, 2), dim=1)

    r2 = torch.nan_to_num(1 - (ss_res / (ss_tot + eps)))

    # A fully masked series would otherwise score a spurious perfect 1.0.
    has_data = (count > 0).to(r2.dtype)
    return torch.sum(r2 * has_data) / has_data.sum().clamp(min=1.0)


def masked_smape(prediction: torch.Tensor, target: torch.Tensor, null_val: float = np.nan) -> torch.Tensor:
    """
    Calculate the masked Symmetric Mean Absolute Percentage Error (SMAPE).

    Each entry contributes `|prediction - target| / ((|prediction| + |target|) / 2)`.
    Entries are ignored when the target is close to zero or matches `null_val`.

    Args:
        prediction (torch.Tensor): The predicted values as a tensor.
        target (torch.Tensor): The ground truth values, same shape as `prediction`.
        null_val (float, optional): The value considered as null or missing in `target`.
            Defaults to `np.nan`, which masks all `NaN` values in the target.

    Returns:
        torch.Tensor: A scalar tensor with the SMAPE averaged over the valid entries
        (0 if every entry is masked).

    Details:
        - Two masks are combined:
            1. A zero mask excluding targets within 5e-5 of zero, because such entries
               produce extremely large or undefined percentage errors.
            2. A null mask excluding targets matching `null_val` (`torch.isnan` is used
               when `null_val` is `np.nan`).
        - Only entries passing both masks contribute to the SMAPE.
    """

    # mask to exclude zero values in the target
    zero_mask = ~torch.isclose(target, torch.zeros_like(target), atol=_NULL_ATOL)

    # mask to exclude null values in the target
    null_mask = _valid_mask(target, null_val, rtol=1e-5)

    # combine zero and null masks
    valid = zero_mask & null_mask

    ratio = torch.abs(prediction - target) / ((prediction.abs() + target.abs()) / 2)
    # Invalid entries (and any NaN produced at valid ones) contribute zero.
    loss = torch.nan_to_num(torch.where(valid, ratio, torch.zeros_like(ratio)))

    return loss.sum() / valid.sum().clamp(min=1)