From 0e717c5cd56ab79ec2017bd98d45bb2dc872910f Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Mon, 17 Jun 2024 21:34:49 +0800 Subject: [PATCH 1/9] docs: update the repo token path; --- .github/workflows/greetings.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml index c6c4be6..751fbf2 100644 --- a/.github/workflows/greetings.yml +++ b/.github/workflows/greetings.yml @@ -18,7 +18,7 @@ jobs: steps: - uses: actions/first-interaction@v1 with: - repo-token: ${{ secrets.ACCESS_TOKEN }} + repo-token: ${{ secrets.GITHUB_TOKEN }} issue-message: | Hi there 👋, @@ -34,7 +34,7 @@ jobs: pr-message: | Hi there 👋, - We really really appreciate that you have taken the time to make this PR on PyPOTS' Awesome Imputation project! + We really appreciate that you have taken the time to make this PR on PyPOTS' Awesome Imputation project! If you are trying to fix a bug, please reference the issue number in the description or give your details about the bug. If you are implementing a feature request, please check with the maintainers that the feature will be accepted first. From e4792f421730c6e43e8e14ddedd3605df6305450 Mon Sep 17 00:00:00 2001 From: LINGLONGQIAN <15869023990@163.com> Date: Mon, 17 Jun 2024 20:18:58 +0100 Subject: [PATCH 2/9] hpo update --- benchmark_code/hpo_results/electricity.py | 6 +- benchmark_code/hpo_results/physionet2019.py | 102 +++++--------------- 2 files changed, 28 insertions(+), 80 deletions(-) diff --git a/benchmark_code/hpo_results/electricity.py b/benchmark_code/hpo_results/electricity.py index a9b1f6a..e9b8c83 100644 --- a/benchmark_code/hpo_results/electricity.py +++ b/benchmark_code/hpo_results/electricity.py @@ -24,7 +24,7 @@ "SAITS": { 'n_steps': 96, 'n_features': 370, - 'epochs': 200, + 'epochs': 100, 'patience': 10, 'n_layers': 2, 'd_model': 64, @@ -236,7 +236,7 @@ 'n_steps': 96, 'n_features': 370, 'patience': 10, - 'epochs': 200, + 'epochs': 100, 'lr': 0.00031344111157861616, 'rnn_hidden_size': 512, 'dropout': 0.3, @@ -278,7 +278,7 @@ 'n_steps': 96, 'n_features': 370, 'patience': 10, - 'epochs': 200, + 'epochs': 100, 'rnn_hidden_size': 1024, 'lr': 0.000648986719843512, }, diff --git a/benchmark_code/hpo_results/physionet2019.py b/benchmark_code/hpo_results/physionet2019.py index d371a12..27c3f2e 100644 --- a/benchmark_code/hpo_results/physionet2019.py +++ b/benchmark_code/hpo_results/physionet2019.py @@ -8,7 +8,7 @@ PhysioNet2019 = { "iTransformer": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "n_layers": 4, @@ -23,7 +23,7 @@ }, "SAITS": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "n_layers": 3, @@ -38,7 +38,7 @@ }, "FreTS": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "embed_size": 256, @@ -48,7 +48,7 @@ }, "Koopa": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "n_seg_steps": 12, @@ -60,7 +60,7 @@ }, "Crossformer": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "n_layers": 3, @@ -75,7 +75,7 @@ }, "TimesNet": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "patience": 10, "epochs": 100, "n_layers": 3, @@ -88,7 +88,7 @@ }, "PatchTST": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "patch_len": 8, @@ -105,7 +105,7 @@ }, "ETSformer": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, 
"n_e_layers": 3, @@ -119,7 +119,7 @@ }, "MICN": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "n_layers": 2, @@ -128,18 +128,10 @@ "dropout": 0.2, "lr": 0.0000852263260132517, }, - "DLinear": { - "n_steps": 48, - "n_features": 34, - "epochs": 100, - "patience": 10, - "moving_avg_window_size": 13, - "d_model": 1024, - "lr": 0.0001633470877552397 - }, + "DLinear": { "n_steps": 48, "n_features": 33, "epochs": 100, "patience": 10, "moving_avg_window_size": 13, "d_model": 1024, "lr": 0.0001633470877552397 }, "SCINet": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "n_stacks": 2, @@ -152,7 +144,7 @@ }, "NonstationaryTransformer": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "n_layers": 1, @@ -167,22 +159,10 @@ "dropout": 0.2, "lr": 0.00010519284750234083 }, - "FiLM": { - "n_steps": 48, - "n_features": 34, - "epochs": 100, - "patience": 10, - "window_size": [ 2 ], - "multiscale": [ 1, 2 ], - "modes1": 32, - "dropout": 0.4, - "mode_type": 0, - "d_model": 64, - "lr": 0.008109542467067061 - }, + "FiLM": { "n_steps": 48, "n_features": 33, "epochs": 100, "patience": 10, "window_size": [ 2 ], "multiscale": [ 1, 2 ], "modes1": 32, "dropout": 0.4, "mode_type": 0, "d_model": 64, "lr": 0.008109542467067061 }, "Pyraformer": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "n_layers": 3, @@ -198,36 +178,11 @@ "attn_dropout": 0.1, "lr": 0.0002148305519207637 }, - "Autoformer": { - "n_steps": 48, - "n_features": 34, - "epochs": 100, - "patience": 10, - "n_layers": 1, - "d_model": 128, - "d_ffn": 1024, - "n_heads": 4, - "factor": 3, - "moving_avg_window_size": 13, - "dropout": 0, - "lr": 0.00026656159603612764 - }, - "CSDI": { - "n_steps": 48, - "n_features": 34, - "patience": 10, - "epochs": 100, - "n_layers": 4, - "n_heads": 8, - "n_channels": 32, - "d_time_embedding": 256, - "d_feature_embedding": 16, - "d_diffusion_embedding": 256, - "lr": 0.0018788258888970985 - }, + "Autoformer": { "n_steps": 48, "n_features": 33, "epochs": 100, "patience": 10, "n_layers": 1, "d_model": 128, "d_ffn": 1024, "n_heads": 4, "factor": 3, "moving_avg_window_size": 13, "dropout": 0, "lr": 0.00026656159603612764 }, + "CSDI": { "n_steps": 48, "n_features": 33, "patience": 10, "epochs": 100, "n_layers": 4, "n_heads": 8, "n_channels": 32, "d_time_embedding": 256, "d_feature_embedding": 16, "d_diffusion_embedding": 256, "lr": 0.0018788258888970985 }, "Informer": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "n_layers": 3, @@ -240,7 +195,7 @@ }, "USGAN": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "patience": 10, "epochs": 100, "lr": 0.0007792852806075814, @@ -249,7 +204,7 @@ }, "StemGNN": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "n_layers": 2, @@ -260,7 +215,7 @@ }, "GPVAE": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "latent_size": 34, "patience": 10, "epochs": 100, @@ -280,23 +235,16 @@ }, "MRNN": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "patience": 10, "epochs": 100, "rnn_hidden_size": 512, "lr": 0.006155127814415844 }, - "BRITS": { - "n_steps": 48, - "n_features": 34, - "patience": 10, - "epochs": 100, - "rnn_hidden_size": 512, - "lr": 0.0005763283506002885 - }, + "BRITS": { "n_steps": 48, "n_features": 33, "patience": 10, "epochs": 100, "rnn_hidden_size": 512, "lr": 0.0005763283506002885 }, "GRUD": { "n_steps": 48, - "n_features": 34, + 
"n_features": 33, "epochs": 100, "patience": 10, "rnn_hidden_size": 32, @@ -304,7 +252,7 @@ }, "Transformer": { "n_steps": 48, - "n_features": 34, + "n_features": 33, "epochs": 100, "patience": 10, "n_layers": 6, @@ -317,4 +265,4 @@ "attn_dropout": 0.3, "lr": 0.00011675690237576063 }, -} +} \ No newline at end of file From ffe233ad0147236b9513f1ead72005ad35c70d2c Mon Sep 17 00:00:00 2001 From: Jun Wang <140130585+AugustJW@users.noreply.github.com> Date: Tue, 18 Jun 2024 16:43:36 +0800 Subject: [PATCH 3/9] Update README.md add an AISTATS24 paper --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 5754c9c..f7fe1fe 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,10 @@ researchers and practitioners who are interested in this field. [[paper](https://openreview.net/pdf?id=K1mcPiDdOJ)] [[official code](https://github.com/Chemgyu/TimeCIB)] +[AISTATS] **SADI: Similarity-Aware Diffusion Model-Based Imputation for Incomplete Temporal EHR Data** +[[paper](https://proceedings.mlr.press/v238/dai24c/dai24c.pdf)] +[[official code](https://github.com/bestadcarry/SADI-Similarity-Aware-Diffusion-Model-Based-Imputation-for-Incomplete-Temporal-EHR-Data)] + ### `Year 2023` From cc532152c38bb32dfb51b8f70b9986ae110cc31a Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Wed, 19 Jun 2024 09:11:56 +0800 Subject: [PATCH 4/9] docs: update README; --- README.md | 56 +++++++++------ benchmark_code/README.md | 129 +++++++++++++++++++++------------- benchmark_code/data/README.md | 12 +++- 3 files changed, 127 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 5754c9c..1050e78 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,17 @@

- Time Series Imputation Survey

-The open-resource repository for the paper [**Deep Learning for Multivariate Time Series Imputation: A Survey**](https://arxiv.org/abs/2402.04059) +The open-resource repository for the paper [**TSI-Bench: Benchmarking Time Series Imputation**]() from PyPOTS Research. -The code and configurations for reproducing the experimental results in the paper are available under -the folder `time_series_imputation_survey_code`. - -If you find this repository helpful to your work, please kindly star it and cite our survey paper (author profile links: -[Jun Wang](https://github.com/AugustJW), [Wenjie Du](https://github.com/WenjieDu), -[Wei Cao](https://weicao1990.github.io/), [Keli Zhang](https://github.com/kelizhang), [Wenjia Wang](https://www.wenjia-w.com/home), -[Yuxuan Liang](https://yuxuanliang.com/), [Qingsong Wen](https://sites.google.com/site/qingsongwen8/)) as follows: - -```bibtex -@article{wang2024deep, -title={Deep Learning for Multivariate Time Series Imputation: A Survey}, -author={Wang, Jun and Du, Wenjie and Cao, Wei and Zhang, Keli and Wang, Wenjia and Liang, Yuxuan and Wen, Qingsong}, -journal={arXiv preprint arXiv:2402.04059}, -year={2024} -} -``` +The code and configurations for reproducing the experimental results in the paper are available under the folder `benchmark_code`. +The README file here maintains a list of must-read papers on time-series imputation, and a collection of time-series imputation toolkits and resources. 🤗 Contributions to update new resources and articles are very welcome! @@ -215,13 +203,41 @@ researchers and practitioners who are interested in this field. ## ❖ Other Resources -### Repos about General Time Series +### `Articles about General Missingness and Imputation` +[blog] [**Data Imputation: An essential yet overlooked problem in machine learning**](https://www.vanderschaar-lab.com/data-imputation-an-essential-yet-overlooked-problem-in-machine-learning/) + +[Journal of Big Data] **A survey on missing data in machine learning** +[[paper](https://journalofbigdata.springeropen.com/articles/10.1186/s40537-021-00516-9)] + + +### `Repos about General Time Series` [Transformers in Time Series](https://github.com/qingsongedu/time-series-transformers-review) [LLMs and Foundation Models for Time Series and Spatio-Temporal Data](https://github.com/qingsongedu/Awesome-TimeSeries-SpatioTemporal-LM-LLM) [AI for Time Series (AI4TS) Papers, Tutorials, and Surveys](https://github.com/qingsongedu/awesome-AI-for-time-series-papers) +## ❖ Citing This Work +If you find this repository helpful to your work, please kindly star it and cite our benchmark paper and survey paper as follows: + +```bibtex +@article{du2024tsibench, +title={TSI-Bench: Benchmarking Time Series Imputation}, +author={Wenjie Du and Jun Wang and Linglong Qian and Yiyuan Yang and Fanxing Liu and Zepu Wang and Zina Ibrahim and Haoxin Liu and Zhiyuan Zhao and Yingjie Zhou and Wenjia Wang and Kaize Ding and Yuxuan Liang and B. Aditya Prakash and Qingsong Wen}, +journal={arXiv preprint arXiv:2406.12747}, +year={2024} +} +``` + +```bibtex +@article{wang2024deep, +title={Deep Learning for Multivariate Time Series Imputation: A Survey}, +author={Jun Wang and Wenjie Du and Wei Cao and Keli Zhang and Wenjia Wang and Yuxuan Liang and Qingsong Wen}, +journal={arXiv preprint arXiv:2402.04059}, +year={2024} +} +``` +
🏠 Visits 
diff --git a/benchmark_code/README.md b/benchmark_code/README.md
index 2711fe0..1232e1b 100644
--- a/benchmark_code/README.md
+++ b/benchmark_code/README.md
@@ -1,75 +1,108 @@
-# Code for the Time Series Imputation Survey
-The scripts and configurations used in the work are all put here.
-
+# TSI-Bench
+The code scripts, configurations, and logs here are for TSI-Bench,
+the first comprehensive benchmark for time series imputation.
 
 ## ❖ Python Environment Creation
 A proper Python environment is necessary to reproduce the results.
 Please ensure that all the below library requirements are satisfied.
 
 ```yaml
-pypots >=0.4
-tsdb >=0.2
-pygrinder >=0.4
+tsdb ==0.4
+pygrinder ==0.6
+benchpots ==0.1
+pypots ==0.6
 ```
 
 For Linux OS, you can create the environment with Conda by running `conda env create -f conda_env.yml`.
 For other OSes, the library version requirements can also be found in `conda_env.yml`.
 
-## ❖ Datasets Introduction and Generation
-### Introduction
-#### Air
-Air (Beijing Multi-Site Air-Quality) is collected from twelve Beijing monitoring sites hourly in forty-eight months.
-At each site, eleven air pollution variables (e.g. PM2.5, NO, O3) are collected.
-The dataset has 1.6% originally missing data.
-
-#### PhysioNet2012
-PhysioNet2012 (PhysioNet-2012 Mortality Prediction Challenge) includes multivariate clinical time series data
-collected from 11,988 patients in ICU. Each sample contains thirty-seven measurements (e.g. glucose, heart rate,
-temperature) recorded in the first forty-eight hours after admission to the ICU.
-This dataset has 80% values missing.
-
-#### ETTm1
-ETTm1 (Electricity Transformer Temperature) records seven state features, including oil temperature and six power
-load variables of electricity transformers collected every fifteen minutes for two years.
-There is no original missingness in this dataset.
-
-### Generation
-The scripts for generating three datasets used in this work are in the directory `data_processing`.
-To generate the preprocessed datasets, please run the shell script `generate_datasets.sh` or
-execute the below commands:
+## ❖ Datasets Generation
+Please refer to [`data/README.md`](data/README.md).
+
+## ❖ Results Reproduction
+### Neural network training
+For example, to reproduce the results of SAITS on the dataset Pedestrian, please execute the following command.
 
 ```shell
-# generate PhysioNet2012 dataset
-python data/gene_physionet_2012.py
-
-# generate Air dataset
-python data/gene_air_quality.py
-
-# generate ETTm1 dataset
-python data/gene_ettm1.py
-```
+nohup python train_model.py \
+    --model SAITS \
+    --dataset Pedestrian \
+    --dataset_fold_path data/melbourne_pedestrian_rate01_step24_point \
+    --saving_path results_point_rate01 \
+    --device cuda:2 \
+    > results_point_rate01/SAITS_pedestrian.log &
+```
 
-## ❖ Model Training and Results Reproduction
-```shell
-# reproduce the results on the dataset PhysioNet2012
-nohup python train_models_for_physionet2012.py > physionet2012.log&
-
-# reproduce the results on the dataset Air
-nohup python train_models_for_air.py > air.log&
-
-# reproduce the results on the dataset ETTm1
-nohup python train_models_for_ettm1.py > ettm1.log&
-```
-After all execution finished, please check out all logging information in the according `.log` files.
-
-Additionally, as claimed in the paper, hyperparameters of all models get optimized by the tuning functionality in
-[PyPOTS](https://github.com/WenjieDu/PyPOTS). Hence, tuning configurations are available in the directory `PyPOTS_tuning_configs`.
-If you'd like to explore this feature, please check out the details there.
+After the execution finishes, please check the logging information in the corresponding `.log` file.
+
+Additionally, as described in the paper, the hyperparameters of all models are optimized with the tuning functionality in
+[PyPOTS](https://github.com/WenjieDu/PyPOTS). Hence, tuning configurations are available in the directory `PyPOTS_tuning_configs`.
+If you'd like to explore this feature, please check out the details there.
+
+### Naive methods
+To obtain the results of the naive methods, check out the commands in the shell script `naive_imputation.sh`. 
-
+## ❖ Downstream Tasks
+
+### Classification
+
+```shell
+python downstream_classification.py \
+    --model SAITS \
+    --dataset PhysioNet2012 \
+    --dataset_fold_path data/physionet_2012_rate01_point \
+    --model_result_parent_fold results_point_rate01/SAITS_PhysioNet2012 \
+    --device cuda:0 \
+    --n_classes 2
+
+python downstream_classification.py \
+    --model SAITS \
+    --dataset Pedestrian \
+    --dataset_fold_path data/melbourne_pedestrian_rate01_step24_point \
+    --model_result_parent_fold results_point_rate01/SAITS_Pedestrian \
+    --device cuda:2 \
+    --n_classes 10
+
+python downstream_classification_naive.py \
+    --dataset_fold_path data/physionet_2012_rate01_point \
+    --dataset PhysioNet2012 \
+    --device cuda:3 \
+    --n_classes 2
+```
+
+### Regression
+
+```shell
+python downstream_regression.py \
+    --model SAITS \
+    --dataset ETT_h1 \
+    --dataset_fold_path data/ett_rate01_step48_point \
+    --model_result_parent_fold results_point_rate01/SAITS_ETT_h1 \
+    --device cuda:0
+
+python downstream_regression_naive.py \
+    --dataset_fold_path data/ett_rate01_step48_point \
+    --dataset ETT_h1 \
+    --device cuda:3
+```
+
+### Forecasting
+
+```shell
+python downstream_forecasting.py \
+    --model SAITS \
+    --dataset ETT_h1 \
+    --dataset_fold_path data/ett_rate01_step48_point \
+    --model_result_parent_fold results_point_rate01/SAITS_ETT_h1 \
+    --device cuda:0
+
+python downstream_forecasting_naive.py \
+    --dataset_fold_path data/ett_rate01_step48_point \
+    --dataset ETT_h1 \
+    --device cuda:3
+```
-## ❖ Downstream Classification
-After running `train_models_for_physionet2012.py`, all models' imputation results are persisted under according folders.
-To obtain the simple RNN's classification results on PhysioNet2012, please execute the script `downstream_classification.py`.
\ No newline at end of file
diff --git a/benchmark_code/data/README.md b/benchmark_code/data/README.md
index 868720d..7037eb0 100644
--- a/benchmark_code/data/README.md
+++ b/benchmark_code/data/README.md
@@ -1,5 +1,13 @@
 # Data generation
-Run `python dataset_generating.py` to generate datasets.
+Run the commands below to generate the datasets for the experiments.
+Note that, for the PeMS traffic dataset, you have to put the `traffic.csv` file under the current directory.
+You can download it from https://pems.dot.ca.gov. The other datasets are integrated into `TSDB` and can be used directly.
 
-Note that, for PeMS traffic dataset, you have to put the `traffic.csv` file under the current directory. 
\ No newline at end of file +```shell +python dataset_generating_point01.py +python dataset_generating_point05.py +python dataset_generating_point09.py +python dataset_generating_subseq05.py +python dataset_generating_block05.py +``` \ No newline at end of file From fdf1fb5d2b47543f6d13f577ca9deaec9f5fd64f Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Wed, 19 Jun 2024 09:12:39 +0800 Subject: [PATCH 5/9] feat: update code; --- benchmark_code/conda_env.yml | 8 +- .../data/dataset_generating_block05.py | 2 +- .../data/dataset_generating_point01.py | 2 +- .../data/dataset_generating_point05.py | 2 +- .../data/dataset_generating_point09.py | 2 +- .../data/dataset_generating_subseq05.py | 2 +- benchmark_code/hpo_results/physionet2019.py | 166 +++++++++++------- 7 files changed, 113 insertions(+), 71 deletions(-) diff --git a/benchmark_code/conda_env.yml b/benchmark_code/conda_env.yml index 577694a..85f1c3e 100644 --- a/benchmark_code/conda_env.yml +++ b/benchmark_code/conda_env.yml @@ -261,7 +261,6 @@ dependencies: - pyflakes=3.2.0=pyhd8ed1ab_0 - pyg=2.5.0=py310_torch_2.2.0_cu121 - pygments=2.17.2=pyhd8ed1ab_0 - - pygrinder=0.4=pyh60bb809_0 - pyparsing=3.1.2=pyhd8ed1ab_0 - pyqt=5.15.9=py310h04931ad_5 - pyqt5-sip=12.12.2=py310hc6cd4ac_5 @@ -326,7 +325,6 @@ dependencies: - tornado=6.4=py310h2372a71_0 - tqdm=4.65.0=py310h2f386ee_0 - traitlets=5.14.1=pyhd8ed1ab_0 - - tsdb=0.3.1=pyhc1e730c_0 - types-python-dateutil=2.8.19.20240311=pyhd8ed1ab_0 - typing-extensions=4.9.0=py310h06a4308_1 - typing_extensions=4.9.0=py310h06a4308_1 @@ -370,6 +368,7 @@ dependencies: - zstd=1.5.5=hfc55251_0 - pip: - astor==0.8.1 + - benchpots==0.1 - cloudpickle==3.0.0 - contextlib2==21.6.0 - einops==0.8.0 @@ -379,11 +378,14 @@ dependencies: - nvidia-ml-py==12.535.133 - prettytable==3.10.0 - pyarrow==15.0.1 - - pypots==0.5 + - pygrinder==0.6 + - pypots==0.6 - pythonwebhdfs==0.2.3 - responses==0.25.0 - schema==0.7.5 - simplejson==3.19.2 - sphinxcontrib-gtagjs==0.2.1 + - tsdb==0.4 - typeguard==2.13.3 - websockets==12.0 + - xgboost==2.0.3 diff --git a/benchmark_code/data/dataset_generating_block05.py b/benchmark_code/data/dataset_generating_block05.py index fb95dae..83e0bb2 100644 --- a/benchmark_code/data/dataset_generating_block05.py +++ b/benchmark_code/data/dataset_generating_block05.py @@ -91,7 +91,7 @@ block_len = 6 block_width = 6 pems_traffic = preprocess_pems_traffic( - file_path="/Users/wdu/Downloads/traffic.csv", + file_path="traffic.csv", rate=rate, n_steps=step, pattern=pattern, diff --git a/benchmark_code/data/dataset_generating_point01.py b/benchmark_code/data/dataset_generating_point01.py index 82e964b..64ef268 100644 --- a/benchmark_code/data/dataset_generating_point01.py +++ b/benchmark_code/data/dataset_generating_point01.py @@ -114,7 +114,7 @@ step = 24 pems_traffic = preprocess_pems_traffic( - file_path="/Users/wdu/Downloads/traffic.csv", + file_path="traffic.csv", rate=rate, n_steps=step, pattern=pattern, diff --git a/benchmark_code/data/dataset_generating_point05.py b/benchmark_code/data/dataset_generating_point05.py index 31859ef..9ea22d6 100644 --- a/benchmark_code/data/dataset_generating_point05.py +++ b/benchmark_code/data/dataset_generating_point05.py @@ -64,7 +64,7 @@ step = 24 pems_traffic = preprocess_pems_traffic( - file_path="/Users/wdu/Downloads/traffic.csv", + file_path="traffic.csv", rate=rate, n_steps=step, pattern=pattern, diff --git a/benchmark_code/data/dataset_generating_point09.py b/benchmark_code/data/dataset_generating_point09.py index 0f24c11..d6db03b 100644 --- 
a/benchmark_code/data/dataset_generating_point09.py +++ b/benchmark_code/data/dataset_generating_point09.py @@ -68,7 +68,7 @@ step = 24 pems_traffic = preprocess_pems_traffic( - file_path="/Users/wdu/Downloads/traffic.csv", + file_path="traffic.csv", rate=rate, n_steps=step, pattern=pattern, diff --git a/benchmark_code/data/dataset_generating_subseq05.py b/benchmark_code/data/dataset_generating_subseq05.py index b8edf58..b706954 100644 --- a/benchmark_code/data/dataset_generating_subseq05.py +++ b/benchmark_code/data/dataset_generating_subseq05.py @@ -70,7 +70,7 @@ step = 24 seq_len = 18 pems_traffic = preprocess_pems_traffic( - file_path="/Users/wdu/Downloads/traffic.csv", + file_path="traffic.csv", rate=rate, n_steps=step, pattern=pattern, diff --git a/benchmark_code/hpo_results/physionet2019.py b/benchmark_code/hpo_results/physionet2019.py index 27c3f2e..242a73b 100644 --- a/benchmark_code/hpo_results/physionet2019.py +++ b/benchmark_code/hpo_results/physionet2019.py @@ -19,7 +19,7 @@ "d_v": 256, "dropout": 0.1, "attn_dropout": 0, - "lr": 0.00008636859103794524 + "lr": 0.00008636859103794524, }, "SAITS": { "n_steps": 48, @@ -34,7 +34,7 @@ "d_v": 256, "dropout": 0, "attn_dropout": 0.3, - "lr": 0.00020168819721792526 + "lr": 0.00020168819721792526, }, "FreTS": { "n_steps": 48, @@ -44,7 +44,7 @@ "embed_size": 256, "hidden_size": 256, "channel_independence": True, - "lr": 0.0014930673940111879 + "lr": 0.0014930673940111879, }, "Koopa": { "n_steps": 48, @@ -56,23 +56,23 @@ "d_hidden": 256, "n_hidden_layers": 3, "n_blocks": 1, - "lr": 0.0008272498497234694 + "lr": 0.0008272498497234694, }, - "Crossformer": { - "n_steps": 48, - "n_features": 33, - "epochs": 100, - "patience": 10, - "n_layers": 3, - "d_model": 64, - "d_ffn": 256, - "n_heads": 4, - "factor": 5, - "seg_len": 6, - "win_size": 2, - "dropout": 0.2, + "Crossformer": { + "n_steps": 48, + "n_features": 33, + "epochs": 100, + "patience": 10, + "n_layers": 3, + "d_model": 64, + "d_ffn": 256, + "n_heads": 4, + "factor": 5, + "seg_len": 6, + "win_size": 2, + "dropout": 0.2, "lr": 0.0031286300233652076, - }, + }, "TimesNet": { "n_steps": 48, "n_features": 33, @@ -84,7 +84,7 @@ "d_ffn": 1024, "n_kernels": 5, "dropout": 0.3, - "lr": 0.00017177144121842495 + "lr": 0.00017177144121842495, }, "PatchTST": { "n_steps": 48, @@ -101,22 +101,22 @@ "d_v": 128, "dropout": 0.1, "attn_dropout": 0.1, - "lr": 0.0004466135652323526 + "lr": 0.0004466135652323526, }, - "ETSformer": { - "n_steps": 48, + "ETSformer": { + "n_steps": 48, "n_features": 33, - "epochs": 100, - "patience": 10, - "n_e_layers": 3, - "n_d_layers": 2, - "d_model": 1024, + "epochs": 100, + "patience": 10, + "n_e_layers": 3, + "n_d_layers": 2, + "d_model": 1024, "d_ffn": 1024, - "n_heads": 8, - "top_k": 5, - "dropout": 0.1, + "n_heads": 8, + "top_k": 5, + "dropout": 0.1, "lr": 0.0004604206499094914, - }, + }, "MICN": { "n_steps": 48, "n_features": 33, @@ -128,7 +128,15 @@ "dropout": 0.2, "lr": 0.0000852263260132517, }, - "DLinear": { "n_steps": 48, "n_features": 33, "epochs": 100, "patience": 10, "moving_avg_window_size": 13, "d_model": 1024, "lr": 0.0001633470877552397 }, + "DLinear": { + "n_steps": 48, + "n_features": 33, + "epochs": 100, + "patience": 10, + "moving_avg_window_size": 13, + "d_model": 1024, + "lr": 0.0001633470877552397, + }, "SCINet": { "n_steps": 48, "n_features": 33, @@ -140,7 +148,7 @@ "n_decoder_layers": 2, "d_hidden": 512, "dropout": 0, - "lr": 0.00018311166986294462 + "lr": 0.00018311166986294462, }, "NonstationaryTransformer": { "n_steps": 48, @@ -152,14 +160,23 
@@ "n_heads": 2, "d_ffn": 256, "n_projector_hidden_layers": 2, - "d_projector_hidden": [ - 128, - 128 - ], + "d_projector_hidden": [128, 128], "dropout": 0.2, - "lr": 0.00010519284750234083 + "lr": 0.00010519284750234083, + }, + "FiLM": { + "n_steps": 48, + "n_features": 33, + "epochs": 100, + "patience": 10, + "window_size": [2], + "multiscale": [1, 2], + "modes1": 32, + "dropout": 0.4, + "mode_type": 0, + "d_model": 64, + "lr": 0.008109542467067061, }, - "FiLM": { "n_steps": 48, "n_features": 33, "epochs": 100, "patience": 10, "window_size": [ 2 ], "multiscale": [ 1, 2 ], "modes1": 32, "dropout": 0.4, "mode_type": 0, "d_model": 64, "lr": 0.008109542467067061 }, "Pyraformer": { "n_steps": 48, "n_features": 33, @@ -169,17 +186,39 @@ "d_model": 256, "d_ffn": 1024, "n_heads": 4, - "window_size": [ - 4, - 4 - ], + "window_size": [4, 4], "inner_size": 3, "dropout": 0, "attn_dropout": 0.1, - "lr": 0.0002148305519207637 + "lr": 0.0002148305519207637, + }, + "Autoformer": { + "n_steps": 48, + "n_features": 33, + "epochs": 100, + "patience": 10, + "n_layers": 1, + "d_model": 128, + "d_ffn": 1024, + "n_heads": 4, + "factor": 3, + "moving_avg_window_size": 13, + "dropout": 0, + "lr": 0.00026656159603612764, + }, + "CSDI": { + "n_steps": 48, + "n_features": 33, + "patience": 10, + "epochs": 100, + "n_layers": 4, + "n_heads": 8, + "n_channels": 32, + "d_time_embedding": 256, + "d_feature_embedding": 16, + "d_diffusion_embedding": 256, + "lr": 0.0018788258888970985, }, - "Autoformer": { "n_steps": 48, "n_features": 33, "epochs": 100, "patience": 10, "n_layers": 1, "d_model": 128, "d_ffn": 1024, "n_heads": 4, "factor": 3, "moving_avg_window_size": 13, "dropout": 0, "lr": 0.00026656159603612764 }, - "CSDI": { "n_steps": 48, "n_features": 33, "patience": 10, "epochs": 100, "n_layers": 4, "n_heads": 8, "n_channels": 32, "d_time_embedding": 256, "d_feature_embedding": 16, "d_diffusion_embedding": 256, "lr": 0.0018788258888970985 }, "Informer": { "n_steps": 48, "n_features": 33, @@ -191,7 +230,7 @@ "n_heads": 8, "factor": 5, "dropout": 0, - "lr": 0.00011916009330093557 + "lr": 0.00011916009330093557, }, "USGAN": { "n_steps": 48, @@ -200,7 +239,7 @@ "epochs": 100, "lr": 0.0007792852806075814, "rnn_hidden_size": 512, - "dropout": 0.4 + "dropout": 0.4, }, "StemGNN": { "n_steps": 48, @@ -211,7 +250,7 @@ "n_stacks": 2, "d_model": 512, "dropout": 0.4, - "lr": 0.0009306161201903658 + "lr": 0.0009306161201903658, }, "GPVAE": { "n_steps": 48, @@ -223,15 +262,9 @@ "beta": 0.2, "sigma": 1.005, "length_scale": 7, - "encoder_sizes": [ - 512, - 512 - ], - "decoder_sizes": [ - 128, - 128 - ], - "window_size": 36 + "encoder_sizes": [512, 512], + "decoder_sizes": [128, 128], + "window_size": 36, }, "MRNN": { "n_steps": 48, @@ -239,16 +272,23 @@ "patience": 10, "epochs": 100, "rnn_hidden_size": 512, - "lr": 0.006155127814415844 + "lr": 0.006155127814415844, + }, + "BRITS": { + "n_steps": 48, + "n_features": 33, + "patience": 10, + "epochs": 100, + "rnn_hidden_size": 512, + "lr": 0.0005763283506002885, }, - "BRITS": { "n_steps": 48, "n_features": 33, "patience": 10, "epochs": 100, "rnn_hidden_size": 512, "lr": 0.0005763283506002885 }, "GRUD": { "n_steps": 48, "n_features": 33, "epochs": 100, "patience": 10, "rnn_hidden_size": 32, - "lr": 0.002500711393245861 + "lr": 0.002500711393245861, }, "Transformer": { "n_steps": 48, @@ -263,6 +303,6 @@ "d_v": 128, "dropout": 0, "attn_dropout": 0.3, - "lr": 0.00011675690237576063 + "lr": 0.00011675690237576063, }, -} \ No newline at end of file +} From 
d3b99945c7a505992da5f329b5157ebf798e6860 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Wed, 19 Jun 2024 09:12:59 +0800 Subject: [PATCH 6/9] docs: add LICENSE; --- LICENSE | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..de25380 --- /dev/null +++ b/LICENSE @@ -0,0 +1,28 @@ +Copyright (c) 2024-present, Wenjie Du +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. From afa956a6c92ad17f429a8398cb17b4965cf86b1f Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Wed, 19 Jun 2024 09:15:28 +0800 Subject: [PATCH 7/9] docs: update README; --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d2fceaa..6c13e0b 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@

-The open-resource repository for the paper [**TSI-Bench: Benchmarking Time Series Imputation**]()
+The repository for the paper [**TSI-Bench: Benchmarking Time Series Imputation**](https://arxiv.org/abs/2406.12747)
 from PyPOTS Research.
 The code and configurations for reproducing the experimental results in the paper are available under the folder `benchmark_code`.
 The README file here maintains a list of must-read papers on time-series imputat
 🤗 Contributions to update new resources and articles are very welcome!
 
 ## ❖ Time-Series Imputation Toolkits
-### Datasets
+### `Datasets`
 [TSDB (Time Series Data Beans)](https://github.com/WenjieDu/TSDB): a Python toolkit that can load 169 public time-series datasets with a single line of code.
 
-### Missingness
+### `Missingness`
 [PyGrinder](https://github.com/WenjieDu/PyGrinder): a Python library that grinds data beans into the incomplete by introducing missing values with different missing patterns.
 
-### Algorithms
+### `Algorithms`
 [PyPOTS](https://github.com/WenjieDu/PyPOTS): a Python toolbox for data mining on Partially-Observed Time Series

From 9cafa29e5ab2d7f1783b5bcbeced4463532a65ac Mon Sep 17 00:00:00 2001
From: Wenjie Du
Date: Sun, 23 Jun 2024 00:19:35 +0800
Subject: [PATCH 8/9] feat: ignore all h5 files;

---
 .gitignore | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 1fac24d..424e4bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1 @@
-benchmark_code/data/physionet_2012/test.h5
-benchmark_code/data/physionet_2012/train.h5
-benchmark_code/data/physionet_2012/val.h5
+*.h5
\ No newline at end of file

From 3b4bf12ce3aaf2b433cd45c9431d2735aca7be06 Mon Sep 17 00:00:00 2001
From: Wenjie Du
Date: Sun, 23 Jun 2024 00:20:06 +0800
Subject: [PATCH 9/9] docs: update README;

---
 README.md                | 17 +++++++++--
 benchmark_code/README.md | 63 ++--------------------------------------
 2 files changed, 16 insertions(+), 64 deletions(-)

diff --git a/README.md b/README.md
index 6c13e0b..c1b8699 100644
--- a/README.md
+++ b/README.md
@@ -17,15 +17,18 @@ The README file here maintains a list of must-read papers on time-series imputat
 ## ❖ Time-Series Imputation Toolkits
 ### `Datasets`
-[TSDB (Time Series Data Beans)](https://github.com/WenjieDu/TSDB): a Python toolkit that can load 169 public time-series datasets with a single line of code.
+[TSDB (Time Series Data Beans)](https://github.com/WenjieDu/TSDB): a Python toolkit that can load 170 public time-series datasets with a single line of code.
+[BenchPOTS](https://github.com/WenjieDu/BenchPOTS): a Python suite that provides standard preprocessing pipelines of 170 public datasets for benchmarking machine learning on POTS (Partially-Observed Time Series).
+
 ### `Missingness`
 [PyGrinder](https://github.com/WenjieDu/PyGrinder): a Python library that grinds data beans into the incomplete by introducing missing values with different missing patterns.
 
 ### `Algorithms`
-[PyPOTS](https://github.com/WenjieDu/PyPOTS): a Python toolbox for data mining on Partially-Observed Time Series
+[PyPOTS](https://github.com/WenjieDu/PyPOTS): a Python toolbox for data mining on POTS (Partially-Observed Time Series)
 
 [MICE](https://github.com/amices/mice): Multivariate Imputation by Chained Equations
@@ -222,7 +225,7 @@ 
[AI for Time Series (AI4TS) Papers, Tutorials, and Surveys](https://github.com/qingsongedu/awesome-AI-for-time-series-papers) ## ❖ Citing This Work -If you find this repository helpful to your work, please kindly star it and cite our benchmark paper and survey paper as follows: +If you find this repository helpful to your work, please kindly star it and cite our benchmark paper, survey paper, and PyPOTS as follows: ```bibtex @article{du2024tsibench, @@ -242,6 +245,14 @@ year={2024} } ``` +```bibtex +@article{du2023pypots, +title={{PyPOTS: a Python toolbox for data mining on Partially-Observed Time Series}}, +author={Wenjie Du}, +journal={arXiv preprint arXiv:2305.18811}, +year={2023}, +} +```
🏠 Visits 
diff --git a/benchmark_code/README.md b/benchmark_code/README.md
index 1232e1b..6bde5c3 100644
--- a/benchmark_code/README.md
+++ b/benchmark_code/README.md
@@ -9,7 +9,7 @@ Please ensure that all the below library requirements are satisfied.
 ```yaml
 tsdb ==0.4
 pygrinder ==0.6
-benchpots ==0.1
+benchpots ==0.1.1
 pypots ==0.6
 ```
@@ -46,63 +46,4 @@ To obtain the results of the naive methods, check out the commands in the shell
 ## ❖ Downstream Tasks
-
-
-### Classification
-
-```shell
-python downstream_classification.py \
-    --model SAITS \
-    --dataset PhysioNet2012 \
-    --dataset_fold_path data/physionet_2012_rate01_point \
-    --model_result_parent_fold results_point_rate01/SAITS_PhysioNet2012 \
-    --device cuda:0 \
-    --n_classes 2
-
-python downstream_classification.py \
-    --model SAITS \
-    --dataset Pedestrian \
-    --dataset_fold_path data/melbourne_pedestrian_rate01_step24_point \
-    --model_result_parent_fold results_point_rate01/SAITS_Pedestrian \
-    --device cuda:2 \
-    --n_classes 10
-
-python downstream_classification_naive.py \
-    --dataset_fold_path data/physionet_2012_rate01_point \
-    --dataset PhysioNet2012 \
-    --device cuda:3 \
-    --n_classes 2
-```
-
-### Regression
-
-```shell
-python downstream_regression.py \
-    --model SAITS \
-    --dataset ETT_h1 \
-    --dataset_fold_path data/ett_rate01_step48_point \
-    --model_result_parent_fold results_point_rate01/SAITS_ETT_h1 \
-    --device cuda:0
-
-python downstream_regression_naive.py \
-    --dataset_fold_path data/ett_rate01_step48_point \
-    --dataset ETT_h1 \
-    --device cuda:3
-```
-
-### Forecasting
-
-```shell
-python downstream_forecasting.py \
-    --model SAITS \
-    --dataset ETT_h1 \
-    --dataset_fold_path data/ett_rate01_step48_point \
-    --model_result_parent_fold results_point_rate01/SAITS_ETT_h1 \
-    --device cuda:0
-
-python downstream_forecasting_naive.py \
-    --dataset_fold_path data/ett_rate01_step48_point \
-    --dataset ETT_h1 \
-    --device cuda:3
-```
\ No newline at end of file
+We're cleaning up the code and updating the scripts for the downstream tasks. We will release the code soon.
\ No newline at end of file
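
To make the tuned hyperparameter dictionaries in the `benchmark_code/hpo_results/` patches above concrete, here is a minimal sketch of turning one of them into a trainable model. It assumes the PyPOTS 0.6 API pinned in `conda_env.yml`; the import path and the convention that the `lr` entry feeds the optimizer (while `epochs` and `patience` feed the training loop) are assumptions, not code from this repo.

```python
# Hypothetical usage sketch of a tuned config, assuming the PyPOTS 0.6 API.
from pypots.imputation import SAITS
from pypots.optim import Adam

from hpo_results.physionet2019 import PhysioNet2019  # assumed import path

config = dict(PhysioNet2019["SAITS"])  # copy, so the shared dict stays intact
lr = config.pop("lr")                  # "lr" is assumed to belong to the optimizer
model = SAITS(**config, optimizer=Adam(lr=lr))
# model.fit({"X": train_X})            # train_X: (n_samples, 48, 33) array with NaNs
```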
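The `dataset_generating_*` scripts patched above distinguish point, subseq, and block missingness at rates 0.1, 0.5, and 0.9. A toy illustration of the point pattern, assuming `pygrinder.mcar(X, p)` keeps its documented signature; the array shape here is made up.

```python
# Point-pattern (MCAR) missingness on a synthetic tensor, assuming pygrinder 0.6.
import numpy as np
from pygrinder import mcar

X = np.random.randn(16, 24, 7)   # (n_samples, n_steps, n_features), synthetic
X_point = mcar(X, 0.1)           # ~10% of the observed values become NaN
print(f"missing rate: {np.isnan(X_point).mean():.2f}")
# The subseq and block patterns used by the scripts above additionally take
# length/width arguments (seq_len, block_len, block_width) to shape the holes.
```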
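Likewise, a sketch of what one of the naive baselines plus imputation-error evaluation could look like. It assumes `pypots.imputation.LOCF` can be applied without training and that the metric helper is named `calc_mae` in pypots 0.6; the data here is synthetic, and this is not the repo's `naive_imputation.sh` itself.

```python
# Naive LOCF baseline in the spirit of naive_imputation.sh (assumed APIs).
import numpy as np
from pygrinder import mcar
from pypots.imputation import LOCF
from pypots.utils.metrics import calc_mae

X_ori = np.random.randn(16, 48, 33)                   # complete synthetic data
X = mcar(X_ori, 0.1)                                  # hold out 10% as ground truth
imputation = LOCF().predict({"X": X})["imputation"]   # carry last value forward
indicating_mask = (np.isnan(X) & ~np.isnan(X_ori)).astype(float)
mae = calc_mae(imputation, np.nan_to_num(X_ori), indicating_mask)  # error on held-out values
```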