diff --git a/README.md b/README.md index 94f733e..332e8c6 100644 --- a/README.md +++ b/README.md @@ -51,23 +51,7 @@ Clone the [repository](https://github.com/oasci/vaxstats): git clone https://github.com/oasci/vaxstats.git ``` -### Conda environment - -Move into `vaxstats` directory (`cd vaxstats`) and install the development conda environment using [GNU Make](https://www.gnu.org/software/make/) (which could be installed by default on your system). - -```bash -make environment -``` - -Now you can activate the new conda environment `vaxstats-dev` and use `vaxstats` commands. - -```sh -conda activate vaxstats-dev -``` - -### Manual install - -Alternatively, you can manually install `vaxstats` using `pip` after moving into the directory. +Install `vaxstats` using `pip` after moving into the directory. ```sh pip install . @@ -75,21 +59,19 @@ pip install . This will install all dependencies and `vaxstats` into your current Python environment. -## Deploying +## Development -We use [bump-my-version](https://github.com/callowayproject/bump-my-version) to release a new version. -This will create a git tag used by [poetry-dynamic-version](https://github.com/mtkennerly/poetry-dynamic-versioning) to generate version strings and update `CHANGELOG.md`. - -For example, you would run the following command to bump the `minor` version. +We use [pixi](https://pixi.sh/latest/) to manage Python environments and simplify the developer workflow. +Once you have [pixi](https://pixi.sh/latest/) installed, move into `vaxstats` directory (e.g., `cd vaxstats`) and install the environment using the command ```bash -poetry run bump-my-version bump minor +pixi install ``` -After releasing a new version, you must push and include all tags. +Now you can activate the new virtual environment using -```bash -git push --follow-tags +```sh +pixi shell ``` ## License diff --git a/pixi.lock b/pixi.lock index 4994981..046ff14 100644 --- a/pixi.lock +++ b/pixi.lock @@ -48,6 +48,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.3.0-h59595ed_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.4-h5888daf_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastexcel-0.12.0-py312h12e396e_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 @@ -171,6 +172,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.12-5_cp312.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.2-py312h66e93f0_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.8.0-h6e8976b_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/re2-2024.07.02-h77b4e00_1.conda @@ -216,6 +218,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxxf86vm-1.1.5-hb9d3cd8_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-xorgproto-2024.1-hb9d3cd8_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.20.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-hb9d3cd8_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda @@ -283,6 +286,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.3.0-h59595ed_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.4-h5888daf_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastexcel-0.12.0-py312h12e396e_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 @@ -441,6 +445,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.12-5_cp312.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.2-py312h66e93f0_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.8.0-h6e8976b_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/questionary-2.0.1-pyhd8ed1ab_1.conda @@ -505,6 +510,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxxf86vm-1.1.5-hb9d3cd8_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-xorgproto-2024.1-hb9d3cd8_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.20.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-hb9d3cd8_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/zstandard-0.23.0-py312hef9b889_1.conda @@ -576,6 +582,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/executing-2.1.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.4-h5888daf_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastexcel-0.12.0-py312h12e396e_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 @@ -2157,6 +2164,31 @@ packages: purls: [] size: 138145 timestamp: 1730967050578 +- kind: conda + name: fastexcel + version: 0.12.0 + build: py312h12e396e_1 + build_number: 1 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/fastexcel-0.12.0-py312h12e396e_1.conda + sha256: 97c51ed0e913ebefbbc25accb34c75be32d505884cbcffbe954ed35d7d08e77f + md5: 33e8c797c7dcfb01d00eed61fa204d6e + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=13 + - pyarrow >=8.0.0 + - python >=3.12,<3.13.0a0 + - python_abi 3.12.* *_cp312 + constrains: + - polars >=0.16.14 + - __glibc >=2.17 + - pandas >=1.4.4 + license: MIT + license_family: MIT + purls: + - pkg:pypi/fastexcel?source=hash-mapping + size: 1062176 + timestamp: 1729148485963 - kind: conda name: font-ttf-dejavu-sans-mono version: '2.37' @@ -7293,9 +7325,9 @@ packages: timestamp: 1730863295695 - kind: pypi name: vaxstats - version: 0.0.0.post51 + version: 0.0.0.post52 path: . - sha256: 38904455d61b83fc4511a2b38930e169af8d4524b06bb41407d1961da4ea335c + sha256: d256c926dfe5df0d466684f92c221f9883285199cbe96addfe2b17060db59068 editable: true - kind: conda name: watchdog diff --git a/pyproject.toml b/pyproject.toml index 7d7f306..8275b7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,13 +34,15 @@ black = { cmd = ["black", "--config", "pyproject.toml", "./content"] } format = { depends-on = ["mdlint", "isort", "black"] } [tool.pixi.dependencies] -python = ">=3.11.0,<3.13" +python = ">=3.12.0,<3.13" polars = ">=1.12.0,<2" loguru = ">=0.7.2,<0.8" statsforecast = ">=1.7.8,<2" xlsx2csv = ">=0.8.3,<0.9" pandas = ">=2.2.3,<3" matplotlib = ">=3.9.2,<4" +pyyaml = ">=6.0.2,<7" +fastexcel = ">=0.12.0,<0.13" [tool.pixi.feature.dev.dependencies] ruff = ">=0.7.2,<0.8" diff --git a/tests/test_df_prep.py b/tests/test_df_prep.py index 8b98e33..fc48cae 100644 --- a/tests/test_df_prep.py +++ b/tests/test_df_prep.py @@ -18,7 +18,7 @@ def test_clean_df(path_example_excel): def test_prep_df(path_example_excel): df = load_file(path_example_excel) df = clean_df(df) - df = prep_forecast_df(df, 0, 1, 6) + df = prep_forecast_df(df, 0, 0, 6) assert df.columns == ["unique_id", "ds", "y"] assert df.shape == (2_721, 3) diff --git a/vaxstats/io.py b/vaxstats/io.py index ddbad95..a5ec234 100644 --- a/vaxstats/io.py +++ b/vaxstats/io.py @@ -74,6 +74,10 @@ def prep_forecast_df( DataFrame's columns. ValueError: If the date and time strings do not match the specified formats. + Notes: + If `date_idx` and `time_idx` are the same, we combine `input_date_fmt` and + `input_time_fmt` and load from the specified column. + Examples: >>> import polars as pl >>> data = {'date': ["01-01-23", "01-02-23"], 'time': ["01:00:00 PM", "02:00:00 PM"], 'y': [10, 20]} @@ -96,35 +100,30 @@ def prep_forecast_df( raise IndexError("One or more column indices are out of range") # Select only the required columns using indices - df = df.select(df.columns[date_idx], df.columns[time_idx], df.columns[y_idx]) - - logger.debug("Combining date and time columns") - df = df.with_columns( - [pl.concat_str([df.columns[0], df.columns[1]], separator=" ").alias("ds")] - ) - logger.debug(f"Example row: {df[0]}") - - logger.debug( + if date_idx == time_idx: + df = df.select(df.columns[date_idx], df.columns[y_idx]) + df = df.rename({df.columns[0]: "ds"}) + else: + df = df.select(df.columns[date_idx], df.columns[time_idx], df.columns[y_idx]) + logger.debug("Combining date and time columns") + df = df.with_columns( + [pl.concat_str([df.columns[0], df.columns[1]], separator=" ").alias("ds")] + ) + logger.debug( f"Parsing datetimes with date format '{input_date_fmt}' and time format '{input_time_fmt}'" - ) - df = df.with_columns( - [ - pl.col("ds") - .str.strptime(pl.Datetime, format=f"{input_date_fmt} {input_time_fmt}") - .alias("parsed_datetime") - ] - ) - logger.debug(f"Example row: {df[0]}") + ) + df = df.with_columns( + [ + pl.col("ds") + .str.strptime(pl.Datetime, format=f"{input_date_fmt} {input_time_fmt}", strict=False) + .alias("parsed_datetime") + ] + ) - logger.debug(f"Writing datetimes in '{output_fmt}'") - df = df.with_columns( - [pl.col("parsed_datetime").dt.strftime(output_fmt).alias("ds")] - ) - - df = df.drop("parsed_datetime") + logger.debug(f"Example row: {df[0]}") # Rename the y column - df = df.rename({df.columns[2]: "y"}) + df = df.rename({df.columns[1]: "y"}) logger.debug("Adding unique_id column") df = df.with_columns(pl.lit(0).alias("unique_id"))