From b641cf140139efad31fa2b92311776b86b1fca37 Mon Sep 17 00:00:00 2001 From: gbayarri Date: Thu, 27 Jun 2024 11:40:35 +0200 Subject: [PATCH] 2024.1 --- README.md | 14 +- biobb_dna/__init__.py | 2 +- biobb_dna/curvesplus/canal_unzip.py | 1 + biobb_dna/dna/dna_timeseries_unzip.py | 7 +- biobb_dna/docs/source/change_log.md | 9 + biobb_dna/docs/source/command_line.md | 180 ++++++++- biobb_dna/docs/source/conf.py | 4 +- biobb_dna/docs/source/readme.md | 14 +- biobb_dna/docs/source/schema.html | 2 +- biobb_dna/interbp_correlations/interhpcorr.py | 1 - biobb_dna/intrabp_correlations/intrahpcorr.py | 1 - biobb_dna/json_schemas/average_stiffness.json | 6 + .../json_schemas/basepair_stiffness.json | 6 + biobb_dna/json_schemas/biobb_canal.json | 18 +- biobb_dna/json_schemas/biobb_canion.json | 12 +- biobb_dna/json_schemas/biobb_curves.json | 6 + biobb_dna/json_schemas/biobb_dna.json | 22 +- biobb_dna/json_schemas/bipopulations.json | 6 + biobb_dna/json_schemas/canal_unzip.json | 369 ++++++++++++++++++ biobb_dna/json_schemas/canonicalag.json | 6 + biobb_dna/json_schemas/dna_averages.json | 6 + biobb_dna/json_schemas/dna_bimodality.json | 264 +++++++------ biobb_dna/json_schemas/dna_timeseries.json | 6 + .../json_schemas/dna_timeseries_unzip.json | 335 ++++++++++++++++ biobb_dna/json_schemas/interbpcorr.json | 6 + biobb_dna/json_schemas/interhpcorr.json | 14 +- biobb_dna/json_schemas/interseqcorr.json | 6 + biobb_dna/json_schemas/intrabpcorr.json | 6 + biobb_dna/json_schemas/intrahpcorr.json | 14 +- biobb_dna/json_schemas/intraseqcorr.json | 6 + biobb_dna/json_schemas/puckering.json | 12 +- .../test/data/config/config_biobb_canal.json | 2 + .../test/data/config/config_biobb_canal.yml | 2 + .../test/data/config/config_canal_unzip.json | 6 + .../test/data/config/config_canal_unzip.yml | 3 + .../config/config_dna_timeseries_unzip.json | 8 + .../config/config_dna_timeseries_unzip.yml | 5 + setup.py | 4 +- 38 files changed, 1201 insertions(+), 190 deletions(-) create mode 100644 
biobb_dna/json_schemas/canal_unzip.json create mode 100644 biobb_dna/json_schemas/dna_timeseries_unzip.json create mode 100644 biobb_dna/test/data/config/config_canal_unzip.json create mode 100644 biobb_dna/test/data/config/config_canal_unzip.yml create mode 100644 biobb_dna/test/data/config/config_dna_timeseries_unzip.json create mode 100644 biobb_dna/test/data/config/config_dna_timeseries_unzip.yml diff --git a/README.md b/README.md index d6756bc7..4fd30521 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![](https://img.shields.io/conda/vn/bioconda/biobb_dna?label=Conda)](https://anaconda.org/bioconda/biobb_dna) [![](https://img.shields.io/conda/dn/bioconda/biobb_dna?label=Conda%20Downloads)](https://anaconda.org/bioconda/biobb_dna) [![](https://img.shields.io/badge/Docker-Quay.io-blue)](https://quay.io/repository/biocontainers/biobb_dna?tab=tags) -[![](https://img.shields.io/badge/Singularity-GalaxyProject-blue)](https://depot.galaxyproject.org/singularity/biobb_dna:4.2.2--pyhdfd78af_0) +[![](https://img.shields.io/badge/Singularity-GalaxyProject-blue)](https://depot.galaxyproject.org/singularity/biobb_dna:4.2.3--pyhdfd78af_0) [![](https://img.shields.io/badge/OS-Unix%20%7C%20MacOS-blue)](https://github.com/bioexcel/biobb_dna) [![](https://img.shields.io/pypi/pyversions/biobb-dna.svg?label=Python%20Versions)](https://pypi.org/project/biobb-dna/) @@ -41,7 +41,7 @@ The latest documentation of this package can be found in our readthedocs site: [latest API documentation](http://biobb-dna.readthedocs.io/en/latest/). 
### Version -v4.2.2 2024.1 +v4.2.3 2024.1 ### Installation Using PIP: @@ -51,7 +51,7 @@ Using PIP: * Installation: - pip install "biobb_dna>=4.2.2" + pip install "biobb_dna>=4.2.3" * Usage: [Python API documentation](https://biobb-dna.readthedocs.io/en/latest/modules.html) @@ -61,7 +61,7 @@ Using ANACONDA: * Installation: - conda install -c bioconda "biobb_dna>=4.2.2" + conda install -c bioconda "biobb_dna>=4.2.3" * Usage: With conda installation BioBBs can be used with the [Python API documentation](https://biobb-dna.readthedocs.io/en/latest/modules.html) and the [Command Line documentation](https://biobb-dna.readthedocs.io/en/latest/command_line.html) @@ -71,13 +71,13 @@ Using DOCKER: * Installation: - docker pull quay.io/biocontainers/biobb_dna:4.2.2--pyhdfd78af_0 + docker pull quay.io/biocontainers/biobb_dna:4.2.3--pyhdfd78af_0 * Usage: - docker run quay.io/biocontainers/biobb_dna:4.2.2--pyhdfd78af_0 + docker run quay.io/biocontainers/biobb_dna:4.2.3--pyhdfd78af_0 Using SINGULARITY: @@ -86,7 +86,7 @@ Using SINGULARITY: * Installation: - singularity pull --name biobb_dna.sif https://depot.galaxyproject.org/singularity/biobb_dna:4.2.2--pyhdfd78af_0 + singularity pull --name biobb_dna.sif https://depot.galaxyproject.org/singularity/biobb_dna:4.2.3--pyhdfd78af_0 * Usage: diff --git a/biobb_dna/__init__.py b/biobb_dna/__init__.py index 9fd9c54b..c2be4bf7 100644 --- a/biobb_dna/__init__.py +++ b/biobb_dna/__init__.py @@ -1,4 +1,4 @@ name = "biobb_dna" __all__ = ["dna", "curvesplus", "backbone", "stiffness", "interbp_correlations", "intrabp_correlations"] -__version__ = "4.2.2" +__version__ = "4.2.3" diff --git a/biobb_dna/curvesplus/canal_unzip.py b/biobb_dna/curvesplus/canal_unzip.py index 8b7a18fc..ec83c74c 100755 --- a/biobb_dna/curvesplus/canal_unzip.py +++ b/biobb_dna/curvesplus/canal_unzip.py @@ -27,6 +27,7 @@ class CanalUnzip(BiobbObject): * **correlation** (*str*) - (None) Correlation indexes separated by underscore (ie '98_165'), only for 'corr' type. 
* **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. + * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. Examples: This is a use example of how to use the building block from Python:: diff --git a/biobb_dna/dna/dna_timeseries_unzip.py b/biobb_dna/dna/dna_timeseries_unzip.py index b09e49d6..595bc0da 100755 --- a/biobb_dna/dna/dna_timeseries_unzip.py +++ b/biobb_dna/dna/dna_timeseries_unzip.py @@ -19,9 +19,9 @@ class DnaTimeseriesUnzip(BiobbObject): Args: input_zip_file (str): Zip file with dna_timeseries output files. File type: input. `Sample file `_. Accepted formats: zip (edam:format_3987). - output_path_csv (str): dna_timeseries output csv file contained within input_zip_file. File type: output. `Sample file `_. Accepted formats: csv (edam:format_3752). - output_path_jpg (str): dna_timeseries output jpg file contained within input_zip_file. File type: output. `Sample file `_. Accepted formats: jpg (edam:format_3579). - output_list_path (str) (Optional): Text file with a list of all dna_timeseries output files contained within input_zip_file. File type: output. `Sample file `_. Accepted formats: txt (edam:format_2330). + output_path_csv (str): dna_timeseries output csv file contained within input_zip_file. File type: output. `Sample file `_. Accepted formats: csv (edam:format_3752). + output_path_jpg (str): dna_timeseries output jpg file contained within input_zip_file. File type: output. `Sample file `_. Accepted formats: jpg (edam:format_3579). + output_list_path (str) (Optional): Text file with a list of all dna_timeseries output files contained within input_zip_file. File type: output. `Sample file `_. Accepted formats: txt (edam:format_2330). properties (dic): * **type** (*str*) - (None) Type of analysis, series or histogram. Values: series, hist. * **parameter** (*str*) - (None) Type of parameter. 
Values: majd, majw, mind, minw, inclin, tip, xdisp, ydisp, shear, stretch, stagger, buckle, propel, opening, rise, roll, twist, shift, slide, tilt, alphaC, alphaW, betaC, betaW, gammaC, gammaW, deltaC, deltaW, epsilC, epsilW, zetaC, zetaW, chiC, chiW, phaseC, phaseW. @@ -29,6 +29,7 @@ class DnaTimeseriesUnzip(BiobbObject): * **index** (*int*) - (0) Base pair index in the parameter 'sequence', starting from 0. * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. + * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. Examples: This is a use example of how to use the building block from Python:: diff --git a/biobb_dna/docs/source/change_log.md b/biobb_dna/docs/source/change_log.md index 898c6f9e..bbf1cd88 100644 --- a/biobb_dna/docs/source/change_log.md +++ b/biobb_dna/docs/source/change_log.md @@ -1,5 +1,14 @@ # Biobb Analysis changelog +## What's new in version [4.2.3](https://github.com/bioexcel/biobb_dna/releases/tag/v4.2.3)? +In version 4.2.3 some bugs in Canion and Curves have been fixed. Added two new tools: CanalUnzip and DnaTimeseriesUnzip. + +### New features + +* Bug fixes in Canion and Curves (curvesplus) +* New CanalUnzip tool (curvesplus) +* New DnaTimeseriesUnzip tool (dna) + ## What's new in version [4.2.2](https://github.com/bioexcel/biobb_dna/releases/tag/v4.2.2)? In version 4.2.2 a bug in HelParBimodality has been fixed. diff --git a/biobb_dna/docs/source/command_line.md b/biobb_dna/docs/source/command_line.md index 30c0f265..4ed48505 100644 --- a/biobb_dna/docs/source/command_line.md +++ b/biobb_dna/docs/source/command_line.md @@ -52,6 +52,7 @@ Config parameters for this building block: * **seqpos** (*array*): (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. 
* **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. ### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_bipopulations.yml) ```python @@ -129,6 +130,7 @@ Config parameters for this building block: * **seqpos** (*array*): (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. ### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_intrabpcorr.yml) ```python @@ -206,6 +208,7 @@ Config parameters for this building block: * **scaling** (*array*): ([1, 1, 1, 10.6, 10.6, 10.6]) Values by which to scale stiffness. Positions correspond to helical parameters in the order: shift, slide, rise, tilt, roll, twist.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. ### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_basepair_stiffness.yml) ```python @@ -270,6 +273,7 @@ Config parameters for this building block: * **seqpos** (*array*): (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. 
### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_dna_averages.yml) ```python @@ -347,17 +351,20 @@ Config parameters for this building block: * **lev2** (*integer*): (0) Upper base level limit used for analysis. If lev1 > 0 and lev2 = 0, lev2 is set to lev1 (i.e. analyze lev1 only). If lev1=lev2=0, lev1 is set to 1 and lev2 is set to the length of the oligmer (i.e. analyze all levels).. * **nastr** (*string*): (NA) character string used to indicate missing data in .ser files.. * **cormin** (*number*): (0.6) minimal absolute value for printing linear correlation coefficients between pairs of analyzed variables.. -* **series** (*string*): (False) if True then output spatial or time series data. Only possible for the analysis of single structures or single trajectories.. -* **histo** (*string*): (False) if True then output histogram data.. -* **corr** (*string*): (False) if True than output linear correlation coefficients between all variables.. +* **series** (*boolean*): (False) if True then output spatial or time series data. Only possible for the analysis of single structures or single trajectories.. +* **histo** (*boolean*): (False) if True then output histogram data.. +* **corr** (*boolean*): (False) if True then output linear correlation coefficients between all variables.. * **sequence** (*string*): (Optional) sequence of the first strand of the corresponding DNA fragment, for each .cda file. If not given it will be parsed from .lis file.. * **binary_path** (*string*): (Canal) Path to Canal executable, otherwise the program wil look for Canal executable in the binaries folder.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. 
### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_biobb_canal.yml) ```python properties: + corr: true + histo: true sequence: CGCGAATTCGCG series: true @@ -372,6 +379,8 @@ biobb_canal --config config_biobb_canal.yml --input_cda_file curves_output.cda - { "properties": { "series": true, + "histo": true, + "corr": true, "sequence": "CGCGAATTCGCG" } } @@ -381,6 +390,72 @@ biobb_canal --config config_biobb_canal.yml --input_cda_file curves_output.cda - biobb_canal --config config_biobb_canal.json --input_cda_file curves_output.cda --input_lis_file input.lis --output_zip_path canal_output.zip ``` +## Canal_unzip +Tool for extracting biobb_canal output files. +### Get help +Command: +```python +canal_unzip -h +``` + usage: canal_unzip [-h] [--config CONFIG] --input_zip_file INPUT_ZIP_FILE --output_path OUTPUT_PATH [--output_list_path OUTPUT_LIST_PATH] + + Tool for extracting biobb_canal output files. + + optional arguments: + -h, --help show this help message and exit + --config CONFIG Configuration file + --output_list_path OUTPUT_LIST_PATH + Text file with a list of all Canal output files contained within input_zip_file. Accepted formats: txt. + + required arguments: + --input_zip_file INPUT_ZIP_FILE + Zip file with Canal output files. Accepted formats: zip. + --output_path OUTPUT_PATH + Canal output file contained within input_zip_file. Accepted formats: ser, his, cor. +### I / O Arguments +Syntax: input_argument (datatype) : Definition + +Config input / output arguments for this building block: +* **input_zip_file** (*string*): Zip file with Canal output files. File type: input. [Sample file](https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/canal_output.zip). Accepted formats: ZIP +* **output_path** (*string*): Canal output file contained within input_zip_file. File type: output. 
[Sample file](https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.ser). Accepted formats: SER, HIS, COR +* **output_list_path** (*string*): Text file with a list of all Canal output files contained within input_zip_file. File type: output. [Sample file](https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.txt). Accepted formats: TXT +### Config +Syntax: input_parameter (datatype) - (default_value) Definition + +Config parameters for this building block: +* **type** (*string*): (None) Type of file. . +* **helpar_name** (*string*): (None) Helical parameter name, only for 'series' and 'histo' types. . +* **correlation** (*string*): (None) Correlation indexes separated by underscore (ie '98_165'), only for 'corr' type.. +* **remove_tmp** (*boolean*): (True) Remove temporal files.. +* **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. +### YAML +#### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_canal_unzip.yml) +```python +properties: + helpar_name: alphaC + type: histo + +``` +#### Command line +```python +canal_unzip --config config_canal_unzip.yml --input_zip_file canal_output.zip --output_path canal_unzip_output.ser --output_list_path canal_unzip_output.txt +``` +### JSON +#### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_canal_unzip.json) +```python +{ + "properties": { + "type": "histo", + "helpar_name": "alphaC" + } +} +``` +#### Command line +```python +canal_unzip --config config_canal_unzip.json --input_zip_file canal_output.zip --output_path canal_unzip_output.ser --output_list_path canal_unzip_output.txt +``` + ## Biobb_canion Wrapper for the Canion executable that is part of the Curves+ software suite. 
### Get help @@ -425,14 +500,15 @@ Config parameters for this building block: * **rhig** (*number*): (0.0) Maximal distances from the helical axis taken into account in the analysis.. * **alow** (*number*): (0.0) Minimal angle range to analyze.. * **ahig** (*number*): (360.0) Maximal angle range to analyze.. -* **itst** (*integer*): (None) Number of first snapshot to be analyzed.. -* **itnd** (*integer*): (None) Number of last snapshot to be analyzed.. -* **itdel** (*integer*): (None) Spacing between analyzed snapshots.. +* **itst** (*integer*): (0) Number of first snapshot to be analyzed.. +* **itnd** (*integer*): (0) Number of last snapshot to be analyzed.. +* **itdel** (*integer*): (1) Spacing between analyzed snapshots.. * **rmsf** (*boolean*): (False) If set to True uses the combination of the helical ion parameters and an average helical axis to map the ions into Cartesian space and then calculates their average position (pdb output) and their root mean square fluctuation values (rmsf output). A single pass rmsf algorithm to make this calculation possible with a single read of the trajectory file. This option is generally used for solute atoms and not for solvent molecules or ions.. * **circ** (*boolean*): (False) If set to True, minicircles are analyzed.. * **binary_path** (*string*): (Canion) Path to Canion executable, otherwise the program wil look for Canion executable in the binaries folder.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. 
### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_biobb_canion.yml) ```python @@ -514,9 +590,10 @@ Config input / output arguments for this building block: Syntax: input_parameter (datatype) - (default_value) Definition Config parameters for this building block: +* **base** (*string*): (None) Name of base analyzed.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. -* **base** (*string*): (None) Name of base analyzed.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. ### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_intrahpcorr.yml) ```python @@ -580,6 +657,7 @@ Config parameters for this building block: * **seqpos** (*array*): (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. ### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_intraseqcorr.yml) ```python @@ -621,7 +699,7 @@ dna_bimodality -h --config CONFIG Configuration file --input_zip_file INPUT_ZIP_FILE Path to zip file containing csv input files. Accepted formats: zip. - + required arguments: --input_csv_file INPUT_CSV_FILE Path to csv file with data. Accepted formats: csv. @@ -647,6 +725,7 @@ Config parameters for this building block: * **tol** (*number*): (1e-05) Tolerance value for EM algorithm.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.1. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. 
### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_dna_bimodality.yml) ```python @@ -715,11 +794,11 @@ Syntax: input_parameter (datatype) - (default_value) Definition Config parameters for this building block: * **sequence** (*string*): (None) Nucleic acid sequence corresponding to the input .ser file. Length of sequence is expected to be the same as the total number of columns in the .ser file, minus the index column (even if later on a subset of columns is selected with the *seqpos* option).. -* **helpar_name** (*string*): (None) helical parameter name.. * **stride** (*integer*): (1000) granularity of the number of snapshots for plotting time series.. * **seqpos** (*array*): (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. ### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_puckering.yml) ```python @@ -782,6 +861,7 @@ Config parameters for this building block: * **seqpos** (*array*): (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. ### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_dna_timeseries.yml) ```python @@ -866,6 +946,7 @@ Config parameters for this building block: * **seqpos** (*array*): (None) list of sequence positions (columns indices starting by 0) to analyze. 
If not specified it will analyse the complete sequence.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. ### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_interbpcorr.yml) ```python @@ -891,6 +972,80 @@ interbpcorr --config config_interbpcorr.yml --input_filename_shift canal_output_ interbpcorr --config config_interbpcorr.json --input_filename_shift canal_output_shift.ser --input_filename_slide canal_output_slide.ser --input_filename_rise canal_output_rise.ser --input_filename_tilt canal_output_tilt.ser --input_filename_roll canal_output_roll.ser --input_filename_twist canal_output_twist.ser --output_csv_path inter_bpcorr_ref.csv --output_jpg_path inter_bpcorr_ref.jpg ``` +## Dna_timeseries_unzip +Tool for extracting dna_timeseries output files. +### Get help +Command: +```python +dna_timeseries_unzip -h +``` + usage: dna_timeseries_unzip [-h] [--config CONFIG] --input_zip_file INPUT_ZIP_FILE --output_path_csv OUTPUT_PATH_CSV --output_path_jpg OUTPUT_PATH_JPG [--output_list_path OUTPUT_LIST_PATH] + + Tool for extracting dna_timeseries output files. + + optional arguments: + -h, --help show this help message and exit + --config CONFIG Configuration file + --output_list_path OUTPUT_LIST_PATH + Text file with a list of all dna_timeseries output files contained within input_zip_file. Accepted formats: txt. + + required arguments: + --input_zip_file INPUT_ZIP_FILE + Zip file with dna_timeseries output files. Accepted formats: zip. + --output_path_csv OUTPUT_PATH_CSV + dna_timeseries output csv file contained within input_zip_file. Accepted formats: csv. + --output_path_jpg OUTPUT_PATH_JPG + dna_timeseries output jpg file contained within input_zip_file. Accepted formats: jpg. 
+### I / O Arguments +Syntax: input_argument (datatype) : Definition + +Config input / output arguments for this building block: +* **input_zip_file** (*string*): Zip file with dna_timeseries output files. File type: input. [Sample file](https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/dna/timeseries_output.zip). Accepted formats: ZIP +* **output_path_csv** (*string*): dna_timeseries output csv file contained within input_zip_file. File type: output. [Sample file](https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.csv). Accepted formats: CSV +* **output_path_jpg** (*string*): dna_timeseries output jpg file contained within input_zip_file. File type: output. [Sample file](https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.jpg). Accepted formats: JPG +* **output_list_path** (*string*): Text file with a list of all dna_timeseries output files contained within input_zip_file. File type: output. [Sample file](https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.txt). Accepted formats: TXT +### Config +Syntax: input_parameter (datatype) - (default_value) Definition + +Config parameters for this building block: +* **type** (*string*): (None) Type of analysis, series or histogram. . +* **parameter** (*string*): (None) Type of parameter. . +* **sequence** (*string*): (None) Nucleic acid sequence used for generating dna_timeseries output file.. +* **index** (*integer*): (0) Base pair index in the parameter 'sequence', starting from 0.. +* **remove_tmp** (*boolean*): (True) Remove temporal files.. +* **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. 
+### YAML +#### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_dna_timeseries_unzip.yml) +```python +properties: + index: 5 + parameter: shift + sequence: CGCGAATTCGCG + type: hist + +``` +#### Command line +```python +dna_timeseries_unzip --config config_dna_timeseries_unzip.yml --input_zip_file timeseries_output.zip --output_path_csv dna_timeseries_unzip.csv --output_path_jpg dna_timeseries_unzip.jpg --output_list_path dna_timeseries_unzip.txt +``` +### JSON +#### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_dna_timeseries_unzip.json) +```python +{ + "properties": { + "type": "hist", + "parameter": "shift", + "sequence": "CGCGAATTCGCG", + "index": 5 + } +} +``` +#### Command line +```python +dna_timeseries_unzip --config config_dna_timeseries_unzip.json --input_zip_file timeseries_output.zip --output_path_csv dna_timeseries_unzip.csv --output_path_jpg dna_timeseries_unzip.jpg --output_list_path dna_timeseries_unzip.txt +``` + ## Interseqcorr Calculate correlation between all base pairs of a single sequence and for a single helical parameter. ### Get help @@ -929,6 +1084,7 @@ Config parameters for this building block: * **seqpos** (*array*): (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. 
### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_interseqcorr.yml) ```python @@ -1002,9 +1158,10 @@ Config input / output arguments for this building block: Syntax: input_parameter (datatype) - (default_value) Definition Config parameters for this building block: +* **basepair** (*string*): (None) Name of basepair analyzed.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. -* **basepair** (*string*): (None) Name of basepair analyzed.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. ### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_interhpcorr.yml) ```python @@ -1076,6 +1233,7 @@ Config parameters for this building block: * **seqpos** (*array*): (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. ### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_canonicalag.yml) ```python @@ -1140,6 +1298,7 @@ Config parameters for this building block: * **seqpos** (*array*): (None) list of sequence positions (columns indices starting by 0) to analyze. If not specified it will analyse the complete sequence.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. 
### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_average_stiffness.yml) ```python @@ -1218,6 +1377,7 @@ Config parameters for this building block: * **binary_path** (*string*): (Cur+) Path to Curves+ executable, otherwise the program wil look for Cur+ executable in the binaries folder.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. * **restart** (*boolean*): (False) Do not execute if output files exist.. +* **sandbox_path** (*string*): (./) Parent path to the sandbox directory.. ### YAML #### [Common config file](https://github.com/bioexcel/biobb_dna/blob/master/biobb_dna/test/data/config/config_biobb_curves.yml) ```python diff --git a/biobb_dna/docs/source/conf.py b/biobb_dna/docs/source/conf.py index c3c05a76..f5d94464 100644 --- a/biobb_dna/docs/source/conf.py +++ b/biobb_dna/docs/source/conf.py @@ -74,9 +74,9 @@ # built documents. # # The short X.Y version. -version = u'4.2.2' +version = u'4.2.3' # The full version, including alpha/beta/rc tags. -release = u'4.2.2' +release = u'4.2.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/biobb_dna/docs/source/readme.md b/biobb_dna/docs/source/readme.md index d6756bc7..4fd30521 100644 --- a/biobb_dna/docs/source/readme.md +++ b/biobb_dna/docs/source/readme.md @@ -3,7 +3,7 @@ [![](https://img.shields.io/conda/vn/bioconda/biobb_dna?label=Conda)](https://anaconda.org/bioconda/biobb_dna) [![](https://img.shields.io/conda/dn/bioconda/biobb_dna?label=Conda%20Downloads)](https://anaconda.org/bioconda/biobb_dna) [![](https://img.shields.io/badge/Docker-Quay.io-blue)](https://quay.io/repository/biocontainers/biobb_dna?tab=tags) -[![](https://img.shields.io/badge/Singularity-GalaxyProject-blue)](https://depot.galaxyproject.org/singularity/biobb_dna:4.2.2--pyhdfd78af_0) +[![](https://img.shields.io/badge/Singularity-GalaxyProject-blue)](https://depot.galaxyproject.org/singularity/biobb_dna:4.2.3--pyhdfd78af_0) [![](https://img.shields.io/badge/OS-Unix%20%7C%20MacOS-blue)](https://github.com/bioexcel/biobb_dna) [![](https://img.shields.io/pypi/pyversions/biobb-dna.svg?label=Python%20Versions)](https://pypi.org/project/biobb-dna/) @@ -41,7 +41,7 @@ The latest documentation of this package can be found in our readthedocs site: [latest API documentation](http://biobb-dna.readthedocs.io/en/latest/). 
### Version -v4.2.2 2024.1 +v4.2.3 2024.1 ### Installation Using PIP: @@ -51,7 +51,7 @@ Using PIP: * Installation: - pip install "biobb_dna>=4.2.2" + pip install "biobb_dna>=4.2.3" * Usage: [Python API documentation](https://biobb-dna.readthedocs.io/en/latest/modules.html) @@ -61,7 +61,7 @@ Using ANACONDA: * Installation: - conda install -c bioconda "biobb_dna>=4.2.2" + conda install -c bioconda "biobb_dna>=4.2.3" * Usage: With conda installation BioBBs can be used with the [Python API documentation](https://biobb-dna.readthedocs.io/en/latest/modules.html) and the [Command Line documentation](https://biobb-dna.readthedocs.io/en/latest/command_line.html) @@ -71,13 +71,13 @@ Using DOCKER: * Installation: - docker pull quay.io/biocontainers/biobb_dna:4.2.2--pyhdfd78af_0 + docker pull quay.io/biocontainers/biobb_dna:4.2.3--pyhdfd78af_0 * Usage: - docker run quay.io/biocontainers/biobb_dna:4.2.2--pyhdfd78af_0 + docker run quay.io/biocontainers/biobb_dna:4.2.3--pyhdfd78af_0 Using SINGULARITY: @@ -86,7 +86,7 @@ Using SINGULARITY: * Installation: - singularity pull --name biobb_dna.sif https://depot.galaxyproject.org/singularity/biobb_dna:4.2.2--pyhdfd78af_0 + singularity pull --name biobb_dna.sif https://depot.galaxyproject.org/singularity/biobb_dna:4.2.3--pyhdfd78af_0 * Usage: diff --git a/biobb_dna/docs/source/schema.html b/biobb_dna/docs/source/schema.html index e2787789..a012e694 100644 --- a/biobb_dna/docs/source/schema.html +++ b/biobb_dna/docs/source/schema.html @@ -10,7 +10,7 @@ "applicationSubCategory": "http://www.edamontology.org/topic_3892", "citation": "https://www.nature.com/articles/s41597-019-0177-4", "license": "https://www.apache.org/licenses/LICENSE-2.0", - "softwareVersion": "4.2.2", + "softwareVersion": "4.2.3", "applicationSuite": "BioBB BioExcel Building Blocks", "codeRepository": "https://github.com/bioexcel/biobb_dna", "isAccessibleForFree": "True", diff --git a/biobb_dna/interbp_correlations/interhpcorr.py 
b/biobb_dna/interbp_correlations/interhpcorr.py index 11240119..51e74353 100755 --- a/biobb_dna/interbp_correlations/interhpcorr.py +++ b/biobb_dna/interbp_correlations/interhpcorr.py @@ -33,7 +33,6 @@ class InterHelParCorrelation(BiobbObject): * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. - * **basepair** (*str*) - (None) Name of basepair analyzed. Examples: This is a use example of how to use the building block from Python:: diff --git a/biobb_dna/intrabp_correlations/intrahpcorr.py b/biobb_dna/intrabp_correlations/intrahpcorr.py index 72204e98..1f9265b8 100755 --- a/biobb_dna/intrabp_correlations/intrahpcorr.py +++ b/biobb_dna/intrabp_correlations/intrahpcorr.py @@ -33,7 +33,6 @@ class IntraHelParCorrelation(BiobbObject): * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. - * **base** (*str*) - (None) Name of base analyzed. Examples: This is a use example of how to use the building block from Python:: diff --git a/biobb_dna/json_schemas/average_stiffness.json b/biobb_dna/json_schemas/average_stiffness.json index 960f5be8..17438c74 100644 --- a/biobb_dna/json_schemas/average_stiffness.json +++ b/biobb_dna/json_schemas/average_stiffness.json @@ -107,6 +107,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." 
} } } diff --git a/biobb_dna/json_schemas/basepair_stiffness.json b/biobb_dna/json_schemas/basepair_stiffness.json index 1213e3e6..83030948 100644 --- a/biobb_dna/json_schemas/basepair_stiffness.json +++ b/biobb_dna/json_schemas/basepair_stiffness.json @@ -180,6 +180,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." } } } diff --git a/biobb_dna/json_schemas/biobb_canal.json b/biobb_dna/json_schemas/biobb_canal.json index 46530871..861d784a 100644 --- a/biobb_dna/json_schemas/biobb_canal.json +++ b/biobb_dna/json_schemas/biobb_canal.json @@ -121,20 +121,20 @@ "description": "minimal absolute value for printing linear correlation coefficients between pairs of analyzed variables." }, "series": { - "type": "string", - "default": "False", + "type": "boolean", + "default": false, "wf_prop": false, "description": "if True then output spatial or time series data. Only possible for the analysis of single structures or single trajectories." }, "histo": { - "type": "string", - "default": "False", + "type": "boolean", + "default": false, "wf_prop": false, "description": "if True then output histogram data." }, "corr": { - "type": "string", - "default": "False", + "type": "boolean", + "default": false, "wf_prop": false, "description": "if True than output linear correlation coefficients between all variables." }, @@ -161,6 +161,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." 
} } } diff --git a/biobb_dna/json_schemas/biobb_canion.json b/biobb_dna/json_schemas/biobb_canion.json index 509872e4..67364ad2 100644 --- a/biobb_dna/json_schemas/biobb_canion.json +++ b/biobb_dna/json_schemas/biobb_canion.json @@ -140,19 +140,19 @@ }, "itst": { "type": "integer", - "default": null, + "default": 0, "wf_prop": false, "description": "Number of first snapshot to be analyzed." }, "itnd": { "type": "integer", - "default": null, + "default": 0, "wf_prop": false, "description": "Number of last snapshot to be analyzed." }, "itdel": { "type": "integer", - "default": null, + "default": 1, "wf_prop": false, "description": "Spacing between analyzed snapshots." }, @@ -185,6 +185,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." } } } diff --git a/biobb_dna/json_schemas/biobb_curves.json b/biobb_dna/json_schemas/biobb_curves.json index b27db1ec..19e56bd4 100644 --- a/biobb_dna/json_schemas/biobb_curves.json +++ b/biobb_dna/json_schemas/biobb_curves.json @@ -212,6 +212,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." 
} } } diff --git a/biobb_dna/json_schemas/biobb_dna.json b/biobb_dna/json_schemas/biobb_dna.json index b78df93f..3a643545 100644 --- a/biobb_dna/json_schemas/biobb_dna.json +++ b/biobb_dna/json_schemas/biobb_dna.json @@ -4,9 +4,9 @@ "github": "https://github.com/bioexcel/biobb_dna", "readthedocs": "https://biobb-dna.readthedocs.io/en/latest/", "conda": "https://anaconda.org/bioconda/biobb_dna", - "docker": "https://quay.io/biocontainers/biobb_dna:4.2.2--pyhdfd78af_0", - "singularity": "https://depot.galaxyproject.org/singularity/biobb_dna:4.2.2--pyhdfd78af_0", - "version": "4.2.2", + "docker": "https://quay.io/biocontainers/biobb_dna:4.2.3--pyhdfd78af_0", + "singularity": "https://depot.galaxyproject.org/singularity/biobb_dna:4.2.3--pyhdfd78af_0", + "version": "4.2.3", "rest": true, "tools": [ { @@ -25,6 +25,14 @@ "docs": "https://biobb-dna.readthedocs.io/en/latest/curvesplus.html#module-curvesplus.biobb_canal", "rest": true }, + { + "block": "CanalUnzip", + "tool": "In House", + "desc": "Tool for extracting biobb_canal output files.", + "exec": "canal_unzip", + "docs": "https://biobb-dna.readthedocs.io/en/latest/curvesplus.html#module-curvesplus.canal_unzip", + "rest": false + }, { "block": "Canion", "tool": "Canion", @@ -73,6 +81,14 @@ "docs": "https://biobb-dna.readthedocs.io/en/latest/dna.html#module-dna.dna_timeseries", "rest": true }, + { + "block": "DnaTimeseriesUnzip", + "tool": "In House", + "desc": "Tool for extracting dna_timeseries output files.", + "exec": "dna_timeseries_unzip", + "docs": "https://biobb-dna.readthedocs.io/en/latest/dna.html#module-dna.dna_timeseries_unzip", + "rest": false + }, { "block": "HelParBimodality", "tool": "In House", diff --git a/biobb_dna/json_schemas/bipopulations.json b/biobb_dna/json_schemas/bipopulations.json index 7cc5580a..91c6f626 100644 --- a/biobb_dna/json_schemas/bipopulations.json +++ b/biobb_dna/json_schemas/bipopulations.json @@ -146,6 +146,12 @@ "default": false, "wf_prop": true, "description": "Do not 
execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." } } } diff --git a/biobb_dna/json_schemas/canal_unzip.json b/biobb_dna/json_schemas/canal_unzip.json new file mode 100644 index 00000000..edc497f7 --- /dev/null +++ b/biobb_dna/json_schemas/canal_unzip.json @@ -0,0 +1,369 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://bioexcel.eu/biobb_dna/json_schemas/1.0/canal_unzip", + "name": "biobb_dna CanalUnzip", + "title": "Tool for extracting biobb_canal output files.", + "description": null, + "type": "object", + "info": { + "wrapped_software": { + "name": "In house", + "license": "Apache-2.0" + }, + "ontology": { + "name": "EDAM", + "schema": "http://edamontology.org/EDAM.owl" + } + }, + "required": [ + "input_zip_file", + "output_path" + ], + "properties": { + "input_zip_file": { + "type": "string", + "description": "Zip file with Canal output files", + "filetype": "input", + "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/canal_output.zip", + "enum": [ + ".*\\.zip$" + ], + "file_formats": [ + { + "extension": ".*\\.zip$", + "description": "Zip file with Canal output files", + "edam": "format_3987" + } + ] + }, + "output_path": { + "type": "string", + "description": "Canal output file contained within input_zip_file", + "filetype": "output", + "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.ser", + "enum": [ + ".*\\.ser$", + ".*\\.his$", + ".*\\.cor$" + ], + "file_formats": [ + { + "extension": ".*\\.ser$", + "description": "Canal output file contained within input_zip_file", + "edam": "format_2330" + }, + { + "extension": ".*\\.his$", + "description": "Canal output file contained within input_zip_file", + "edam": "format_3905" + }, + { + "extension": ".*\\.cor$", + "description": "Canal 
output file contained within input_zip_file", + "edam": "format_3465" + } + ] + }, + "output_list_path": { + "type": "string", + "description": "Text file with a list of all Canal output files contained within input_zip_file", + "filetype": "output", + "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.txt", + "enum": [ + ".*\\.txt$" + ], + "file_formats": [ + { + "extension": ".*\\.txt$", + "description": "Text file with a list of all Canal output files contained within input_zip_file", + "edam": "format_2330" + } + ] + }, + "properties": { + "type": "object", + "properties": { + "type": { + "type": "string", + "default": null, + "wf_prop": false, + "description": "Type of file. ", + "enum": [ + "series", + "histo", + "corr" + ], + "property_formats": [ + { + "name": "series", + "description": null + }, + { + "name": "histo", + "description": null + }, + { + "name": "corr", + "description": null + } + ] + }, + "helpar_name": { + "type": "string", + "default": null, + "wf_prop": false, + "description": "Helical parameter name, only for 'series' and 'histo' types. 
", + "enum": [ + "alphaC", + "alphaW", + "ampC", + "ampW", + "ax-bend", + "betaC", + "betaW", + "buckle", + "chiC", + "chiW", + "curv", + "deltaC", + "deltaW", + "epsilC", + "epsilW", + "gammaC", + "gammaW", + "h-ris", + "h-twi", + "inclin", + "majd", + "majw", + "mind", + "minw", + "opening", + "phaseC", + "phaseW", + "propel", + "reg", + "rise", + "roll", + "shear", + "shift", + "slide", + "stagger", + "stretch", + "tbend", + "tilt", + "tip", + "twist", + "xdisp", + "ydisp", + "zetaC", + "zetaW" + ], + "property_formats": [ + { + "name": "alphaC", + "description": null + }, + { + "name": "alphaW", + "description": null + }, + { + "name": "ampC", + "description": null + }, + { + "name": "ampW", + "description": null + }, + { + "name": "ax-bend", + "description": null + }, + { + "name": "betaC", + "description": null + }, + { + "name": "betaW", + "description": null + }, + { + "name": "buckle", + "description": null + }, + { + "name": "chiC", + "description": null + }, + { + "name": "chiW", + "description": null + }, + { + "name": "curv", + "description": null + }, + { + "name": "deltaC", + "description": null + }, + { + "name": "deltaW", + "description": null + }, + { + "name": "epsilC", + "description": null + }, + { + "name": "epsilW", + "description": null + }, + { + "name": "gammaC", + "description": null + }, + { + "name": "gammaW", + "description": null + }, + { + "name": "h-ris", + "description": null + }, + { + "name": "h-twi", + "description": null + }, + { + "name": "inclin", + "description": null + }, + { + "name": "majd", + "description": null + }, + { + "name": "majw", + "description": null + }, + { + "name": "mind", + "description": null + }, + { + "name": "minw", + "description": null + }, + { + "name": "opening", + "description": null + }, + { + "name": "phaseC", + "description": null + }, + { + "name": "phaseW", + "description": null + }, + { + "name": "propel", + "description": null + }, + { + "name": "reg", + "description": null + }, + { + 
"name": "rise", + "description": null + }, + { + "name": "roll", + "description": null + }, + { + "name": "shear", + "description": null + }, + { + "name": "shift", + "description": null + }, + { + "name": "slide", + "description": null + }, + { + "name": "stagger", + "description": null + }, + { + "name": "stretch", + "description": null + }, + { + "name": "tbend", + "description": null + }, + { + "name": "tilt", + "description": null + }, + { + "name": "tip", + "description": null + }, + { + "name": "twist", + "description": null + }, + { + "name": "xdisp", + "description": null + }, + { + "name": "ydisp", + "description": null + }, + { + "name": "zetaC", + "description": null + }, + { + "name": "zetaW", + "description": null + } + ] + }, + "correlation": { + "type": "string", + "default": null, + "wf_prop": false, + "description": "Correlation indexes separated by underscore (ie '98_165'), only for 'corr' type." + }, + "remove_tmp": { + "type": "boolean", + "default": true, + "wf_prop": true, + "description": "Remove temporal files." + }, + "restart": { + "type": "boolean", + "default": false, + "wf_prop": true, + "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." + } + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/biobb_dna/json_schemas/canonicalag.json b/biobb_dna/json_schemas/canonicalag.json index 2a740e29..946f0d34 100644 --- a/biobb_dna/json_schemas/canonicalag.json +++ b/biobb_dna/json_schemas/canonicalag.json @@ -146,6 +146,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." 
} } } diff --git a/biobb_dna/json_schemas/dna_averages.json b/biobb_dna/json_schemas/dna_averages.json index 7b516b51..91b732fd 100644 --- a/biobb_dna/json_schemas/dna_averages.json +++ b/biobb_dna/json_schemas/dna_averages.json @@ -107,6 +107,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." } } } diff --git a/biobb_dna/json_schemas/dna_bimodality.json b/biobb_dna/json_schemas/dna_bimodality.json index 72cdfd95..012b12f0 100644 --- a/biobb_dna/json_schemas/dna_bimodality.json +++ b/biobb_dna/json_schemas/dna_bimodality.json @@ -1,131 +1,137 @@ { - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://bioexcel.eu/biobb_dna/json_schemas/1.0/dna_bimodality", - "name": "biobb_dna HelParBimodality", - "title": "Determine binormality/bimodality from a helical parameter series dataset.", - "description": "Determine binormality/bimodality from a helical parameter series dataset.", - "type": "object", - "info": { - "wrapped_software": { - "name": "In house", - "license": "Apache-2.0" - }, - "ontology": { - "name": "EDAM", - "schema": "http://edamontology.org/EDAM.owl" - } - }, - "required": [ - "input_csv_file", - "output_csv_path", - "output_jpg_path" - ], - "properties": { - "input_csv_file": { - "type": "string", - "description": "Path to .csv file with helical parameter series. If `input_zip_file` is passed, this should be just the filename of the .csv file inside .zip", - "filetype": "input", - "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/dna/series_shift_AT.csv", - "enum": [ - ".*\\.csv$" - ], - "file_formats": [ - { - "extension": ".*\\.csv$", - "description": "Path to .csv file with helical parameter series. 
If `input_zip_file` is passed, this should be just the filename of the .csv file inside .zip", - "edam": "format_3752" - } - ] - }, - "input_zip_file": { - "type": "string", - "description": ".zip file containing the `input_csv_file` .csv file", - "filetype": "input", - "sample": null, - "enum": [ - ".*\\.zip$" - ], - "file_formats": [ - { - "extension": ".*\\.zip$", - "description": "zip file containing the `input_csv_file` .csv file", - "edam": "format_3987" - } - ] - }, - "output_csv_path": { - "type": "string", - "description": "Path to .csv file where output is saved", - "filetype": "output", - "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/AT_shift_bimod.csv", - "enum": [ - ".*\\.csv$" - ], - "file_formats": [ - { - "extension": ".*\\.csv$", - "description": "Path to .csv file where output is saved", - "edam": "format_3752" - } - ] - }, - "output_jpg_path": { - "type": "string", - "description": "Path to .jpg file where output is saved", - "filetype": "output", - "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/AT_shift_bimod.jpg", - "enum": [ - ".*\\.jpg$" - ], - "file_formats": [ - { - "extension": ".*\\.jpg$", - "description": "Path to .jpg file where output is saved", - "edam": "format_3579" - } - ] - }, - "properties": { - "type": "object", - "properties": { - "helpar_name": { - "type": "string", - "default": "Optional", - "wf_prop": false, - "description": "helical parameter name." - }, - "confidence_level": { - "type": "number", - "default": 5.0, - "wf_prop": false, - "description": "Confidence level for Byes Factor test (in percentage)." - }, - "max_iter": { - "type": "integer", - "default": 400, - "wf_prop": false, - "description": "Number of maximum iterations for EM algorithm." - }, - "tol": { - "type": "number", - "default": 1e-05, - "wf_prop": false, - "description": "Tolerance value for EM algorithm." 
- }, - "remove_tmp": { - "type": "boolean", - "default": true, - "wf_prop": true, - "description": "Remove temporal files." - }, - "restart": { - "type": "boolean", - "default": false, - "wf_prop": true, - "description": "Do not execute if output files exist.1" - } - } - } - }, - "additionalProperties": false + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://bioexcel.eu/biobb_dna/json_schemas/1.0/dna_bimodality", + "name": "biobb_dna HelParBimodality", + "title": "Determine binormality/bimodality from a helical parameter series dataset.", + "description": "Determine binormality/bimodality from a helical parameter series dataset.", + "type": "object", + "info": { + "wrapped_software": { + "name": "In house", + "license": "Apache-2.0" + }, + "ontology": { + "name": "EDAM", + "schema": "http://edamontology.org/EDAM.owl" + } + }, + "required": [ + "input_csv_file", + "output_csv_path", + "output_jpg_path" + ], + "properties": { + "input_csv_file": { + "type": "string", + "description": "Path to .csv file with helical parameter series. If `input_zip_file` is passed, this should be just the filename of the .csv file inside .zip", + "filetype": "input", + "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/dna/series_shift_AT.csv", + "enum": [ + ".*\\.csv$" + ], + "file_formats": [ + { + "extension": ".*\\.csv$", + "description": "Path to .csv file with helical parameter series. 
If `input_zip_file` is passed, this should be just the filename of the .csv file inside .zip", + "edam": "format_3752" + } + ] + }, + "input_zip_file": { + "type": "string", + "description": ".zip file containing the `input_csv_file` .csv file", + "filetype": "input", + "sample": null, + "enum": [ + ".*\\.zip$" + ], + "file_formats": [ + { + "extension": ".*\\.zip$", + "description": "zip file containing the `input_csv_file` .csv file", + "edam": "format_3987" + } + ] + }, + "output_csv_path": { + "type": "string", + "description": "Path to .csv file where output is saved", + "filetype": "output", + "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/AT_shift_bimod.csv", + "enum": [ + ".*\\.csv$" + ], + "file_formats": [ + { + "extension": ".*\\.csv$", + "description": "Path to .csv file where output is saved", + "edam": "format_3752" + } + ] + }, + "output_jpg_path": { + "type": "string", + "description": "Path to .jpg file where output is saved", + "filetype": "output", + "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/AT_shift_bimod.jpg", + "enum": [ + ".*\\.jpg$" + ], + "file_formats": [ + { + "extension": ".*\\.jpg$", + "description": "Path to .jpg file where output is saved", + "edam": "format_3579" + } + ] + }, + "properties": { + "type": "object", + "properties": { + "helpar_name": { + "type": "string", + "default": "Optional", + "wf_prop": false, + "description": "helical parameter name." + }, + "confidence_level": { + "type": "number", + "default": 5.0, + "wf_prop": false, + "description": "Confidence level for Byes Factor test (in percentage)." + }, + "max_iter": { + "type": "integer", + "default": 400, + "wf_prop": false, + "description": "Number of maximum iterations for EM algorithm." + }, + "tol": { + "type": "number", + "default": 1e-05, + "wf_prop": false, + "description": "Tolerance value for EM algorithm." 
+ }, + "remove_tmp": { + "type": "boolean", + "default": true, + "wf_prop": true, + "description": "Remove temporal files." + }, + "restart": { + "type": "boolean", + "default": false, + "wf_prop": true, + "description": "Do not execute if output files exist.1" + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." + } + } + } + }, + "additionalProperties": false } \ No newline at end of file diff --git a/biobb_dna/json_schemas/dna_timeseries.json b/biobb_dna/json_schemas/dna_timeseries.json index 37b7f33d..eeec6b01 100644 --- a/biobb_dna/json_schemas/dna_timeseries.json +++ b/biobb_dna/json_schemas/dna_timeseries.json @@ -96,6 +96,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." } } } diff --git a/biobb_dna/json_schemas/dna_timeseries_unzip.json b/biobb_dna/json_schemas/dna_timeseries_unzip.json new file mode 100644 index 00000000..f92e25b1 --- /dev/null +++ b/biobb_dna/json_schemas/dna_timeseries_unzip.json @@ -0,0 +1,335 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://bioexcel.eu/biobb_dna/json_schemas/1.0/dna_timeseries_unzip", + "name": "biobb_dna DnaTimeseriesUnzip", + "title": "Tool for extracting dna_timeseries output files.", + "description": null, + "type": "object", + "info": { + "wrapped_software": { + "name": "In house", + "license": "Apache-2.0" + }, + "ontology": { + "name": "EDAM", + "schema": "http://edamontology.org/EDAM.owl" + } + }, + "required": [ + "input_zip_file", + "output_path_csv", + "output_path_jpg" + ], + "properties": { + "input_zip_file": { + "type": "string", + "description": "Zip file with dna_timeseries output files", + "filetype": "input", + "sample": 
"https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/dna/timeseries_output.zip", + "enum": [ + ".*\\.zip$" + ], + "file_formats": [ + { + "extension": ".*\\.zip$", + "description": "Zip file with dna_timeseries output files", + "edam": "format_3987" + } + ] + }, + "output_path_csv": { + "type": "string", + "description": "dna_timeseries output csv file contained within input_zip_file", + "filetype": "output", + "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.csv", + "enum": [ + ".*\\.csv$" + ], + "file_formats": [ + { + "extension": ".*\\.csv$", + "description": "dna_timeseries output csv file contained within input_zip_file", + "edam": "format_3752" + } + ] + }, + "output_path_jpg": { + "type": "string", + "description": "dna_timeseries output jpg file contained within input_zip_file", + "filetype": "output", + "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.jpg", + "enum": [ + ".*\\.jpg$" + ], + "file_formats": [ + { + "extension": ".*\\.jpg$", + "description": "dna_timeseries output jpg file contained within input_zip_file", + "edam": "format_3579" + } + ] + }, + "output_list_path": { + "type": "string", + "description": "Text file with a list of all dna_timeseries output files contained within input_zip_file", + "filetype": "output", + "sample": "https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.txt", + "enum": [ + ".*\\.txt$" + ], + "file_formats": [ + { + "extension": ".*\\.txt$", + "description": "Text file with a list of all dna_timeseries output files contained within input_zip_file", + "edam": "format_2330" + } + ] + }, + "properties": { + "type": "object", + "properties": { + "type": { + "type": "string", + "default": null, + "wf_prop": false, + "description": "Type of analysis, series or histogram. 
", + "enum": [ + "series", + "hist" + ], + "property_formats": [ + { + "name": "series", + "description": null + }, + { + "name": "hist", + "description": null + } + ] + }, + "parameter": { + "type": "string", + "default": null, + "wf_prop": false, + "description": "Type of parameter. ", + "enum": [ + "majd", + "majw", + "mind", + "minw", + "inclin", + "tip", + "xdisp", + "ydisp", + "shear", + "stretch", + "stagger", + "buckle", + "propel", + "opening", + "rise", + "roll", + "twist", + "shift", + "slide", + "tilt", + "alphaC", + "alphaW", + "betaC", + "betaW", + "gammaC", + "gammaW", + "deltaC", + "deltaW", + "epsilC", + "epsilW", + "zetaC", + "zetaW", + "chiC", + "chiW", + "phaseC", + "phaseW" + ], + "property_formats": [ + { + "name": "majd", + "description": null + }, + { + "name": "majw", + "description": null + }, + { + "name": "mind", + "description": null + }, + { + "name": "minw", + "description": null + }, + { + "name": "inclin", + "description": null + }, + { + "name": "tip", + "description": null + }, + { + "name": "xdisp", + "description": null + }, + { + "name": "ydisp", + "description": null + }, + { + "name": "shear", + "description": null + }, + { + "name": "stretch", + "description": null + }, + { + "name": "stagger", + "description": null + }, + { + "name": "buckle", + "description": null + }, + { + "name": "propel", + "description": null + }, + { + "name": "opening", + "description": null + }, + { + "name": "rise", + "description": null + }, + { + "name": "roll", + "description": null + }, + { + "name": "twist", + "description": null + }, + { + "name": "shift", + "description": null + }, + { + "name": "slide", + "description": null + }, + { + "name": "tilt", + "description": null + }, + { + "name": "alphaC", + "description": null + }, + { + "name": "alphaW", + "description": null + }, + { + "name": "betaC", + "description": null + }, + { + "name": "betaW", + "description": null + }, + { + "name": "gammaC", + "description": null + }, + { + "name": 
"gammaW", + "description": null + }, + { + "name": "deltaC", + "description": null + }, + { + "name": "deltaW", + "description": null + }, + { + "name": "epsilC", + "description": null + }, + { + "name": "epsilW", + "description": null + }, + { + "name": "zetaC", + "description": null + }, + { + "name": "zetaW", + "description": null + }, + { + "name": "chiC", + "description": null + }, + { + "name": "chiW", + "description": null + }, + { + "name": "phaseC", + "description": null + }, + { + "name": "phaseW", + "description": null + } + ] + }, + "sequence": { + "type": "string", + "default": null, + "wf_prop": false, + "description": "Nucleic acid sequence used for generating dna_timeseries output file." + }, + "index": { + "type": "integer", + "default": 0, + "wf_prop": false, + "description": "Base pair index in the parameter 'sequence', starting from 0." + }, + "remove_tmp": { + "type": "boolean", + "default": true, + "wf_prop": true, + "description": "Remove temporal files." + }, + "restart": { + "type": "boolean", + "default": false, + "wf_prop": true, + "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." + } + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/biobb_dna/json_schemas/interbpcorr.json b/biobb_dna/json_schemas/interbpcorr.json index 644b8760..593a9dab 100644 --- a/biobb_dna/json_schemas/interbpcorr.json +++ b/biobb_dna/json_schemas/interbpcorr.json @@ -180,6 +180,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." 
} } } diff --git a/biobb_dna/json_schemas/interhpcorr.json b/biobb_dna/json_schemas/interhpcorr.json index 4285afac..bd88f862 100644 --- a/biobb_dna/json_schemas/interhpcorr.json +++ b/biobb_dna/json_schemas/interhpcorr.json @@ -157,6 +157,12 @@ "properties": { "type": "object", "properties": { + "basepair": { + "type": "string", + "default": null, + "wf_prop": false, + "description": "Name of basepair analyzed." + }, "remove_tmp": { "type": "boolean", "default": true, @@ -169,11 +175,11 @@ "wf_prop": true, "description": "Do not execute if output files exist." }, - "basepair": { + "sandbox_path": { "type": "string", - "default": null, - "wf_prop": false, - "description": "Name of basepair analyzed." + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." } } } diff --git a/biobb_dna/json_schemas/interseqcorr.json b/biobb_dna/json_schemas/interseqcorr.json index f13533f5..eb229ef0 100644 --- a/biobb_dna/json_schemas/interseqcorr.json +++ b/biobb_dna/json_schemas/interseqcorr.json @@ -101,6 +101,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." } } } diff --git a/biobb_dna/json_schemas/intrabpcorr.json b/biobb_dna/json_schemas/intrabpcorr.json index 38163546..af85a336 100644 --- a/biobb_dna/json_schemas/intrabpcorr.json +++ b/biobb_dna/json_schemas/intrabpcorr.json @@ -180,6 +180,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." 
} } } diff --git a/biobb_dna/json_schemas/intrahpcorr.json b/biobb_dna/json_schemas/intrahpcorr.json index 302871c2..5d9824c9 100644 --- a/biobb_dna/json_schemas/intrahpcorr.json +++ b/biobb_dna/json_schemas/intrahpcorr.json @@ -157,6 +157,12 @@ "properties": { "type": "object", "properties": { + "base": { + "type": "string", + "default": null, + "wf_prop": false, + "description": "Name of base analyzed." + }, "remove_tmp": { "type": "boolean", "default": true, @@ -169,11 +175,11 @@ "wf_prop": true, "description": "Do not execute if output files exist." }, - "base": { + "sandbox_path": { "type": "string", - "default": null, - "wf_prop": false, - "description": "Name of base analyzed." + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." } } } diff --git a/biobb_dna/json_schemas/intraseqcorr.json b/biobb_dna/json_schemas/intraseqcorr.json index 4c0bd265..aea8f67e 100644 --- a/biobb_dna/json_schemas/intraseqcorr.json +++ b/biobb_dna/json_schemas/intraseqcorr.json @@ -101,6 +101,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." } } } diff --git a/biobb_dna/json_schemas/puckering.json b/biobb_dna/json_schemas/puckering.json index 6840ed0d..0d3cabac 100644 --- a/biobb_dna/json_schemas/puckering.json +++ b/biobb_dna/json_schemas/puckering.json @@ -95,12 +95,6 @@ "wf_prop": false, "description": "Nucleic acid sequence corresponding to the input .ser file. Length of sequence is expected to be the same as the total number of columns in the .ser file, minus the index column (even if later on a subset of columns is selected with the *seqpos* option)." }, - "helpar_name": { - "type": "string", - "default": null, - "wf_prop": false, - "description": "helical parameter name." 
- }, "stride": { "type": "integer", "default": 1000, @@ -124,6 +118,12 @@ "default": false, "wf_prop": true, "description": "Do not execute if output files exist." + }, + "sandbox_path": { + "type": "string", + "default": "./", + "wf_prop": true, + "description": "Parent path to the sandbox directory." } } } diff --git a/biobb_dna/test/data/config/config_biobb_canal.json b/biobb_dna/test/data/config/config_biobb_canal.json index 62eb82cf..e1a49c5b 100644 --- a/biobb_dna/test/data/config/config_biobb_canal.json +++ b/biobb_dna/test/data/config/config_biobb_canal.json @@ -1,6 +1,8 @@ { "properties": { "series": true, + "histo": true, + "corr": true, "sequence": "CGCGAATTCGCG" } } \ No newline at end of file diff --git a/biobb_dna/test/data/config/config_biobb_canal.yml b/biobb_dna/test/data/config/config_biobb_canal.yml index a7b69a2c..3076b3ed 100644 --- a/biobb_dna/test/data/config/config_biobb_canal.yml +++ b/biobb_dna/test/data/config/config_biobb_canal.yml @@ -1,3 +1,5 @@ properties: + corr: true + histo: true sequence: CGCGAATTCGCG series: true diff --git a/biobb_dna/test/data/config/config_canal_unzip.json b/biobb_dna/test/data/config/config_canal_unzip.json new file mode 100644 index 00000000..014f8f86 --- /dev/null +++ b/biobb_dna/test/data/config/config_canal_unzip.json @@ -0,0 +1,6 @@ +{ + "properties": { + "type": "histo", + "helpar_name": "alphaC" + } +} \ No newline at end of file diff --git a/biobb_dna/test/data/config/config_canal_unzip.yml b/biobb_dna/test/data/config/config_canal_unzip.yml new file mode 100644 index 00000000..f96dfcd4 --- /dev/null +++ b/biobb_dna/test/data/config/config_canal_unzip.yml @@ -0,0 +1,3 @@ +properties: + helpar_name: alphaC + type: histo diff --git a/biobb_dna/test/data/config/config_dna_timeseries_unzip.json b/biobb_dna/test/data/config/config_dna_timeseries_unzip.json new file mode 100644 index 00000000..7f3cb8c2 --- /dev/null +++ b/biobb_dna/test/data/config/config_dna_timeseries_unzip.json @@ -0,0 +1,8 @@ +{ + 
"properties": { + "type": "hist", + "parameter": "shift", + "sequence": "CGCGAATTCGCG", + "index": 5 + } +} \ No newline at end of file diff --git a/biobb_dna/test/data/config/config_dna_timeseries_unzip.yml b/biobb_dna/test/data/config/config_dna_timeseries_unzip.yml new file mode 100644 index 00000000..7f8719b7 --- /dev/null +++ b/biobb_dna/test/data/config/config_dna_timeseries_unzip.yml @@ -0,0 +1,5 @@ +properties: + index: 5 + parameter: shift + sequence: CGCGAATTCGCG + type: hist diff --git a/setup.py b/setup.py index 5a430e41..47d02c2d 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="biobb_dna", - version="4.2.2", + version="4.2.3", author="Biobb developers", author_email="genis.bayarri@irbbarcelona.com", description="Biobb_dna is a package composed of different analyses for nucleic acid trajectories.", @@ -21,7 +21,7 @@ install_requires=[ 'biobb_common==4.2.0', 'pandas>=1.3.0', - 'scikit-learn==0.24.2'], + 'scikit-learn>=0.24.2'], python_requires='>=3.8', entry_points={ "console_scripts": [