BU-ISCIII · saramonzon · Jan 30, 2024 · Jan 8, 2024 · Jan 8, 2024 · Jan 8, 2024
diff --git a/.github/workflows/dockerhub_push_release.yml b/.github/workflows/dockerhub_push_release.yml
diff --git a/.github/workflows/python_lint.yml b/.github/workflows/python_lint.yml
@@ -0,0 +1,35 @@
+name: python_lint
+# Lints Python sources with flake8 and Black whenever a .py file changes.
+on:
+  push:
+    paths:
+      - '**.py'
+  pull_request:
+    paths:
+      - '**.py'
+
+jobs:
+  flake8_py3:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Setup Python
+        uses: actions/setup-python@v5  # v1 is deprecated
+        with:
+          python-version: '3.9.x'  # quoted so it is always read as a string
+          architecture: x64
+      - name: Checkout repository
+        uses: actions/checkout@v4  # pinned major tag instead of the moving master branch
+      - name: Install flake8
+        run: pip install flake8
+      - name: Run flake8
+        run: flake8 --ignore E501,W503,E203,W605  # E501/W503/E203 clash with Black; W605 = invalid escape sequence
+
+  black_lint:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Install black in jupyter
+        run: pip install 'black[jupyter]'  # quoted so the shell cannot glob-expand the brackets
+      - name: Check code lints with Black
+        uses: psf/black@stable
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -1,38 +1,31 @@
 name: tests ci
-# This workflow runs the pipeline with the minimal test dataset to check that it completes any errors
+# This workflow runs the pipeline with the minimal test dataset to check
+# that it completes without any errors
 on:
-  push:
-    branches: [develop]
-  pull_request_target:
-    branches: [develop]
-  release:
-    types: [published]
+    pull_request:
+    push:
 
 jobs:
-  push_dockerhub:
-    name: Push new Docker image to Docker Hub (dev)
+  create-conda-env:
-  create-conda-env:
+  tests:
-  create-conda-env:
+  tests:
     runs-on: ubuntu-latest
-    # Only run for the official repo, for releases and merged PRs
-    if: ${{ github.repository == 'BU-ISCIII/taranis' }}
-    env:
-      DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
-      DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASSWORD }}
+
     steps:
-      - name: Check out pipeline code
+      - name: Checkout repository
         uses: actions/checkout@v2
 
-      - name: Build new docker image
-        run: docker build --no-cache . -t buisciii/taranis:dev
+      - name: Set up Miniconda
+        uses: conda-incubator/setup-miniconda@v2
+        with:
+          activate-environment: myenv
+          environment-file: environment.yml
 
-      - name: Push Docker image to DockerHub (develop)
-        run: |
-          echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
-          docker push buisciii/taranis:dev
-  run-tests:
-    name: Run tests
-    needs: push_dockerhub
-    runs-on: ubuntu-latest
-    steps:
-      - name: Run pipeline with test data
+      - name: Verify Conda environment
+        run: conda env list
+
+      - name: Run your script
         run: |
-            docker run buisciii/taranis:dev bash -c /opt/taranis/test/test.sh
+          source $CONDA/etc/profile.d/conda.sh
+          conda activate myenv
+          pip install .
+          taranis analyze-schema -i  test/MLST_listeria -o analyze_schema_test  --cpus 1
+
diff --git a/environment.yml b/environment.yml
@@ -1,18 +1,14 @@
-name: taranis
+name: taranis_env
 channels:
   - conda-forge
   - bioconda
   - defaults
 dependencies:
-  - conda-forge::python>=3.6
-  - conda-forge::biopython==1.72
-  - conda-forge::pandas==1.2.4
-  - conda-forge::openpyxl==3.0.7
-  - conda-forge::plotly==5.0.0
-  - conda-forge::numpy==1.20.3
-  - conda-forge::rich==13.7.0
-  - conda-forge::python-kaleido
+  - python=3.10
   - bioconda::prokka>=1.14
   - bioconda::blast>=2.9
   - bioconda::mash>=2
   - bioconda::prodigal=2.6.3
+  - pip  # installs pip itself so the pip section below can run
+  - pip:
+      - -r requirements.txt  # Python package dependencies are listed in requirements.txt
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,8 @@
+biopython
+rich
 click
 questionary
 bio
 scikit-learn
-plotly
+plotly
+kaleido
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 from setuptools import setup, find_packages
 
-version = "2.2.0"
+version = "3.0.0"
 
 with open("README.md") as f:
     readme = f.read()
@@ -28,9 +28,7 @@
     author_email="[email protected]",
     url="https://github.com/BU-ISCIII/taranis",
     license="GNU GENERAL PUBLIC LICENSE v.3",
-    entry_points={
-        "console_scripts": ["taranis=taranis.__main__:run_taranis"]
-    },
+    entry_points={"console_scripts": ["taranis=taranis.__main__:run_taranis"]},
     install_requires=required,
     packages=find_packages(exclude=("docs")),
     include_package_data=True,

diff --git a/taranis/__init__.py b/taranis/__init__.py
@@ -1,3 +1,3 @@
 import pkg_resources
 
-__version__ = pkg_resources.get_distribution("taranis").version
+__version__ = pkg_resources.get_distribution("taranis").version
diff --git a/taranis/__main__.py b/taranis/__main__.py
@@ -10,7 +10,6 @@
 import sys
 import time
 
-import taranis.prediction
 import taranis.utils
 import taranis.analyze_schema
 import taranis.reference_alleles
@@ -52,7 +51,7 @@ def run_taranis():
     )
 
     # stderr.print("[green]                                          `._,._,'\n", highlight=False)
-    __version__ = "2.1.0"
+    __version__ = "3.0.0"
     stderr.print(
         "\n" "[grey39]    Taranis version {}".format(__version__), highlight=False
     )
@@ -166,6 +165,12 @@ def taranis_cli(verbose, log_file):
     default=False,
     help="Remove no CDS alleles from the schema.",
 )
+@click.option(
+    "--output-allele-annot/--no-output-allele-annot",
+    required=False,
+    default=True,
+    help="output prokka/allele annotation for all alleles in locus",
+)
 @click.option(
     "--genus",
     required=False,
@@ -184,29 +189,41 @@ def taranis_cli(verbose, log_file):
     default="Genus",
     help="Use genus-specific BLAST databases for Prokka schema genes annotation (needs --genus). Default is False.",
 )
+@click.option(
+    "--cpus",
+    required=False,
+    multiple=False,
+    type=int,
+    default=1,
+    help="Number of cpus used for execution",
+)
 def analyze_schema(
-    inputdir,
-    output,
-    remove_subset,
-    remove_duplicated,
-    remove_no_cds,
-    genus,
-    species,
-    usegenus,
+    inputdir: str,
+    output: str,
+    remove_subset: bool,
+    remove_duplicated: bool,
+    remove_no_cds: bool,
+    output_allele_annot: bool,
+    genus: str,
+    species: str,
+    usegenus: str,
+    cpus: int,
 ):
     schema_files = taranis.utils.get_files_in_folder(inputdir, "fasta")
 
-    """
-    schema_analyze = {}
+    """ TODO.DELETE CODE
+    schema_analyze = []
     for schema_file in schema_files:
         schema_obj = taranis.analyze_schema.AnalyzeSchema(schema_file, output, remove_subset, remove_duplicated, remove_no_cds, genus, species, usegenus)
-        schema_analyze.update(schema_obj.analyze_allele_in_schema())
-
-    """
+        schema_analyze.append(schema_obj.analyze_allele_in_schema())
+    import pdb; pdb.set_trace()
+    _ = taranis.analyze_schema.collect_statistics(schema_analyze, output, output_allele_annot)
+    sys.exit(0)
     # for schema_file in schema_files:
+    """
     results = []
     start = time.perf_counter()
-    with concurrent.futures.ProcessPoolExecutor() as executor:
+    with concurrent.futures.ProcessPoolExecutor(max_workers=cpus) as executor:
         futures = [
             executor.submit(
                 taranis.analyze_schema.parallel_execution,
@@ -224,10 +241,11 @@ def analyze_schema(
         # Collect results as they complete
         for future in concurrent.futures.as_completed(futures):
             results.append(future.result())
-    _ = taranis.analyze_schema.collect_statistics(results, output)
+    _ = taranis.analyze_schema.collect_statistics(results, output, output_allele_annot)
     finish = time.perf_counter()
     print(f"Schema analyze finish in {round((finish-start)/60, 2)} minutes")
 
+
 # Reference alleles
 @taranis_cli.command(help_priority=2)
 @click.option(
@@ -247,12 +265,13 @@ def analyze_schema(
     help="Output folder to save reference alleles",
 )
 def reference_alleles(
-    schema,
-    output,
+    schema: str,
+    output: str,
 ):
     # taranis reference-alleles -s ../../documentos_antiguos/datos_prueba/schema_1_locus/ -o ../../new_taranis_result_code
-    # taranis reference-alleles -s ../../documentos_antiguos/datos_prueba/schema_test/ -o ../../new_taranis_result_code
+    # taranis reference-alleles -s <path/to/schema_folder> -o <path/to/output_folder>
     schema_files = taranis.utils.get_files_in_folder(schema, "fasta")
+
     # Check if output folder exists
     if taranis.utils.folder_exists(output):
         q_question = (
@@ -268,7 +287,7 @@ def reference_alleles(
         try:
             os.makedirs(output)
         except OSError as e:
-            log.info("Unable to create folder at %s", output)
+            log.info("Unable to create folder at %s with error %s", output, e)
             stderr.print("[red] ERROR. Unable to create folder  " + output)
             sys.exit(1)
     """Create the reference alleles from the schema """
@@ -362,7 +381,7 @@ def allele_calling(
         try:
             os.makedirs(output)
         except OSError as e:
-            log.info("Unable to create folder at %s", output)
+            log.info("Unable to create folder at %s because %s", output, e)
-            log.info("Unable to create folder at %s because %s", output, e)
+            log.info("Unable to create folder at %s with error %s", output, e)
-            log.info("Unable to create folder at %s because %s", output, e)
+            log.info("Unable to create folder at %s with error %s", output, e)
             stderr.print("[red] ERROR. Unable to create folder  " + output)
             sys.exit(1)
     # Filter fasta files from reference folder