diff --git a/.github/workflows/integration03-ci.yml b/.github/workflows/integration03-ci.yml
new file mode 100644
index 00000000..7d79325f
--- /dev/null
+++ b/.github/workflows/integration03-ci.yml
@@ -0,0 +1,99 @@
+name: Run integration03
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+env:
+  debug: 'true'
+
+jobs:
+  integration:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        os: ["ubuntu-latest"] # , "macos-latest", "windows-latest"
+        python-version: ["3.10"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: File tree
+        if: env.debug == 'true'
+        run: tree
+
+      - uses: conda-incubator/setup-miniconda@v3
+        with:
+          miniforge-version: latest
+          auto-activate-base: true
+          auto-update-conda: true
+          channels: conda-forge
+          channel-priority: strict
+          activate-environment: pipeline_env
+          environment-file: pipeline_env.yaml
+# important: this patch is only to test whether MultiVI integration works;
+# the issues are not related to panpipes: https://discourse.scverse.org/t/error-when-training-model-on-m3-max-mps/1896/2
+# https://discourse.scverse.org/t/macbook-m1-m2-mps-acceleration-with-scvi/2075/4
+      - name: Install Panpipes
+        shell: bash -el {0}
+        run: |
+          pip install -e .
+          conda list
+
+      - name: Conda info
+        if: env.debug == 'true'
+        shell: bash -el {0}
+        run: conda info
+
+      - name: Conda list
+        if: env.debug == 'true'
+        shell: pwsh
+        run: conda list
+
+      # Note: the file is renamed during the download to trim the "subsample_" prefix
+      - name: Preparing the data
+        run: |
+          mkdir -p teaseq/integration && cd teaseq/integration
+          curl -L -o teaseq.h5mu https://figshare.com/ndownloader/files/44796985
+
+      # Note: we run the following to test that the command works;
+      # the next task will replace the generated file anyway
+      - name: Preparing the configuration file
+        shell: bash -el {0}
+        run: |
+          cd teaseq/integration
+          panpipes integration config
+
+      - name: Edit the submission file
+        run: |
+          cd teaseq/integration
+          curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes/1849a8c65aa67702f423da2c3b2d1d9238adac6d/tests/integration_3/pipeline.yml
+
+      - name: Replace template contents in configuration file
+        run: |
+          cd teaseq/integration
+          sed -i 's+/Users/fabiola.curion/Documents/devel/miniconda3/envs/pipeline_env+pipeline_env+g' pipeline.yml
+
+      - name: File tree
+        if: env.debug == 'true'
+        run: tree teaseq
+
+      - name: Review pipeline tasks
+        shell: bash -el {0}
+        run: |
+          cd teaseq/integration
+          panpipes integration show full --local
+
+      - name: Run pipeline tasks
+        shell: bash -el {0}
+        run: |
+          cd teaseq/integration
+          panpipes integration make full --local
+
+      - name: File tree
+        if: env.debug == 'true'
+        run: tree teaseq
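The `sed -i 's+...+...+g'` step above rewrites the template conda path baked into the downloaded `pipeline.yml`. For anyone reproducing this outside GNU sed (e.g. on macOS, where `sed -i` requires a suffix argument), a hedged Python equivalent, assuming the same checkout layout as the workflow:

```python
# Rewrites the template condaenv path in pipeline.yml, mirroring the CI's sed step.
from pathlib import Path

config = Path("teaseq/integration/pipeline.yml")
text = config.read_text()
text = text.replace(
    "/Users/fabiola.curion/Documents/devel/miniconda3/envs/pipeline_env",
    "pipeline_env",  # name of the conda env created by setup-miniconda
)
config.write_text(text)
```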
diff --git a/README.md b/README.md
index d4e9338b..35ca265b 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,9 @@
 Panpipes is a set of computational workflows designed to automate multimodal single-cell and spatial transcriptomic analyses by incorporating widely-used Python-based tools to perform quality control, preprocessing, integration, clustering, and reference mapping at scale. Panpipes allows reliable and customisable analysis and evaluation of individual and integrated modalities, thereby empowering decision-making before downstream investigations.
 
-**See our [documentation](https://panpipes-pipelines.readthedocs.io/en/latest/) and our [preprint](https://www.biorxiv.org/content/10.1101/2023.03.11.532085v2)**
+**See our [documentation](https://panpipes-pipelines.readthedocs.io/en/latest/)**
+
+**Panpipes is published in [Genome Biology!](https://link.springer.com/article/10.1186/s13059-024-03322-7)**
 
 These workflows make use of [cgat-core](https://github.com/cgat-developers/cgat-core)
 
@@ -53,9 +55,9 @@ Check the example [submission file](https://github.com/DendrouLab/panpipes/blob/
 
 ## Citation
 
-[Panpipes: a pipeline for multiomic single-cell and spatial transcriptomic data analysis
-Fabiola Curion, Charlotte Rich-Griffin, Devika Agarwal, Sarah Ouologuem, Tom Thomas, Fabian J. Theis, Calliope A. Dendrou
-bioRxiv 2023.03.11.532085; doi: https://doi.org/10.1101/2023.03.11.532085](https://www.biorxiv.org/content/10.1101/2023.03.11.532085v2)
+[Curion, F., Rich-Griffin, C., Agarwal, D. et al. Panpipes: a pipeline for multiomic single-cell and spatial transcriptomic data analysis. Genome Biol 25, 181 (2024).
+doi: https://doi.org/10.1186/s13059-024-03322-7](https://link.springer.com/article/10.1186/s13059-024-03322-7)
+
 
 ## Contributors
diff --git a/panpipes/panpipes/pipeline_integration.py b/panpipes/panpipes/pipeline_integration.py
index ba9e071e..916ff047 100644
--- a/panpipes/panpipes/pipeline_integration.py
+++ b/panpipes/panpipes/pipeline_integration.py
@@ -545,8 +545,7 @@ def run_bbknn_atac(outfile):
         cmd += " --neighbors_within_batch %i" % PARAMS['atac']['bbknn']['neighbors_within_batch']
     if PARAMS['atac']['neighbors']['npcs'] is not None:
         cmd += " --neighbors_n_pcs %s" % PARAMS['atac']['neighbors']['npcs']
-    #Forcing bbknn to run on PCA in case of atac
-    cmd += " --dimred PCA"
+    #cmd += " --dimred PCA"
     cmd += " > logs/3_atac_bbknn.log "
     if PARAMS['queues_long'] is not None:
        job_kwargs["job_queue"] = PARAMS['queues_long']
diff --git a/panpipes/python_scripts/batch_correct_bbknn.py b/panpipes/python_scripts/batch_correct_bbknn.py
index 23b802ba..8d8a0919 100644
--- a/panpipes/python_scripts/batch_correct_bbknn.py
+++ b/panpipes/python_scripts/batch_correct_bbknn.py
@@ -56,19 +56,14 @@
 # bbknn can't integrate on 2+ variables, so create a fake column with combined information
 columns = [x.strip() for x in args.integration_col.split(",")]
 
-if args.modality =="atac":
-    if "scaled_counts" in adata.layers.keys():
-        pass
-    else:
-        L.info("To run BBKNN on ATAC, PCA is needed. Computing PCA now.")
-        L.info("Scaling data and saving scaled counts to .layers['scaled_counts']")
-        sc.pp.scale(adata)
-        adata.layers["scaled_counts"] = adata.X.copy()
-        L.info("Computing PCA")
-        sc.tl.pca(adata, n_comps=min(50,adata.var.shape[0]-1), svd_solver='arpack', random_state=0)
-
-if "X_pca" not in adata.obsm:
-    L.warning("X_pca could not be found in adata.obsm. Computing PCA with default parameters.")
+if args.dimred == "PCA":
+    dimred = "X_pca"
+elif args.dimred == "LSI":
+    dimred = "X_lsi"
+else:
+    dimred = "X_pca"  # guard: unknown --dimred values fall back to PCA
+
+if dimred not in adata.obsm:
+    L.warning("Dimred '%s' could not be found in adata.obsm. Computing PCA with default parameters." % dimred)
+    dimred = "X_pca"
     n_pcs = 50
     if adata.var.shape[0] < n_pcs or adata.obs.shape[0] < n_pcs:
         L.info("You have less features/samples than number of PCs you intend to calculate")
@@ -81,7 +76,6 @@
             svd_solver='arpack',
             random_state=0)
 
-
 L.info("Preparing for integration")
 
 if len(columns) > 1:
@@ -99,6 +93,7 @@
 # run bbknn
 L.info("Running BBKNN")
 adata = sc.external.pp.bbknn(adata,
+                use_rep=dimred,
                 batch_key=args.integration_col,
                 copy=True,
                 n_pcs = int(args.neighbors_n_pcs),
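The `--dimred` handling added above selects which embedding BBKNN receives through `use_rep`. A minimal self-contained sketch of the same pattern on toy data (`requested` stands in for `args.dimred`; not the script's actual MuData input):

```python
import numpy as np
import anndata as ad
import scanpy as sc

# toy AnnData with a batch column; "LSI" is requested but X_lsi is absent,
# so the script-style fallback to PCA kicks in
adata = ad.AnnData(np.random.poisson(1.0, (200, 100)).astype(np.float32))
adata.obs["dataset"] = ["a"] * 100 + ["b"] * 100

requested = "LSI"  # stand-in for args.dimred
dimred = {"PCA": "X_pca", "LSI": "X_lsi"}.get(requested, "X_pca")
if dimred not in adata.obsm:
    sc.tl.pca(adata, n_comps=min(50, adata.var.shape[0] - 1),
              svd_solver="arpack", random_state=0)
    dimred = "X_pca"

adata = sc.external.pp.bbknn(adata, batch_key="dataset", use_rep=dimred, copy=True)
```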
diff --git a/panpipes/python_scripts/run_scib.py b/panpipes/python_scripts/run_scib.py
index 12a2e053..96f6c379 100644
--- a/panpipes/python_scripts/run_scib.py
+++ b/panpipes/python_scripts/run_scib.py
@@ -124,3 +124,5 @@
     df.to_csv(os.path.join(args.fig_dir, modality, "scib_metrics_min_max_scaled.csv"))
 else:
     df.to_csv(os.path.join(args.fig_dir, modality, "scib_metrics.csv"))
+
+L.info("Done")
diff --git a/tests/integration_1/pipeline.yml b/tests/integration_1/pipeline.yml
index 1092e866..dba8ee58 100644
--- a/tests/integration_1/pipeline.yml
+++ b/tests/integration_1/pipeline.yml
@@ -154,7 +154,7 @@ atac:
   # True or false depending on whether you want to run batch correction
   run: True
   # which dimensionality reduction to expect, LSI or PCA
-  dimred: LSI
+  dimred: PCA
   # what method(s) to use to run batch correction, you can specify multiple
   # (comma-seprated string, no spaces)
   # choices: harmony,bbknn,combat
diff --git a/tests/integration_3/pipeline.yml b/tests/integration_3/pipeline.yml
new file mode 100644
index 00000000..f1bb3366
--- /dev/null
+++ b/tests/integration_3/pipeline.yml
@@ -0,0 +1,394 @@
+# Pipeline pipeline_integration.py configuration file
+# ==============================================
+
+# compute resource options
+# ------------------------
+resources:
+  # Number of threads used for parallel jobs
+  # this job type must have enough memory to load your mudata and do computationally intensive tasks
+  threads_high: 1
+  # this job type must have enough memory to load your mudata and do computationally light tasks
+  threads_medium: 1
+  # this job type loads text files and does plotting; it requires much less memory than the other two
+  threads_low: 1
+  # if you have access to a gpu-specific queue, how many gpu threads to request; make sure to edit the queues section below
+  # so that panpipes can find your gpu queue
+  threads_gpu: 2
+# path to conda env, leave blank if running native or your cluster automatically inherits the login node environment
+condaenv: /Users/fabiola.curion/Documents/devel/miniconda3/envs/pipeline_env
+
+# allows for tweaking which queues jobs get submitted to,
+# in case there is a special queue for long jobs or you have access to a gpu-specific queue
+# the default queue should be specified in your .cgat.yml file
+# leave blank if you do not want to use the alternative queues
+queues:
+  long:
+  gpu:
+
+# Start
+# --------------------------
+# either one that exists already with
+sample_prefix: teaseq
+# this is what comes out of the filtering/preprocessing
+preprocessed_obj: teaseq.h5mu
+# contains layers: raw_counts, logged_counts, and has scaled or logged counts in X
+
+
+#--------------------------
+# Batch correction
+# -------------------------
+# unimodal: correct each modality independently
+rna:
+  # True or false depending on whether you want to run batch correction
+  run: True
+  # what method(s) to use to run batch correction, you can specify multiple
+  # choices: harmony,bbknn,scanorama,scvi (comma-separated string, no spaces)
+  tools: harmony,scvi
+  # this is the column you want to batch correct on. if you specify a comma-separated list,
+  # they will all be used simultaneously.
+  # Specifically, all columns specified will be merged into one 'batch' column.
+  # if you want to test correction for one at a time,
+  # specify one at a time and run the pipeline in different folders, i.e. integration_by_sample,
+  # integration_by_tissue ...
+  column: dataset
+  #-----------------------------
+  # Harmony args
+  #-----------------------------
+  harmony:
+    # sigma value, used by Harmony
+    sigma: 0.1
+    # theta value used by Harmony, default is 1
+    theta: 1.0
+    # number of pcs, used by Harmony
+    npcs: 30
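+  # A hedged sketch of how the harmony settings above (sigma, theta, npcs)
+  # can be exercised through scanpy's harmonypy wrapper; panpipes drives
+  # Harmony through its own scripts, so this is an illustration on toy data,
+  # not the pipeline's exact call (extra kwargs go to harmonypy.run_harmony):
+  #   import numpy as np, anndata as ad, scanpy as sc
+  #   adata = ad.AnnData(np.random.poisson(1.0, (200, 100)).astype(np.float32))
+  #   adata.obs["dataset"] = ["a"] * 100 + ["b"] * 100
+  #   sc.pp.normalize_total(adata); sc.pp.log1p(adata)
+  #   sc.pp.pca(adata, n_comps=30)                                # npcs: 30
+  #   sc.external.pp.harmony_integrate(adata, key="dataset", theta=1.0, sigma=0.1)
+  #   adata.obsm["X_pca_harmony"]                                 # corrected embedding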
+  #----------------------------
+  # BBKNN args # https://bbknn.readthedocs.io/en/latest/
+  #-----------------------------
+  bbknn:
+    neighbors_within_batch: 20
+  #-----------------------------
+  # SCVI args
+  #-----------------------------
+  scvi:
+    seed: 1492
+    exclude_mt_genes: True
+    mt_column: mt
+    model_args:
+      n_layers:
+      n_latent:
+      gene_likelihood: zinb
+    training_args:
+      max_epochs: 40
+      train_size: 0.9
+      early_stopping: True
+    training_plan:
+      lr: 0.001
+      n_epochs_kl_warmup: 40
+      reduce_lr_on_plateau: True
+      lr_scheduler_metric:
+      lr_patience: 8
+      lr_factor: 0.1
+  #----------------------------
+  # find neighbour parameters
+  #-----------------------------
+  # to reuse these params (for example for WNN), please use anchors (&) and aliases (*) in the relevant place,
+  # i.e. &rna_neighbors will be called by *rna_neighbors where referenced
+  neighbors: &rna_neighbors
+    # number of Principal Components to calculate for neighbours and umap:
+    # - if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
+    # - if Harmony is the method of choice, it will use these components to create a corrected dim red
+    # the maximum number of dims for neighbors calculation can only be lower than or equal to the total number of dims for PCA or Harmony
+    # note: scvelo default is 30
+    npcs: 30
+    # number of neighbours
+    k: 30
+    # metric: euclidean | cosine
+    metric: euclidean
+    # scanpy | hnsw (from scvelo)
+    method: scanpy
+
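+  # A hedged sketch of how the scvi block above maps onto scvi-tools
+  # (model_args -> the SCVI constructor, training_args -> train(),
+  # training_plan -> plan_kwargs); toy data, illustration only, not
+  # panpipes' internal call:
+  #   import numpy as np, anndata as ad, scvi
+  #   adata = ad.AnnData(np.random.poisson(1.0, (200, 100)).astype(np.float32))
+  #   adata.obs["dataset"] = ["a"] * 100 + ["b"] * 100
+  #   scvi.settings.seed = 1492                                   # seed
+  #   scvi.model.SCVI.setup_anndata(adata, batch_key="dataset")
+  #   model = scvi.model.SCVI(adata, gene_likelihood="zinb")      # model_args
+  #   model.train(max_epochs=40, train_size=0.9, early_stopping=True,
+  #               plan_kwargs={"lr": 0.001, "n_epochs_kl_warmup": 40,
+  #                            "reduce_lr_on_plateau": True,
+  #                            "lr_patience": 8, "lr_factor": 0.1})
+  #   latent = model.get_latent_representation()
+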
+#--------------------------
+prot:
+  # True or false depending on whether you want to run batch correction
+  run: False
+  # what method(s) to use to run batch correction, you can specify multiple
+  # choices: harmony,bbknn,combat
+  tools: harmony
+  # this is the column you want to batch correct on. if you specify a comma-separated list (no spaces),
+  # they will all be used simultaneously. if you want to test correction for one at a time,
+  # specify one at a time and run the pipeline in different folders, i.e. integration_by_sample,
+  # integration_by_tissue ...
+  column: orig.ident
+  #----------------------------
+  # Harmony args
+  #-----------------------------
+  harmony:
+    # sigma value, used by Harmony
+    sigma: 0.1
+    # theta value used by Harmony, default is 1
+    theta: 1.0
+    # number of pcs, used by Harmony
+    npcs: 30
+  #----------------------------
+  # BBKNN args # https://bbknn.readthedocs.io/en/latest/
+  #-----------------------------
+  bbknn:
+    neighbors_within_batch: 20
+  #----------------------------
+  # find neighbour parameters
+  #-----------------------------
+  neighbors: &prot_neighbors
+    # number of Principal Components to calculate for neighbours and umap:
+    # - if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
+    # - if Harmony is the method of choice, it will use these components to create a corrected dim red
+    # note: scvelo default is 30
+    npcs: 30
+    # number of neighbours
+    k: 30
+    # metric: euclidean | cosine
+    metric: euclidean
+    # scanpy | hnsw (from scvelo)
+    method: scanpy
+#--------------------------
+atac:
+  # True or false depending on whether you want to run batch correction
+  run: False
+  # which dimensionality reduction to expect, LSI or PCA
+  dimred: LSI
+  # what method(s) to use to run batch correction, you can specify multiple
+  # (comma-separated string, no spaces)
+  # choices: harmony,bbknn,combat
+  tools: harmony,bbknn
+  # this is the column you want to batch correct on. if you specify a comma-separated list,
+  # they will all be used simultaneously. if you want to test correction for one at a time,
+  # specify one at a time and run the pipeline in different folders, i.e. integration_by_sample,
+  # integration_by_tissue ...
+  column: dataset
+  #----------------------------
+  # Harmony args
+  #-----------------------------
+  harmony:
+    # sigma value, used by Harmony
+    sigma: 0.1
+    # theta value used by Harmony, default is 1
+    theta: 1.0
+    # number of pcs, used by Harmony
+    npcs: 30
+  #----------------------------
+  # BBKNN args # https://bbknn.readthedocs.io/en/latest/
+  #-----------------------------
+  bbknn:
+    neighbors_within_batch: 30
+  #----------------------------
+  # find neighbour parameters
+  #-----------------------------
+  neighbors: &atac_neighbors
+    # number of Principal Components to calculate for neighbours and umap:
+    # - if no correction is applied, PCA will be calculated and used to run UMAP and clustering
+    # - if Harmony is the method of choice, it will use these components to create a corrected dim red
+    # note: scvelo default is 30
+    npcs: 30
+    # number of neighbours
+    k: 30
+    # metric: euclidean | cosine
+    metric: euclidean
+    # scanpy | hnsw (from scvelo)
+    method: scanpy
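+  # dimred: LSI pairs with the new X_lsi support in batch_correct_bbknn.py.
+  # A hedged sketch of producing that embedding with muon's ATAC module
+  # (whether panpipes' own LSI step matches these defaults is an assumption):
+  #   import muon as mu
+  #   from muon import atac as ac
+  #   mdata = mu.read("teaseq.h5mu")
+  #   ac.pp.tfidf(mdata["atac"])              # TF-IDF normalisation
+  #   ac.tl.lsi(mdata["atac"], n_comps=30)    # writes .obsm["X_lsi"]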
+#----------------------------------------------
+# multimodal integration
+# remember to specify knn graph params in the section "neighbors"
+#----------------------------------------------
+multimodal:
+  # True or false depending on whether you want to run multimodal integration
+  run: True
+  # what method(s) to use, you can specify multiple
+  # choices: totalvi, mofa, MultiVI, WNN
+  # list e.g. below
+  tools:
+    - totalvi
+
+  # this is the column you want to batch correct on. if you specify a comma-separated list,
+  # they will all be used simultaneously. if you want to test correction for one at a time,
+  # specify one at a time and run the pipeline in different folders, i.e. integration_by_sample,
+  # integration_by_tissue ...
+  column_continuous:
+  column_categorical: dataset
+  # extra params:
+  totalvi:
+    # this is a minimal set of parameters that will be expected;
+    # you can add any other param from the tutorials and they will
+    # be parsed alongside the others
+    seed: 1492
+    # totalvi will run on rna and prot
+    modalities: rna,prot
+    exclude_mt_genes: True
+    mt_column: mt
+    # to filter outliers manually, create a column called adt_outliers in mdata['prot'].obs
+    filter_by_hvg: True
+    filter_prot_outliers: False
+    model_args:
+      latent_distribution: "normal"
+    training_args:
+      max_epochs: 40
+      train_size: 0.9
+      early_stopping: True
+    training_plan: None
+  MultiVI:
+    # this is a minimal set of parameters that will be expected;
+    # you can add any other param from the tutorials and they will
+    # be parsed alongside the others
+    # leave arguments blank for default
+    seed: 1492
+    lowmem: True
+    # Setting lowmem to True will subset the atac to the top 25k HVF.
+    # This is to deal with concatenation of atac,rna on large datasets, which at the moment is suboptimally required by scvi-tools.
+    # >100GB of RAM are required to concatenate atac,rna with 15k cells and 120k total features (union rna,atac)
+    model_args:
+      # (default: None)
+      n_hidden:
+      # (default: None)
+      n_latent:
+      # (bool, default: True)
+      region_factors: True
+      # {'normal', 'ln'} (default: 'normal')
+      latent_distribution: 'normal'
+      # (bool, default: False)
+      deeply_inject_covariates: False
+      # (bool, default: False)
+      fully_paired: False
+    training_args:
+      # (default: 500)
+      max_epochs: 500
+      # float (default: 0.0001)
+      lr: 1.0e-05
+      # leave blank for default; str | int | bool | None (default: None)
+      use_gpu:
+      # float (default: 0.9)
+      train_size: 0.9
+      # leave blank for default; float | None (default: None)
+      validation_size:
+      # int (default: 128)
+      batch_size: 128
+      # float (default: 0.001)
+      weight_decay: 0.001
+      # float (default: 1.0e-08)
+      eps: 1.0e-08
+      # bool (default: True)
+      early_stopping: True
+      # bool (default: True)
+      save_best: True
+      # leave blank for default; int | None (default: None)
+      check_val_every_n_epoch:
+      # leave blank for default; int | None (default: None)
+      n_steps_kl_warmup:
+      # int | None (default: 50)
+      n_epochs_kl_warmup: 50
+      # bool (default: True)
+      adversarial_mixing: True
+      # leave blank for default; dict | None (default: None)
+      training_plan:
+  mofa:
+    # this is a minimal set of parameters that will be expected;
+    # you can add any other param from the tutorials and they will
+    # be parsed alongside the others
+    # (comma-separated string, no spaces)
+    modalities:
+    filter_by_hvg: True
+    n_factors: 10
+    n_iterations: 1000
+    # pick one among fast, medium, slow
+    convergence_mode: fast
+    save_parameters: False
+    # if save_parameters is True, set the following; otherwise leave blank
+    outfile:
+  WNN:
+    # muon implementation of WNN
+    modalities: rna,prot,atac
+    # run wnn on batch-corrected unimodal data: set each of the modalities you want to use to calc WNN to ONE method.
+    # leave as None and it will default to de novo calculation of neighbours on non-corrected data for that modality using the specified params
+    batch_corrected:
+      # options are: "bbknn", "scVI", "harmony", "scanorama"
+      rna: None
+      # options are: "harmony", "bbknn"
+      prot: None
+      # options are: "harmony"
+      atac: None
+    # please use anchors (&) and aliases (*) in the relevant place,
+    # i.e. &rna_neighbors will be called by *rna_neighbors where referenced
+    knn:
+      rna: *rna_neighbors
+      prot: *prot_neighbors
+      atac: *atac_neighbors
+    # WNN has its own neighbors search, specify here
+    n_neighbors: # leave blank and it will default to the arithmetic mean across modalities' neighbors
+    n_bandwidth_neighbors: 20
+    n_multineighbors: 200
+    metric: 'euclidean'
+    low_memory: True
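+  # A hedged sketch of the WNN step configured above via muon, whose
+  # mu.pp.neighbors exposes the same n_bandwidth_neighbors / n_multineighbors /
+  # metric / low_memory parameters (n_neighbors=None defaults to the arithmetic
+  # mean, as noted above); per-modality graphs are built toy-style here, while
+  # panpipes reuses the graphs configured via the *_neighbors anchors:
+  #   import muon as mu
+  #   import scanpy as sc
+  #   mdata = mu.read("teaseq.h5mu")
+  #   for mod in ("rna", "prot", "atac"):     # WNN needs per-modality KNN first
+  #       sc.pp.pca(mdata[mod])
+  #       sc.pp.neighbors(mdata[mod], n_neighbors=30)   # k: 30
+  #   mu.pp.neighbors(mdata, n_bandwidth_neighbors=20, n_multineighbors=200,
+  #                   metric="euclidean", low_memory=True)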
+
+  ###
+  # neighbours knn calculation for multimodal analysis
+  ###
+  neighbors:
+    # number of Principal Components to calculate for neighbours and umap:
+    # - if no correction is applied, PCA will be calculated and used to run UMAP and clustering on
+    # - if Harmony is the method of choice, it will use these components to create a corrected dim red
+    # note: scvelo default is 30
+    npcs: 30
+    # number of neighbours
+    k: 30
+    # metric: euclidean | cosine
+    metric: euclidean
+    # scanpy | hnsw (from scvelo)
+    method: scanpy
+
+
+#-----------------------------
+# Plot
+#-----------------------------
+plotqc:
+  # grouping var must be a categorical variable
+  # (comma-separated strings, no spaces)
+  # umaps comparing the integration (one plot per value in the group),
+  # for each batch correction column plus any extras in grouping_var
+  grouping_var: dataset,sample_id
+  # what other metrics you want to plot on each modality's embedding (one plot per group);
+  # use mod:variable notation,
+  # any metrics that you want to plot on all modality umaps go under "all"
+  # these can be categorical or numeric
+  all: rep:receptor_subtype
+  rna: rna:total_counts
+  prot: prot:total_counts
+  atac:
+  multimodal: rna:total_counts
+  # if you want to add any additional plots, just remove the log file logs/plot_batch_corrected_umaps.log
+  # and run panpipes integration make plot_umaps
+
+# ----------------
+# Make final object
+# ----------------
+# Final choices: leave blank until you have reviewed the results from running
+#   panpipes integration make full
+# This step will produce a mudata object with one layer per modality, with
+# one correction per modality and one multimodal layer.
+# Choose the integration results you want to merge in the final object.
+# For unimodal integration: to pick the uncorrected version, use "no_correction",
+# then run
+#   panpipes integration make merge_integration
+final_obj:
+  rna:
+    include: True
+    bc_choice: no_correction
+  prot:
+    include: True
+    bc_choice: harmony
+  atac:
+    include: False
+    bc_choice: bbknn
+  multimodal:
+    include: True
+    bc_choice: WNN
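One last note on the anchor (`&`) / alias (`*`) reuse that this configuration leans on (e.g. `&rna_neighbors` / `*rna_neighbors` in the WNN `knn` block): when the YAML is loaded, each alias resolves to the anchored mapping, as this quick check shows:

```python
import yaml

doc = """
rna:
  neighbors: &rna_neighbors
    npcs: 30
    k: 30
multimodal:
  WNN:
    knn:
      rna: *rna_neighbors
"""
cfg = yaml.safe_load(doc)
# the alias points at the very mapping defined under rna.neighbors
assert cfg["multimodal"]["WNN"]["knn"]["rna"] == cfg["rna"]["neighbors"]
```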