diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..799f50d1 --- /dev/null +++ b/.flake8 @@ -0,0 +1,14 @@ +[flake8] +max-line-length = 88 +ignore = D105, E731, W503, F821, E203 +docstring-convention = numpy +per-file-ignores = __init__.py:F401 +exclude = + .git + .github + .dvc + __pycache__ + .venv + .mypy_cache + .pytest_cache + conf.py \ No newline at end of file diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..b6ab8409 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @jeandut @mandreux-owkin @qklopfenstein-owkin \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..a78a9e9d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,29 @@ +--- +name: Bug report +about: Create a report to help improve the code +title: "[BUG] " +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Provide snippets of code and steps on how to reproduce the behavior. +Please also specify the version you are using. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** +- OS: [e.g. iOS] +- Python version +- Code version [e.g. 0.02 or commit hash] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..bbcbbe7d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? 
Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/workflows/.gitkeep b/.github/workflows/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.github/workflows/PULL_REQUEST_TEMPLATE.md b/.github/workflows/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..b7198165 --- /dev/null +++ b/.github/workflows/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,34 @@ + + +#### Reference Issue or PRs + + + +#### What does your PR implement? Please be specific + + + + +#### Checklist + +Have you: +- [ ] Added unit tests under `tests/` (if you modified the `fedeca` folder)? +- [ ] Added docstrings for all functions? +- [ ] Added instructions on how to run your experimental file, and the +expected results (e.g. in a README or in the file itself)? 
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..1ba6b93e --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,44 @@ +name: build_docs + +# Controls when the workflow will run +on: + push: + branches: + - 'main' + pull_request: + branches: + - 'main' + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + build_docs: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + python: ["3.9"] + # python: ["3.9", "3.10", "3.11"] # TODO: expand to other pythons + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + - name: Install dependencies + env: + GIT_TOKEN: ${{ secrets.GIT_TOKEN }} + GIT_USERNAME: ${{ secrets.GIT_USERNAME }} + run: | + python -m pip install --upgrade pip + pip install -e ".[all_extra]" + - name: Compile docs + run: | + cd docs + make clean html + - name: Publish docs to gh-pages + uses: peaceiris/actions-gh-pages@v3 + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} + with: + publish_branch: gh-pages + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/build/html/ + force_orphan: true diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml new file mode 100644 index 00000000..47deee49 --- /dev/null +++ b/.github/workflows/flake8.yml @@ -0,0 +1,29 @@ +name: linter + +on: + push: + branches: + - 'main' + pull_request: + branches: + - 'main' + +jobs: + lint: + name: Lint code base + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Setup Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Lint with flake + run: | + pip install flake8 + pip install flake8-docstrings + flake8 fedeca/ --max-line-length=88 --append-config .flake8 diff --git a/.github/workflows/pr_validation.yml b/.github/workflows/pr_validation.yml new file mode 100644 
index 00000000..f634ded7 --- /dev/null +++ b/.github/workflows/pr_validation.yml @@ -0,0 +1,42 @@ +name: pr-validation + +# Controls when the workflow will run +on: + pull_request: + paths-ignore: + - 'experiments/**' # if only the exps are modified, no need to run it + - 'docs/**' # if only the docs are modified, no need to run it + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + run_tests: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + python: ["3.9"] + # python: ["3.9", "3.10", "3.11"] # TODO: expand to other pythons + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + - name: Install dependencies + env: + GIT_TOKEN: ${{ secrets.GIT_TOKEN }} + GIT_USERNAME: ${{ secrets.GIT_USERNAME }} + run: | + python -m pip install --upgrade pip + pip install -e ".[all_extra]" + - name: Testing with pytest + run: | + coverage run -m pytest -v fedeca + - name: Generate code coverage report + run: | + coverage html --omit="*/local-worker/*" + - name: Upload coverage artifacts + uses: actions/upload-artifact@v3 + with: + name: test-coverage-report + path: htmlcov/ + retention-days: 20 diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..a4c2fd71 --- /dev/null +++ b/.gitignore @@ -0,0 +1,141 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class +tmp/ +local-worker/ + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# other files +.DS_Store + +# substra +temp/ + +# fedeca +tokens/ +iptw_experiment/ \ No newline at end of file diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 00000000..a28c27e2 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,3 @@ +[settings] +profile = black +skip=__init__.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..82c98e4b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,37 @@ +repos: +- repo: https://github.com/ambv/black + rev: 22.1.0 + hooks: + - id: black + additional_dependencies: 
['click==8.0.4'] + args: # arguments to configure black + - --line-length=88 +- repo: https://github.com/PyCQA/docformatter + rev: v1.7.5 + hooks: + - id: docformatter + name: Format docstrings with `docformatter` + language: system + types: [python] + require_serial: true + args: [--in-place,--wrap-summaries=88, --wrap-descriptions=87, --recursive] + +- repo: https://github.com/pre-commit/mirrors-isort + rev: v5.10.1 + hooks: + - id: isort + args : ["--profile", "black", "--filter-files"] + +- repo: local + hooks: + - id: flake8 + name: "Run linter flake8" + language: system + types: [python] + require_serial: true + entry: flake8 + args : + - "--max-line-length=88" + - "--per-file-ignores=*/__init__.py:F401" + # copy ignore cases from .flake8 file to be run by pre-commit + - "--ignore=D105, E731, W503, F821, E203" diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..5c0a5f3c --- /dev/null +++ b/.pylintrc @@ -0,0 +1,39 @@ +[MAIN] +load-plugins= + pylint.extensions.check_elif, + pylint.extensions.bad_builtin, + pylint.extensions.docparams, + pylint.extensions.for_any_all, + pylint.extensions.set_membership, + pylint.extensions.code_style, + pylint.extensions.overlapping_exceptions, + pylint.extensions.typing, + pylint.extensions.redefined_variable_type, + pylint.extensions.comparison_placement, + +[MESSAGES CONTROL] +disable = + too-many-instance-attributes, + too-few-public-methods, + too-many-public-methods, + too-many-arguments, + too-many-locals, + fixme, + +[BASIC] +good-names = a,b,c,i,j,k,n,r,s,t,u,v,w,x,y,z,id,_,df,X +good-names-rgxs = X_[a-zA-Z]* + +[FORMAT] +max-line-length = 88 +max-module-lines = 1500 + +[SIMILARITIES] +ignore-imports = yes +min-similarity-lines = 10 + +[TYPING] +runtime-typing = yes + +[TYPECHECK] +generated-members=numpy.*,torch.* diff --git a/README.md b/README.md new file mode 100644 index 00000000..03c00ef6 --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +# FedECA + +:arrow_right:[The API doc is available 
here](https://owkin.github.io/fedeca/):arrow_left: + +## License + +Before using the code be sure to check our [license](./license.md) first. + + +## Installation instructions + +To install the package, create an env with python `3.9` with conda + +```bash +conda create -n fedeca python=3.9 +conda activate fedeca +``` + +Within the environment, install the package by running: +``` +git clone https://github.com/owkin/fedeca.git +pip install -e ".[all_extra]" +``` + +If you plan on developing, you should also install the pre-commit hooks + +```bash +pre-commit install +``` + +This will run all the pre-commit hooks at each commit, ensuring a clean repo. + +## Quickstart + +Go [here](./quickstart/quickstart.md). diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..53b4d1f3 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,25 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= --keep-going -n +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build +LINKCHECKDIR = build/linkcheck + + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +livehtml: + sphinx-autobuild -b html $(SOURCEDIR) $(BUILDDIR)/html diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..9cb35597 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,16 @@ +# Documentation + +The documentation of FedECA is generated using Sphinx and hosted on github. 
+If you want to build the documentation from source, start by cloning the repository, +using instructions in the main readme, and install it with the `[all_extra]` option. + +```bash +pip install -e ".[all_extra]" +``` + +You can now trigger the build using `make` command line in the `docs` folder. + +```bash +cd docs +make clean html +``` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..dc1312ab --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/_static/favicon.ico b/docs/source/_static/favicon.ico new file mode 100755 index 00000000..b20ed84b Binary files /dev/null and b/docs/source/_static/favicon.ico differ diff --git a/docs/source/_static/fonts.css b/docs/source/_static/fonts.css new file mode 100644 index 00000000..0f048c33 --- /dev/null +++ b/docs/source/_static/fonts.css @@ -0,0 +1,35 @@ +@font-face { + font-family: "averta"; + src: url("./Averta/AvertaDemo-Regular.otf") format("truetype"); +} + +@font-face { + font-family: "averta"; + src: url("./Averta/AvertaDemo-Bold.otf") format("truetype"); + font-weight: bold; +} + +@font-face { + font-family: "averta"; + src: url("./Averta/AvertaDemo-ExtraBoldItalic.otf") 
format("truetype"); + font-weight: bolder; + font-style: italic; +} + +@font-face { + font-family: "futuralt"; + src: url("./Futura/FuturaLT.ttf") format("truetype"); +} + +@font-face { + font-family: "futuralt"; + src: url("./Futura/FuturaLT-Bold.ttf") format("truetype"); + font-weight: bold; +} + +@font-face { + font-family: "futuralt"; + src: url("./Futura/FuturaLT-Heavy.otf") format("truetype"); + font-weight: bolder; + font-style: italic; +} diff --git a/docs/source/_static/owkin.css b/docs/source/_static/owkin.css new file mode 100644 index 00000000..38041b21 --- /dev/null +++ b/docs/source/_static/owkin.css @@ -0,0 +1,186 @@ +:root { + --color-blue: #3c73c4; + --color-dark-grey: #262c3c; + + --font-title: "futuralt", Trebuchet MS, arial, sans-serif; + + --font-size-xxl: 62px; + --font-size-xl: 4rem; + --font-size-large: 2.4rem; + --font-size-normal: 17px; + --font-size-small: 14px; +} + +html { + font-size: 62.5%; +} + +body { + font-family: "averta", "Helvetica Neue", helvetica, arial, sans-serif; + color: black; + text-align:justify; + font-size: var(--font-size-normal); +} + +a { + color: var(--color-blue); +} + +a:hover { + color: black; +} + +h1, h2, h3, +.rst-content .toctree-wrapper > p.caption { + font-family: var(--font-title); + font-weight: bolder; + color: var(--color-dark-grey); +} + +h1 { + font-size: var(--font-size-xxl); + line-height: 1.1; + text-align: left; + /*text-transform: uppercase;*/ +} + +h2, +.rst-content .toctree-wrapper > p.caption { + font-size: var(--font-size-xl); + line-height: 1.4; + margin: 6rem auto 3rem; +} + +h3 { + font-size: var(--font-size-large); + line-height: 1.4; + margin: 4.5rem auto 2.5rem; +} + +p, ul, ol { + font-size: var(--font-size-normal); + line-height: 1.4; + margin: 0 0 1.5em 0; +} + +.wy-nav-content { + max-width: 900px; +} + +@media screen and (min-width: 1100px) { + .wy-nav-content { + margin-left: 75px; + background-color: white; + } + + .rst-content div[class^=highlight] { + max-width: calc(100vw - 
300px - 2 * 75px - 3.236em); + min-width: 100%; + width: fit-content; + overflow-x: hidden; + + } +} + +.wy-side-nav-search, +.wy-nav-content-wrap, +.wy-body-for-nav, +.wy-nav-side { + color: black; + background-color: white !important; +} + +.wy-side-nav-search { + padding: 2.8rem 0 0 0; + +} + + +.wy-breadcrumbs { + /*text-transform: uppercase;*/ + font-weight: bold; + font-size: var(--font-size-small); +} + +a, +a:visited { + color: var(--color-blue); +} + +a:hover, +a:visited:hover { + color: black; +} + +.btn-download { + display: flex; + max-width: 260px; + align-items: center; + text-transform: uppercase; + padding-bottom: 0.625rem; + border-bottom: 1px solid #f3f4f7; +} + +.btn-download:hover { + border-bottom: 1px solid; +} + +.btn-download-image { + height: 40px !important; + margin-right: 8px; +} + +.btn { + display: inline-block; + padding: 8px 20px; + border-radius: 30px; + border: 1px solid; + text-transform: uppercase; + font-size: var(--font-size-small); + text-decoration: none; + margin: 0 10px 12px 0; + line-height: 2; + text-align: center; + color: var(--color-blue); +} + +.btn:active { + padding: 8px 20px; +} + +.btn-neutral, +.btn-neutral:visited { + color: var(--color-blue) !important; + background-color: transparent !important; +} + +.btn-neutral:hover, +.btn-neutral:visited:hover { + color: black !important; + background-color: transparent !important; +} + +.rst-content .admonition-title, +.wy-alert-title { + font-size: var(--font-size-small); +} + +html.writer-html4 .rst-content dl:not(.docutils) > dt, +html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) > dt { + font-size: var(--font-size-small); +} + +html.writer-html5 .rst-content table.docutils td > p, +html.writer-html5 .rst-content table.docutils th > p { + font-size: var(--font-size-small); +} + +.wy-nav-top { + background-color: var(--color-dark-grey); +} + +.wy-nav-top a, +.wy-nav-top a:visited, +.wy-nav-top a:hover 
{ + color: white; +} diff --git a/docs/source/_static/sidebar.css b/docs/source/_static/sidebar.css new file mode 100644 index 00000000..cb263504 --- /dev/null +++ b/docs/source/_static/sidebar.css @@ -0,0 +1,92 @@ +.wy-side-nav-search > a { + font-family: var(--font-title); + font-weight: bolder; + color: var(--color-dark-grey); + display: flex; + flex-direction: column-reverse; + font-size: 2rem; + /*text-transform: uppercase;*/ + align-items: center; + padding: 0; + margin-bottom: 0.5em; +} + +.wy-side-nav-search .wy-dropdown>a img.logo, +.wy-side-nav-search>a img.logo { + width: 200px; + margin: 0 !important; +} + +.wy-side-nav-search > a:before { + display: none; +} + +.wy-side-nav-search>div.version { + font-family: var(--font-title); + color: black; + font-size: 1.4rem; + font-weight: bolder; +} + +.wy-side-nav-search input[type=text] { + padding: 8px 20px; + margin: 2.8rem 1rem 0 1rem; + border-radius: 30px; + border: 1px solid; + font-size: 17px; + text-decoration: none; + line-height: 2; + color: var(--color-blue); + width: calc(100% - 2rem); +} + +.wy-menu-vertical header, +.wy-menu-vertical p.caption { + font-size: 1.4rem; + margin-top: 2.8rem; + /*text-transform: uppercase;*/ + color: black; + opacity: 0.4; +} + +.wy-menu-vertical a, +.wy-menu-vertical li.current a { + color: black; + text-align: left; + border-right: none; +} +.wy-menu-vertical a:hover, +.wy-menu-vertical li.current a:hover { + background-color: transparent; +} +.wy-menu-vertical li.current, +.wy-menu-vertical li.toctree-l1.current > a, +.wy-menu-vertical li.toctree-l2.current > a, +.wy-menu-vertical li.toctree-l2.current li.toctree-l3 > a { + color: var(--color-blue); + background-color: transparent; + border-bottom: none; + border-top: none; +} + +.rst-versions { + background: var(--color-dark-grey); + color: white; + font-family: inherit; +} + +.rst-versions .rst-current-version { + background-color: var(--color-dark-grey); + color: white; + font-size: var(--font-size-small); +} + 
+.rst-versions .rst-other-versions { + background-color: var(--color-dark-grey); + color: white; + font-size: var(--font-size-small); +} + +.rst-versions .rst-other-versions dd a { + color: white; +} diff --git a/docs/source/api/algorithms.rst b/docs/source/api/algorithms.rst new file mode 100644 index 00000000..a79cec3f --- /dev/null +++ b/docs/source/api/algorithms.rst @@ -0,0 +1,6 @@ +fedeca.algorithms +========================= + +.. currentmodule:: fedeca.algorithms + +.. autoclass:: fedeca.algorithms.TorchWebDiscoAlgo diff --git a/docs/source/api/competitors.rst b/docs/source/api/competitors.rst new file mode 100644 index 00000000..bd08e3eb --- /dev/null +++ b/docs/source/api/competitors.rst @@ -0,0 +1,8 @@ +fedeca.competitors +========================= + +.. autoclass:: fedeca.PooledIPTW + +.. autoclass:: fedeca.MatchingAjudsted + +.. autoclass:: fedeca.NaiveComparison diff --git a/docs/source/api/iptw.rst b/docs/source/api/iptw.rst new file mode 100644 index 00000000..16d97f2c --- /dev/null +++ b/docs/source/api/iptw.rst @@ -0,0 +1,4 @@ +fedeca.fedeca_core +========================= + +.. autoclass:: fedeca.FedECA diff --git a/docs/source/api/metrics.rst b/docs/source/api/metrics.rst new file mode 100644 index 00000000..89ebf5f6 --- /dev/null +++ b/docs/source/api/metrics.rst @@ -0,0 +1,4 @@ +fedeca.metrics +========================= + +.. automodule:: fedeca.metrics.metrics diff --git a/docs/source/api/scripts.rst b/docs/source/api/scripts.rst new file mode 100644 index 00000000..7bb4b48e --- /dev/null +++ b/docs/source/api/scripts.rst @@ -0,0 +1,4 @@ +fedeca.scripts +========================= + +.. autoclass:: fedeca.scripts.substra_assets.csv_opener.CSVOpener diff --git a/docs/source/api/strategies.rst b/docs/source/api/strategies.rst new file mode 100644 index 00000000..e5e32ecc --- /dev/null +++ b/docs/source/api/strategies.rst @@ -0,0 +1,8 @@ +fedeca.strategies +========================= + +.. currentmodule:: fedeca.strategies.webdisco + +.. 
autoclass:: fedeca.strategies.WebDisco + +.. automodule:: fedeca.strategies.webdisco_utils diff --git a/docs/source/api/utils.rst b/docs/source/api/utils.rst new file mode 100644 index 00000000..965c68bf --- /dev/null +++ b/docs/source/api/utils.rst @@ -0,0 +1,14 @@ +fedeca.utils +========================= + +.. automodule:: fedeca.utils.data_utils + +.. automodule:: fedeca.utils.experiments_utils + +.. automodule:: fedeca.utils.moments_utils + +.. automodule:: fedeca.utils.substrafl_utils + +.. automodule:: fedeca.utils.tensor_utils + +.. automodule:: fedeca.utils.typing diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..5fd7c344 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,266 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +from datetime import date + +import git + +# -- Project information ----------------------------------------------------- + +project = "FedECA" +copyright = f"{date.today().year}, OWKIN" +author = "OWKIN" +current_commit = git.Repo(search_parent_directories=True).head.object.hexsha +version = current_commit +release = version + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + "sphinx.ext.napoleon", + "sphinx.ext.autodoc", + "sphinx.ext.mathjax", + "sphinx_rtd_theme", + "sphinx.ext.ifconfig", + "myst_parser", + "sphinx.ext.autosummary", + "sphinx.ext.autosectionlabel", + "sphinx.ext.intersphinx", + # "sphinx_gallery.gen_gallery", + "sphinx_autodoc_typehints", + "sphinxcontrib.bibtex", +] + + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "pandas": ("https://pandas.pydata.org/docs/", None), + "anndata": ("https://anndata.readthedocs.io/en/latest/", None), +} + +autosectionlabel_prefix_document = True + +# autodoc settings +autodoc_default_options = { + "show-inheritance": False, + "inherited-members": False, + "members": True, +} + +add_module_names = False +autosummary_generate = False # Don't generate rst files automatically from autosummary +autoclass_content = "class" # Don't document class __init__'s +autodoc_typehints = ( + "both" # Show typehints in the signature + as content of the function +) +autodoc_typehints_format = "short" # Shorten type hints +autodoc_member_order = ( + "groupwise" # Sort automatically documented members by member type +) +python_use_unqualified_type_names = True # Suppress module names + +# # This is the expected signature of the handler for this event, cf doc +# def autodoc_skip_member_handler(app, what, name, obj, skip, options): +# # Basic approach; you might want a regex instead +# return name.endswith("__") +# +# +# # Automatically called by sphinx at startup +# def setup(app): +# # Connect the autodoc-skip-member event from apidoc to the callback +# app.connect("autodoc-skip-member", autodoc_skip_member_handler) +# + +# Bibliography +bibtex_bibfiles = [] # ["refs.bib"] +# Workaround to cite the same paper in several places in the API docs +suppress_warnings = ["bibtex.duplicate_label"] + + +# Napoleon settings +# https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html + 
+napoleon_google_docstring = False +napoleon_numpy_docstring = True +napoleon_preprocess_types = True # generate hyperlinks for parameter types + +napoleon_type_aliases = {} + +# Add any paths that contain templates here, relative to this directory. +# templates_path = [] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = [".rst", ".md"] +# source_suffix = '.rst' + +# The master toctree document. +master_doc = "index" + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = "en" + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = None + +# Remove the prompt when copying examples +copybutton_prompt_text = ">>> " + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +html_theme_options = { + "analytics_id": "UA-83738774-2", + "logo_only": True, + "display_version": True, +} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ["_static"] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +html_sidebars = {} + +# This must be the name of an image file (path relative to the configuration +# directory) that is the favicon of the docs. Modern browsers use this as +# the icon for tabs, windows and bookmarks. It should be a Windows-style +# icon file (.ico). +html_favicon = "_static/favicon.ico" + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = "fedecadoc" + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + + +# -- Options for Epub output ------------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = project + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +# +# epub_identifier = '' + +# A unique identification for the text. +# +# epub_uid = '' + +# A list of files that should not be packed into the epub file. 
+epub_exclude_files = ["search.html"] + + +# As we defined the type of our args, auto doc is trying to find a link to a +# documentation for each type specified +# The following elements are the link that auto doc were not able to do +nitpick_ignore = [ + ("py:class", "pd.Series"), + ("py:class", "pd.DataFrame"), + ("py:class", "ndarray"), + ("py:class", "np.ndarray"), + ("py:class", "numpy._typing._generic_alias.ScalarType"), + ("py:class", "pydantic.main.BaseModel"), + ("py:class", "torch.nn.modules.module.Module"), + ("py:class", "torch.nn.modules.loss._Loss"), + ("py:class", "torch.optim.optimizer.Optimizer"), + ("py:class", "torch.optim.lr_scheduler._LRScheduler"), + ("py:class", "torch.device"), + ("py:class", "torch.utils.data.dataset.Dataset"), + ("py:class", "substrafl.algorithms.pytorch.torch_base_algo.TorchAlgo"), + ("py:class", "torch.nn.Module"), + ("py:class", "substrafl.strategies.schemas.StrategyName"), + ("py:class", "substratools.opener.Opener"), + ("py:class", "typing.List[StrategyName]"), +] + +html_css_files = [ + "fonts.css", + "owkin.css", + "sidebar.css", +] + +# html_logo = "_static/pydeseq2_logo.svg" +html_show_sourcelink = False +html_show_sphinx = True + +# sphinx_gallery_conf = { +# "examples_dirs": "../../examples", # path to your example scripts +# "gallery_dirs": "auto_examples", # path to where to save gallery generated output +# "binder": { +# "org": "Owkin", +# "repo": "PyDESeq2", +# "branch": current_commit, # Can be any branch, tag, or commit hash. +# # Use a branch that hosts your docs. 
+# "binderhub_url": "https://mybinder.org", # public binderhub url +# "dependencies": str(Path(__file__).parents[2] / "environment.yml"), +# "notebooks_dir": "jupyter_notebooks", +# "use_jupyter_lab": True, +# }, +# } diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..5722288d --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,57 @@ +FedECA documentation +====================== + +This package allows to perform both simulations and deployments of federated +external control arms (FedECA) analyses. + +Before using this code make sure to: + +#. read and accept the terms of the license license.md that can be found at the root of the repository. +#. read `substra's privacy strategy `_ +#. read our associated technical article +#. `activate secure rng in Opacus `_ if you plan on using differential privacy. + + + +Citing this work +---------------- + +:: + + @article{duterrailfedeca2023, + title={FedECA: A Federated External Control Arm Method for Causal Inference with Time-To-Event Data in Distributed Settings}, + author={Ogier du Terrail, Jean and Klopfenstein, Quentin and Li Honghao and Mayer, Imke and Loiseau, Nicolas and Hallal Mohammad and Balazard, Félix and Andreux, Mathieu}, + year={2023}, + doi = {no.doi.yet}, + journal={arXiv}, + } + +License +------- + +FedECA is released under a custom license that can be found under license.md at the root of the repository. + +.. toctree:: + :maxdepth: 0 + :caption: Installation + + installation + +.. toctree:: + :maxdepth: 0 + :caption: Getting Started Instructions + + quickstart + +.. 
toctree:: + :hidden: + :maxdepth: 4 + :caption: API + + api/fedeca + api/competitors + api/algorithms + api/metrics + api/scripts + api/strategies + api/utils diff --git a/docs/source/installation.rst b/docs/source/installation.rst new file mode 100644 index 00000000..a3281c00 --- /dev/null +++ b/docs/source/installation.rst @@ -0,0 +1,22 @@ + +Installation +============ + +To install the package, create an env with python ``3.9`` with conda + +.. code-block:: bash + + conda create -n fedeca python=3.9 + conda activate fedeca + +Within the environment, install the package by running: + +.. code-block:: + + git clone https://github.com/owkin/fedeca.git + pip install -e ".[all_extra]" + +If you plan developing, you should also install the pre-commit hooks + +```bash +pre-commit install diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst new file mode 100644 index 00000000..9f573291 --- /dev/null +++ b/docs/source/quickstart.rst @@ -0,0 +1,178 @@ + +Quickstart +---------- + +FedECA tries to mimic scikit-learn API as much as possible with the constraints +of distributed learning. +The first step in data science is always the data. +We need to first use or generate some survival data in pandas.dataframe format. +Note that fedeca should work on any data format, provided that the +return type of the substra opener is indeed a pandas.dataframe but let's keep +it simple in this quickstart. + +Here we will use fedeca utils which will generate some synthetic survival data +following CoxPH assumptions: + +.. code-block:: python + + import pandas as pd + from fedeca.utils.survival_utils import CoxData + # Let's generate 1000 data samples with 10 covariates + data = CoxData(seed=42, n_samples=1000, ndim=10) + df = data.generate_dataframe() + + # We remove the true propensity score + df = df.drop(columns=["propensity_scores"], axis=1) + +Let's inspect the data that we have here. + +.. 
code-block:: python + + print(df.info()) + # + # RangeIndex: 1000 entries, 0 to 999 + # Data columns (total 13 columns): + # # Column Non-Null Count Dtype + # --- ------ -------------- ----- + # 0 X_0 1000 non-null float64 + # 1 X_1 1000 non-null float64 + # 2 X_2 1000 non-null float64 + # 3 X_3 1000 non-null float64 + # 4 X_4 1000 non-null float64 + # 5 X_5 1000 non-null float64 + # 6 X_6 1000 non-null float64 + # 7 X_7 1000 non-null float64 + # 8 X_8 1000 non-null float64 + # 9 X_9 1000 non-null float64 + # 10 time 1000 non-null float64 + # 11 event 1000 non-null uint8 + # 12 treatment 1000 non-null uint8 + # dtypes: float64(11), uint8(2) + # memory usage: 88.0 KB + print(df.head()) + # X_0 X_1 X_2 X_3 X_4 X_5 X_6 X_7 X_8 X_9 time event treatment + # 0 -0.918373 -0.814340 -0.148994 0.482720 -1.130384 -1.254769 -0.462002 1.451622 1.199705 0.133197 2.573516 1 1 + # 1 0.360051 -0.863619 0.198673 0.330630 -0.189184 -0.802424 -1.694990 -0.989009 -0.421245 -0.112665 0.519108 1 1 + # 2 0.442502 0.024682 0.069500 -0.398015 -0.521236 -0.824907 0.373018 1.016843 0.765661 0.858817 0.652803 1 1 + # 3 -0.783965 -1.116391 -1.482413 -2.039827 -1.639304 -0.500380 -0.298467 -1.801688 -0.743004 -0.724039 0.074925 1 1 + # 4 -0.199620 -0.652347 -0.018776 0.004630 -0.122242 -0.413490 -0.450718 -0.761894 -1.323135 -0.234899 0.006951 1 1 + print(df["treatment"].unique()) + # array([1, 0], dtype=uint8) + df["treatment"].sum() + # 500 + +So we have survival data with covariates and a binary treatment variable. +Let's inspect it using proper survival plots using the great survival analysis +package `lifelines `_ that was a +source of inspiration for fedeca: + +.. 
code-block:: python + + from lifelines import KaplanMeierFitter as KMF + import matplotlib.pyplot as plt + treatments = [0, 1] + kms = [KMF().fit(durations=df.loc[df["treatment"] == t]["time"], event_observed=df.loc[df["treatment"] == t]["event"]) for t in treatments] + + axs = [km.plot(label="treated" if t == 1 else "untreated") for km, t in zip(kms, treatments)] + axs[-1].set_ylabel("Survival Probability") + plt.xlim(0, 1500) + plt.savefig("treated_vs_untreated.pdf", bbox_inches="tight") + +Open ``treated_vs_untreated.pdf`` in your favorite pdf viewer and see for yourself. + +Pooled IPTW analysis +-------------------- + +The treatment seems to improve survival but it's hard to say for sure as it might +simply be due to chance or sampling bias. +Let's perform an IPTW analysis to be sure: + +.. code-block:: python + + from fedeca.competitors import PooledIPTW + pooled_iptw = PooledIPTW(treated_col="treatment", event_col="event", duration_col="time") + # Targets is the propensity weights + pooled_iptw.fit(data=df, targets=None) + print(pooled_iptw.results_) + # coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95% cmp to z p -log2(p) + # covariate + # treatment 0.041727 1.04261 0.070581 -0.096609 0.180064 0.907911 1.197294 0.0 0.591196 0.554389 0.85103 + +When looking at the ``p-value=0.554389 > 0.05``\ , thus judging by what we observe we +cannot say for sure that there is a treatment effect. We say the ATE is non significant. + +Distributed Analysis +-------------------- + +However in practice data is private and held by different institutions. Therefore +in practice each client holds a subset of the rows of our dataframe. +We will simulate this using a realistic scenario where a "pharma" node is developing +a new drug and thus holds all treated and the rest of the data is split across +3 other institutions where patients were treated with the old drug. +We will use the split utils of FedECA. + +.. 
code-block:: python + + from fedeca.utils.data_utils import split_dataframe_across_clients + + clients, train_data_nodes, _, _, _ = split_dataframe_across_clients( + df, + n_clients=4, + split_method= "split_control_over_centers", + split_method_kwargs={"treatment_info": "treatment"}, + data_path="./data", + backend_type="simu", + ) + +Note that you can replace split_method by any callable with the signature +``pd.DataFrame -> list[int]`` where the list of ints is the split of the indices +of the df across the different institutions. +To convince you that the split was effective you can inspect the folder "./data". +You will find different subfolders ``center0`` to ``center3`` each with different +parts of the data. +To unpack a bit what is going on in more depth, we have created a dict of client +'clients', +which is a dict with 4 keys containing substra API handles towards the different +institutions and their data. +``train_data_nodes`` is a list of handles towards the datasets of the different institutions +that were registered through the substra interface using the data in the different +folders. +You might have noticed that we did not talk about the ``backend_type`` argument. +This argument is used to choose on which network will experiments be run. +"simu" means in-RAM. If you finish this tutorial do try other values such as: +"docker" or "subprocess" but expect a significant slow-down as experiments +get closer and closer to a real distributed system. + +Now let's try to see if we can reproduce the pooled anaysis in this much more +complicated distributed setting: + +.. 
code-block:: python + + from fedeca import FedECA + # We use the first client as the node, which launches order + ds_client = clients[list(clients.keys())[0]] + fed_iptw = FedECA(ndim=10, ds_client=ds_client, train_data_nodes=train_data_nodes, treated_col="treatment", duration_col="time", event_col="event", robust=True) + fed_iptw.run() + # Final partial log-likelihood: + # [-11499.19619422] + # coef se(coef) coef lower 95% coef upper 95% z p exp(coef) exp(coef) lower 95% exp(coef) upper 95% + # 0 0.041718 0.070581 -0.096618 0.180054 0.591062 0.554479 1.0426 0.907902 1.197282 + +In fact what we did above is both quite verbose. For simulation purposes we +advise to use directly the scikit-learn inspired syntax: + +.. code-block:: python + + from fedeca import FedECA + + fed_iptw = FedECA(ndim=10, treated_col="treatment", event_col="event", duration_col="time") + fed_iptw.fit(df, n_clients=4, split_method="split_control_over_centers", split_method_kwargs={"treatment_info": "treatment"}, data_path="./data", robust=True, backend_type="simu") + # coef se(coef) coef lower 95% coef upper 95% z p exp(coef) exp(coef) lower 95% exp(coef) upper 95% + # 0 0.041718 0.070581 -0.096618 0.180054 0.591062 0.554479 1.0426 0.907902 1.197282 + +We find a similar p-value ! The distributed analysis is working as expected. +We recommend to users that made it to here as a next step to use their own data +and write custom split functions and to test this pipeline under various +heterogeneity settings. +Another interesting avenue is to try adding differential privacy to the training +of the propensity model but that is outside the scope of this quickstart. 
diff --git a/experiments/README.md b/experiments/README.md new file mode 100644 index 00000000..4c971eeb --- /dev/null +++ b/experiments/README.md @@ -0,0 +1,143 @@ +### The `experiments` module +```shell +experiments/ +├── config/ +│ ├── data/ +│ │ └── cox_data.yaml +│ ├── default_config.yaml +│ ├── experiment/ +│ │ ├── example.yaml +│ │ └── ... +│ ├── model/ +│ │ ├── common.yaml +│ │ ├── covariate_adjusted.yaml +│ │ ├── fl_iptw.yaml +│ │ ├── matching_adjusted.yaml +│ │ ├── naive_comparison.yaml +│ │ └── pooled_iptw.yaml +└── run_experiment.py +``` + +#### Configuration files +Experiments are defined by composing different configuration groups under `config`. +* `config/data` contains data generation models +* `config/model` contains estimation models +* `config/experiment` contains assembled experiment settings + +#### Running experiments +`config/experiment/example.yaml` provides an example of experiment setting to help users +assemble more complex experiments. To run the experiment defined by `example.yaml`: +```shell +python -m experiments.run_experiment -m experiment=example +``` +To run user-defined experiments, read and adapt (with copy) `example.yaml`, put the new +config file under `config/experiment`, then run the above command modifying the +`experiment` kwarg. + +### User guide to generate figures of the FedECA article +--- +#### Figure 2: Pooled IPTW vs. Fedeca +To reproduce the results on the relative error between FedECA and pooled IPTW, run the following command line: +``` +python -m experiments.run_experiment -m experiment=robust_pooled_equivalent +``` + +To change some parameters of the experiment, the config file is in `experiments/config/experiment/robust_pooled_equivalent.yaml`. + +Once done, the results are saved in the shared folder on Abstra, `/home/owkin/project/results_experiments/pooled_equivalent/results_Robust_Pooled_Equivalent.pkl`. 
+
+In order to plot the figure, simply run the following python script
+```
+python /experiments/robust_pooled_equivalent/robust_plot_pooled_equivalent.py
+```
+The figure will be saved in a pdf format in your current directory.
+
+To obtain the equivalent figure without using robust, just drop all robust_ prefixes.
+
+
+---
+#### Figure 3: Statistical power and Type I error benchmark.
+To reproduce the results on statistical power and type I error, run the following commands:
+```
+python -m experiments.run_experiment -m experiment=power_and_type_one_error_cov_shift
+python -m experiments.run_experiment -m experiment=power_and_type_one_error_n_samples
+```
+
+To change some parameters of the experiment, the config files are:
+* `experiments/config/experiment/power_and_type_one_error_cov_shift.yaml`
+* `experiments/config/experiment/power_and_type_one_error_n_samples.yaml`
+
+Once done, the results are saved in the shared folder on Abstra:
+* `/home/owkin/project/results_experiments/power_and_type_one_error_cov_shift/results_Power_and_type_one_error_analyses.pkl`.
+* `/home/owkin/project/results_experiments/power_and_type_one_error_n_samples/results_Power_and_type_one_error_analyses.pkl`.
+
+In order to plot the figure, simply run the following command:
+```
+python /experiments/power/plot_power_type_one_error.py
+```
+The figure with 4 subfigures (power and type I error, varied covariate shift and number
+of samples) will be saved in a pdf format in your current directory.
+
+---
+#### Figure S2: DP-FedECA
+
+To reproduce the results on relative errors with changing DP params, run the following command line:
+```
+python -m experiments.run_experiment -m experiment=pooled_equivalence_dp
+```
+
+To change some parameters of the experiment, the config file is in `experiments/config/experiment/pooled_equivalence_dp.yaml`.
+Once done, the results are saved in the shared folder on Abstra, `results_Pooled_equivalent_DP.pkl`.
+Currently it's stored in the repository in the `pooled_equivalent_dp` folder.
+In order to plot the figure, put the pickle on abstra and simply run the following python script
+```
+python /experiments/dp/plot_dp_hydra.py
+```
+
+---
+#### Table 1: Real-world experiments
+
+To reproduce the results on timings, first create and download API tokens from
+the demo-env for all 3 organizations. Create a tokens folder in your current
+directory and copy paste the tokens in api_key1 / api_key2 and api_key3 files
+corresponding to org1 / org2 and org3.
+Then run the following command line:
+```
+python -m experiments.run_experiment -m experiment=real_world_runtimes
+```
+
+To change some parameters of the experiment, the config file is in `experiments/config/experiment/real_world_runtimes.yaml`.
+Once done, the results are saved in the shared folder on Abstra, `results_Real-world_experiments.pkl`.
+In order to plot the figure, put the pickle on abstra and simply run the following python script
+```
+python ./experiments/real-world/plot_real_world_hydra.py
+```
+
+---
+#### Figure S1: pooled equivalent
+To reproduce the results on the relative error between FedECA and pooled IPTW with an increasing number of clients, run the following command line:
+```
+python -m experiments.run_experiment -m experiment=robust_pooled_equivalent_nb_clients
+```
+
+To change some parameters of the experiment, the config file is in `experiments/config/experiment/robust_pooled_equivalent_nb_clients.yaml`.
+
+Once done, the results are saved in the shared folder on Abstra, `/home/owkin/project/results_experiments/robust_pooled_equivalent/results_Robust_Pooled_Equivalent_nb_clients.pkl`.
+
+In order to plot the figure, simply run the following python script
+```
+python /experiments/pooled_equivalent/robust_plot_pooled_equivalent_nb_clients.py
+```
+The figure will be saved in a pdf format in your current directory.
+
+For the figure illustrating the effect of ties, run:
+
+```
+python -m experiments.run_experiment -m experiment=pooled_equivalence_ties
+```
+Then plot with:
+
+```
+python ./experiments/ties/plot_ties_hydra.py
+```
+
diff --git a/experiments/config/data/cox_data.yaml b/experiments/config/data/cox_data.yaml
new file mode 100644
index 00000000..e6498149
--- /dev/null
+++ b/experiments/config/data/cox_data.yaml
@@ -0,0 +1,16 @@
+_target_: fedeca.utils.survival_utils.CoxData
+ndim: 10
+features_type: "cov_toeplitz"
+cate: 1.0
+propensity: "constant"
+prop_treated: 0.5
+overlap: 0.0
+cov_corr: 0.5
+scale_t: 1.0
+shape_t: 1.0
+censoring_factor: 0.5
+percent_ties: null
+random_censoring: False
+seed: 42
+standardize_features: True
+dtype: "float64"
diff --git a/experiments/config/default_config.yaml b/experiments/config/default_config.yaml
new file mode 100644
index 00000000..f37ad559
--- /dev/null
+++ b/experiments/config/default_config.yaml
@@ -0,0 +1,30 @@
+name: "Default configuration"
+
+# Use default list to load predefined configs
+defaults:
+  - data: cox_data
+  # Select "model/common.yaml" and assign it to `models_common`
+  - model@models_common: common
+  # For each experiment we need to specify a dictionary of `models` (models of
+  # different dict keys can have the same model as values). By default no model
+  # is specified, see "experiment/example.yaml" for instructions.
+  - _self_
+  # Specification of each experiment, to be overridden by configs under the group
+  # "experiment".
+ - experiment: null + +parameters: + # Number of repetitions of each experiment + n_reps: 10 + # Number of samples of the generated dataset in each repetition + n_samples: 100 + # Whether to return propensity scores of each model in the results + return_propensities: True + # Whether to return computed weights of each model in the results + return_weights: True + +# initial_seed is used to generate seed for each run (set of parameters) +initial_seed: 42 + +hydra: + mode: "MULTIRUN" diff --git a/experiments/config/experiment/example.yaml b/experiments/config/experiment/example.yaml new file mode 100644 index 00000000..ca79754f --- /dev/null +++ b/experiments/config/experiment/example.yaml @@ -0,0 +1,64 @@ +# @package _global_ +# An example of experiment config that extends "default_config.yaml". +# First line is crucial, don't forget to add it! +name: "Example experiment" + +# initial_seed is used to generate seed for each run +initial_seed: 42 + +# modify data generation model, full list of params available at data/cox_data +data: + ndim: 5 + cate: 0.7 + scale_t: 10.0 + shape_t: 3.0 + propensity: "linear" + standardize_features: False + +defaults: + # To setup the dict "models" with predefined models in the group "model", + # we need to use the default list. 
The following line add the key-value pair + # ("IPTW", pooled_iptw) to the dict/package `models`, where `pooled_iptw` is a + # predefined config in "model" ("model/pooled_iptw.yaml") + - /model@models.IPTW: pooled_iptw + - /model@models.OracleIPTW: pooled_iptw + - /model@models.MAIC: matching_adjusted + - /model@models.Naive: naive_comparison + - /model@models.new_model: pooled_iptw + - _self_ + +# modify/extend specific models or add your own +models: + # modify pre-defined + IPTW: + effect: "ATE" + new_model: + effect: "ATT" + # Add new one: _target_ is the class, the rest is the initial kwargs + my_naive: + _target_: fedeca.competitors.NaiveComparison + treated_col: "treated" + event_col: "event" + duration_col: "time" + ps_col: "propensity_scores" + +# In case you want to modify/add anything common to all models, parameters +# listed here will override for all models before initialization +models_common: + treated_col: "TREATED" + +parameters: + # Whether to return propensity scores of each model in the results + return_propensities: False + # Whether to return computed weights of each model in the results + return_weights: False + +hydra: + sweep: + # You can specify output dir, but it's recommended to keep it unique + # for each experiment, for example, by using the default value which uses + # the timestamp of execution. 
+ dir: "example_results" + sweeper: + params: + parameters.n_samples: range(200,700,100) diff --git a/experiments/config/experiment/pooled_equivalence_dp.yaml b/experiments/config/experiment/pooled_equivalence_dp.yaml new file mode 100644 index 00000000..5d142de2 --- /dev/null +++ b/experiments/config/experiment/pooled_equivalence_dp.yaml @@ -0,0 +1,57 @@ +# @package _global_ +name: "Pooled equivalent DP" + +# initial_seed is used to generate seed for each run +initial_seed: 42 + +data: + ndim: 10 + cate: 0.7 + scale_t: 10.0 + shape_t: 3.0 + propensity: "linear" + standardize_features: False + +defaults: + - /model@models.IPTW: pooled_iptw + - /model@models.FedECA: fl_iptw + - _self_ + +models: + IPTW: + effect: "ATE" + cox_fit_kwargs: + robust: True + FedECA: + ndim: 10 + num_rounds_list: [50, 50] + +# config fit FedECA +fit_fedeca: + n_clients: 3 + split_method: "split_control_over_centers" + split_method_kwargs: {"treatment_info": "treatment_allocation"} + dp_max_grad_norm: 1. + dp_target_delta: 0.001 + dp_propensity_model_training_params: {"batch_size": 100, "num_updates": 100} + dp_propensity_model_optimizer_kwargs: {"lr": 1e-2} + backend_type: "simu" + robust: True + +models_common: + treated_col: "treatment_allocation" + event_col: "event" + duration_col: "time" + +parameters: + n_samples: 1_000 + n_reps: 5 + +hydra: + sweep: + dir: "pooled_equivalent_dp" + sweeper: + params: + +fit_fedeca.dp_target_epsilon: 0.1, 0.13869189, 0.19235439, 0.26677993, 0.37000212, 0.51316292, 0.71171532, 0.9870914 , 1.36901568, 1.89871366, 2.63336178, 3.65225911, 5.06538703, 7.02528079, 9.7434944, 13.51343612, 18.74203937, 25.99368781, 36.05113577, 50. 
+ ++fit_fedeca.dp_target_delta: 0.001, 0.01, 0.1 + diff --git a/experiments/config/experiment/pooled_equivalence_ties.yaml b/experiments/config/experiment/pooled_equivalence_ties.yaml new file mode 100644 index 00000000..375d63f4 --- /dev/null +++ b/experiments/config/experiment/pooled_equivalence_ties.yaml @@ -0,0 +1,54 @@ +# @package _global_ +name: "Pooled equivalent ties" + +# initial_seed is used to generate seed for each run +initial_seed: 42 + +data: + ndim: 10 + cate: 0.7 + scale_t: 10.0 + shape_t: 3.0 + propensity: "linear" + standardize_features: False + +defaults: + - /model@models.IPTW: pooled_iptw + - /model@models.FedECA: fl_iptw + - _self_ + +models: + IPTW: + effect: "ATE" + cox_fit_kwargs: + robust: True + FedECA: + ndim: 10 + num_rounds_list: [50, 50] + +# config fit FedECA +fit_fedeca: + n_clients: 3 + split_method: "split_control_over_centers" + split_method_kwargs: {"treatment_info": "treatment_allocation"} + backend_type: "simu" + robust: True + +models_common: + treated_col: "treatment_allocation" + event_col: "event" + duration_col: "time" + +parameters: + n_samples: 1_000 + n_reps: 5 + return_propensities: True + return_weights: True + +hydra: + sweep: + dir: "pooled_equivalent_ties" + sweeper: + params: + ++data.percent_ties: null, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9 + diff --git a/experiments/config/experiment/pooled_equivalent.yaml b/experiments/config/experiment/pooled_equivalent.yaml new file mode 100644 index 00000000..566af629 --- /dev/null +++ b/experiments/config/experiment/pooled_equivalent.yaml @@ -0,0 +1,46 @@ +# @package _global_ +name: "Pooled equivalent" + +# initial_seed is used to generate seed for each run +initial_seed: 42 + +data: + ndim: 10 + cate: 0.7 + scale_t: 10.0 + shape_t: 3.0 + propensity: "linear" + standardize_features: False + +defaults: + - /model@models.IPTW: pooled_iptw + - /model@models.FedECA: fl_iptw + - _self_ + +models: + IPTW: + effect: "ATE" + cox_fit_kwargs: + robust: False + FedECA: + ndim: 
10 + +# config fit FedECA +fit_fedeca: + n_clients: 3 + split_method: "split_control_over_centers" + split_method_kwargs: {"treatment_info": "treatment_allocation"} + data_path: "/home/owkin/federated-eca-code/experiments/simulated_cox/iptw_experiment/" + +models_common: + treated_col: "treatment_allocation" + event_col: "event" + duration_col: "time" + +parameters: + n_samples: 1_000 + n_reps: 100 + +hydra: + sweep: + dir: "/home/owkin/project/results_experiments/pooled_equivalent" diff --git a/experiments/config/experiment/pooled_equivalent_nb_clients.yaml b/experiments/config/experiment/pooled_equivalent_nb_clients.yaml new file mode 100644 index 00000000..5a51b289 --- /dev/null +++ b/experiments/config/experiment/pooled_equivalent_nb_clients.yaml @@ -0,0 +1,49 @@ +# @package _global_ +name: "Pooled equivalent nb clients" + +# initial_seed is used to generate seed for each run +initial_seed: 42 + +data: + ndim: 10 + cate: 0.7 + scale_t: 10.0 + shape_t: 3.0 + propensity: "linear" + standardize_features: False + +defaults: + - /model@models.IPTW: pooled_iptw + - /model@models.FedECA: fl_iptw + - _self_ + +models: + IPTW: + effect: "ATE" + cox_fit_kwargs: + robust: False + FedECA: + ndim: 10 + +# config fit FedECA +fit_fedeca: + n_clients: 3 + split_method: "split_control_over_centers" + split_method_kwargs: {"treatment_info": "treatment_allocation"} + data_path: "/home/owkin/federated-eca-code/experiments/simulated_cox/iptw_experiment/" + +models_common: + treated_col: "treatment_allocation" + event_col: "event" + duration_col: "time" + +parameters: + n_samples: 1_000 + n_reps: 100 + +hydra: + sweep: + dir: "/home/owkin/project/results_experiments/pooled_equivalent" + sweeper: + params: + fit_fedeca.n_clients: 2, 3, 5, 10 diff --git a/experiments/config/experiment/power_and_type_one_error_cov_shift.yaml b/experiments/config/experiment/power_and_type_one_error_cov_shift.yaml new file mode 100644 index 00000000..75e44bf6 --- /dev/null +++ 
b/experiments/config/experiment/power_and_type_one_error_cov_shift.yaml @@ -0,0 +1,57 @@ +# @package _global_ +name: "Power and type one error analyses" + +initial_seed: 20 + +data: + ndim: 10 + scale_t: 10.0 + shape_t: 3.0 + propensity: "linear" + standardize_features: False + +defaults: + - /model@models.IPTW_bootstrap: pooled_iptw + - /model@models.IPTW_robust: pooled_iptw + - /model@models.IPTW_naive: pooled_iptw + - /model@models.MAIC_bootstrap: matching_adjusted + - /model@models.MAIC_robust: matching_adjusted + - /model@models.Unweighted: naive_comparison + - _self_ + +n_bootstrap: 200 + +models: + IPTW_bootstrap: + variance_method: "bootstrap" + n_bootstrap: ${n_bootstrap} + IPTW_robust: + variance_method: "robust" + IPTW_naive: + variance_method: "naive" + MAIC_bootstrap: + variance_method: "bootstrap" + n_bootstrap: ${n_bootstrap} + MAIC_robust: + variance_method: "robust" + Unweighted: + variance_method: "naive" + +models_common: + treated_col: "treatment_allocation" + event_col: "event" + duration_col: "time" + +parameters: + n_samples: 700 + n_reps: 1000 + return_propensities: False + return_weights: False + +hydra: + sweep: + dir: "/home/owkin/project/results_experiments/power_and_type_one_error_cov_shift" + sweeper: + params: + data.cate: 1.0,0.4 + data.overlap: range(-1,4,1) diff --git a/experiments/config/experiment/power_and_type_one_error_n_samples.yaml b/experiments/config/experiment/power_and_type_one_error_n_samples.yaml new file mode 100644 index 00000000..5aa95a3d --- /dev/null +++ b/experiments/config/experiment/power_and_type_one_error_n_samples.yaml @@ -0,0 +1,58 @@ +# @package _global_ +name: "Power and type one error analyses" + +initial_seed: 10 + +data: + ndim: 10 + scale_t: 10.0 + shape_t: 3.0 + overlap: 3.0 + propensity: "linear" + standardize_features: False + +defaults: + - /model@models.IPTW_bootstrap: pooled_iptw + - /model@models.IPTW_robust: pooled_iptw + - /model@models.IPTW_naive: pooled_iptw + - 
/model@models.MAIC_bootstrap: matching_adjusted + - /model@models.MAIC_robust: matching_adjusted + - /model@models.Unweighted: naive_comparison + - _self_ + +n_bootstrap: 200 + +models: + IPTW_bootstrap: + variance_method: "bootstrap" + n_bootstrap: ${n_bootstrap} + IPTW_robust: + variance_method: "robust" + IPTW_naive: + variance_method: "naive" + MAIC_bootstrap: + variance_method: "bootstrap" + n_bootstrap: ${n_bootstrap} + MAIC_robust: + variance_method: "robust" + Unweighted: + variance_method: "naive" + +models_common: + treated_col: "treatment_allocation" + event_col: "event" + duration_col: "time" + +parameters: + n_samples: 1000 + n_reps: 1000 + return_propensities: False + return_weights: False + +hydra: + sweep: + dir: "/home/owkin/project/results_experiments/power_and_type_one_error_n_samples" + sweeper: + params: + data.cate: 1.0,0.4 + parameters.n_samples: range(300,1101,200) diff --git a/experiments/config/experiment/real_world_runtimes.yaml b/experiments/config/experiment/real_world_runtimes.yaml new file mode 100644 index 00000000..fc0af1cb --- /dev/null +++ b/experiments/config/experiment/real_world_runtimes.yaml @@ -0,0 +1,58 @@ +# @package _global_ +name: "Real-world experiments" + +# initial_seed is used to generate seed for each run +initial_seed: 42 + +data: + ndim: 10 + cate: 0.7 + scale_t: 10.0 + shape_t: 3.0 + propensity: "linear" + standardize_features: False + +defaults: + - /model@models.IPTW: pooled_iptw + - /model@models.FedECA: fl_iptw + - _self_ + +models: + IPTW: + effect: "ATE" + cox_fit_kwargs: + robust: False + FedECA: + ndim: 10 + num_rounds_list: [50, 50] + +# config fit FedECA +fit_fedeca: + n_clients: 3 + robust: True + split_method: "split_control_over_centers" + split_method_kwargs: {"treatment_info": "treatment_allocation"} + data_path: "/home/owkin/real-world/" + # We start on the second url the first is reserved for the server + urls: ["https://api.org-2.fedeca-rw.cg.owkin.tech", 
"https://api.org-3.fedeca-rw.cg.owkin.tech", "https://api.org-4.fedeca-rw.cg.owkin.tech", "https://api.org-5.fedeca-rw.cg.owkin.tech", "https://api.org-6.fedeca-rw.cg.owkin.tech", "https://api.org-7.fedeca-rw.cg.owkin.tech", "https://api.org-8.fedeca-rw.cg.owkin.tech", "https://api.org-9.fedeca-rw.cg.owkin.tech", "https://api.org-10.fedeca-rw.cg.owkin.tech", "https://api.org-11.fedeca-rw.cg.owkin.tech"] + server_org_id: 'MyOrg1MSP' + +models_common: + treated_col: "treatment_allocation" + event_col: "event" + duration_col: "time" + +parameters: + n_samples: 1_000 + n_reps: 5 + +hydra: + sweep: + dir: "real-world" + sweeper: + params: + # 9 will not be kept but used as a warm-up for the server + ++fit_fedeca.n_clients: 9, 10, 8, 5, 3, 2 + +fit_fedeca.backend_type: remote, simu + + diff --git a/experiments/config/experiment/robust_pooled_equivalent.yaml b/experiments/config/experiment/robust_pooled_equivalent.yaml new file mode 100644 index 00000000..896b89ac --- /dev/null +++ b/experiments/config/experiment/robust_pooled_equivalent.yaml @@ -0,0 +1,48 @@ +# @package _global_ +name: "Robust Pooled Equivalent" + +# initial_seed is used to generate seed for each run +initial_seed: 42 + +data: + ndim: 10 + cate: 0.7 + scale_t: 10.0 + shape_t: 3.0 + propensity: "linear" + standardize_features: False + +defaults: + - /model@models.IPTW: pooled_iptw + - /model@models.FedECA: fl_iptw + - _self_ + +models: + IPTW: + effect: "ATE" + cox_fit_kwargs: + robust: True + FedECA: + ndim: 10 + +# config fit FedECA +fit_fedeca: + n_clients: 3 + split_method: "split_control_over_centers" + split_method_kwargs: {"treatment_info": "treatment_allocation"} + data_path: "/home/owkin/federated-eca-code/experiments/simulated_cox/iptw_experiment/" + robust: True + backend_type: "simu" + +models_common: + treated_col: "treatment_allocation" + event_col: "event" + duration_col: "time" + +parameters: + n_samples: 1_000 + n_reps: 100 + +hydra: + sweep: + dir: 
"/home/owkin/project/results_experiments/robust_pooled_equivalence" diff --git a/experiments/config/experiment/robust_pooled_equivalent_nb_clients.yaml b/experiments/config/experiment/robust_pooled_equivalent_nb_clients.yaml new file mode 100644 index 00000000..4bef0d96 --- /dev/null +++ b/experiments/config/experiment/robust_pooled_equivalent_nb_clients.yaml @@ -0,0 +1,51 @@ +# @package _global_ +name: "Robust Pooled Equivalent nb clients" + +# initial_seed is used to generate seed for each run +initial_seed: 42 + +data: + ndim: 10 + cate: 0.7 + scale_t: 10.0 + shape_t: 3.0 + propensity: "linear" + standardize_features: False + +defaults: + - /model@models.IPTW: pooled_iptw + - /model@models.FedECA: fl_iptw + - _self_ + +models: + IPTW: + effect: "ATE" + cox_fit_kwargs: + robust: True + FedECA: + ndim: 10 + +# config fit FedECA +fit_fedeca: + n_clients: 3 + split_method: "split_control_over_centers" + split_method_kwargs: {"treatment_info": "treatment_allocation"} + data_path: "/home/owkin/federated-eca-code/experiments/simulated_cox/iptw_experiment/" + robust: True + backend_type: "simu" + +models_common: + treated_col: "treatment_allocation" + event_col: "event" + duration_col: "time" + +parameters: + n_samples: 1_000 + n_reps: 100 + +hydra: + sweep: + dir: "/home/owkin/project/results_experiments/robust_pooled_equivalence" + sweeper: + params: + fit_fedeca.n_clients: 2, 3, 5, 10 diff --git a/experiments/config/experiment/smd_cov_shift.yaml b/experiments/config/experiment/smd_cov_shift.yaml new file mode 100644 index 00000000..6ab636f2 --- /dev/null +++ b/experiments/config/experiment/smd_cov_shift.yaml @@ -0,0 +1,56 @@ +# @package _global_ +name: "smd_cov_shift" + +initial_seed: 10 + +data: + ndim: 10 + cate: 0.4 + scale_t: 10.0 + shape_t: 3.0 + propensity: "linear" + standardize_features: False + +defaults: + - /model@models.IPTW: pooled_iptw + - /model@models.MAIC: matching_adjusted + - /model@models.FedECA: fl_iptw + - _self_ + +models: + IPTW: + 
cox_fit_kwargs: + robust: True + MAIC: + cox_fit_kwargs: + robust: True + FedECA: + ndim: ${data.ndim} + fedeca_path: "/home/owkin/federated-eca-code/" + +# config fit FedECA +fit_fedeca: + n_clients: 2 + split_method: "split_control_over_centers" + split_method_kwargs: {"treatment_info": "treatment_allocation"} + data_path: "fl_data_overlap" + backend_type: "simu" + robust: True + +models_common: + treated_col: "treatment_allocation" + event_col: "event" + duration_col: "time" + +parameters: + n_samples: 1000 + n_reps: 100 + return_propensities: False + return_weights: False + +hydra: + sweep: + dir: "/home/owkin/project/results_experiments/smd_cov_shift" + sweeper: + params: + data.overlap: range(-1,4,1) diff --git a/experiments/config/experiment/vary_overlap.yaml b/experiments/config/experiment/vary_overlap.yaml new file mode 100644 index 00000000..56882e7d --- /dev/null +++ b/experiments/config/experiment/vary_overlap.yaml @@ -0,0 +1,31 @@ +# @package _global_ +name: "Vary overlap" + +initial_seed: 42 + +data: + ndim: 10 + cate: 0.7 + scale_t: 10.0 + shape_t: 3.0 + propensity: "linear" + standardize_features: True + prop_treated: 0.5 + +defaults: + - /model@models.IPTW: pooled_iptw + - /model@models.OracleIPTW: pooled_iptw + - /model@models.MAIC: matching_adjusted + - /model@models.Naive: naive_comparison + - _self_ + +parameters: + n_samples: 500 + n_reps: 20 + +hydra: + sweep: + dir: "vary_overlap" + sweeper: + params: + data.overlap: range(0,11,2) diff --git a/experiments/config/hr_estimation_error_type_one.yaml b/experiments/config/hr_estimation_error_type_one.yaml new file mode 100644 index 00000000..0abbaca5 --- /dev/null +++ b/experiments/config/hr_estimation_error_type_one.yaml @@ -0,0 +1,10 @@ +name: "power_comparison" + +seed: 42 + +parameters: + experiments: + cate: 0.0 + n_samples: [200, 300, 400, 500, 600, 700] + n_covariates: 5 + n_repeats: 1000 diff --git a/experiments/config/hr_estimation_power.yaml b/experiments/config/hr_estimation_power.yaml 
new file mode 100644 index 00000000..222aab42 --- /dev/null +++ b/experiments/config/hr_estimation_power.yaml @@ -0,0 +1,10 @@ +name: "power_comparison" + +seed: 42 + +parameters: + experiments: + cate: 1.0 + n_samples: [200, 300, 400, 500, 600, 700] + n_covariates: 5 + n_repeats: 1000 diff --git a/experiments/config/model/common.yaml b/experiments/config/model/common.yaml new file mode 100644 index 00000000..7c2ba44a --- /dev/null +++ b/experiments/config/model/common.yaml @@ -0,0 +1,4 @@ +treated_col: "treated" +event_col: "event" +duration_col: "time" +ps_col: "propensity_scores" diff --git a/experiments/config/model/covariate_adjusted.yaml b/experiments/config/model/covariate_adjusted.yaml new file mode 100644 index 00000000..c2671eb4 --- /dev/null +++ b/experiments/config/model/covariate_adjusted.yaml @@ -0,0 +1,4 @@ +defaults: + - common + +_target_: fedeca.competitors.CovariateAdjusted diff --git a/experiments/config/model/fl_iptw.yaml b/experiments/config/model/fl_iptw.yaml new file mode 100644 index 00000000..5b00381e --- /dev/null +++ b/experiments/config/model/fl_iptw.yaml @@ -0,0 +1,6 @@ +defaults: + - common + +_target_: fedeca.fedeca_core.FedECA +num_rounds_list: [20, 20] +dtype: "float64" \ No newline at end of file diff --git a/experiments/config/model/matching_adjusted.yaml b/experiments/config/model/matching_adjusted.yaml new file mode 100644 index 00000000..245825f4 --- /dev/null +++ b/experiments/config/model/matching_adjusted.yaml @@ -0,0 +1,4 @@ +defaults: + - common + +_target_: fedeca.competitors.MatchingAjudsted diff --git a/experiments/config/model/naive_comparison.yaml b/experiments/config/model/naive_comparison.yaml new file mode 100644 index 00000000..18286342 --- /dev/null +++ b/experiments/config/model/naive_comparison.yaml @@ -0,0 +1,4 @@ +defaults: + - common + +_target_: fedeca.competitors.NaiveComparison diff --git a/experiments/config/model/pooled_iptw.yaml b/experiments/config/model/pooled_iptw.yaml new file mode 100644 index 
00000000..7ee0a105 --- /dev/null +++ b/experiments/config/model/pooled_iptw.yaml @@ -0,0 +1,4 @@ +defaults: + - common + +_target_: fedeca.competitors.PooledIPTW diff --git a/experiments/config/pooled_equivalent.yaml b/experiments/config/pooled_equivalent.yaml new file mode 100644 index 00000000..15c07e57 --- /dev/null +++ b/experiments/config/pooled_equivalent.yaml @@ -0,0 +1,13 @@ +name: "pooled_equivalent" + +parameters: + experiments: + nb_clients: [3] + n_samples: 500 + n_covariates: 10 + n_repeat: 20 + percent_ties: null + group_treated: False + + fedeca: + nb_rounds_list: [20, 20] diff --git a/experiments/config/pooled_equivalent_hardcore.yaml b/experiments/config/pooled_equivalent_hardcore.yaml new file mode 100644 index 00000000..8c0850e4 --- /dev/null +++ b/experiments/config/pooled_equivalent_hardcore.yaml @@ -0,0 +1,13 @@ +name: "pooled_equivalent_hardcore" + +parameters: + experiments: + nb_clients: [3] + n_samples: 500 + n_covariates: 10 + n_repeat: 20 + percent_ties: null + group_treated: True + + fedeca: + nb_rounds_list: [20, 20] diff --git a/experiments/dp/dp_plot.py b/experiments/dp/dp_plot.py new file mode 100644 index 00000000..9a911c2d --- /dev/null +++ b/experiments/dp/dp_plot.py @@ -0,0 +1,56 @@ +# Plot +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns + +sns.set_theme(style="darkgrid") + + +results = pd.read_csv("results_logreg_dp_training.csv") +results = results.rename(columns={"perf": "Performance"}) +linestyle_str = [ + ("solid", "solid"), # Same as (0, ()) or '-' + ("dotted", "dotted"), # Same as (0, (1, 1)) or ':' + ("dashed", "dashed"), # Same as '--' + ("dashdot", "dashdot"), +] +linestyle_tuple = [ + ("loosely dotted", (0, (1, 10))), + ("densely dotted", (0, (1, 1))), + ("loosely dashed", (0, (5, 10))), + ("densely dashed", (0, (5, 1))), + ("loosely dashdotted", (0, (3, 10, 1, 10))), + ("densely dashdotted", (0, (3, 1, 1, 1))), + ("dashdotdotted", (0, (3, 5, 1, 5, 1, 5))), + ("loosely 
dashdotdotted", (0, (3, 10, 1, 10, 1, 10))), + ("densely dashdotdotted", (0, (3, 1, 1, 1, 1, 1))), +] +linestyles = linestyle_tuple + linestyle_str +deltas = [d for d in results["d"].unique() if not (np.isnan(d))] +fig, ax = plt.subplots() +for i, d in enumerate(deltas): + cdf = results.loc[results["d"] == d] + sns.lineplot( + data=cdf, + x="e", + y="Performance", + label=rf"$\delta={d}$", + linestyle=linestyles[::-1][i][1], + ax=ax, + ) +ax.set_xscale("log") +xtick_values = [d for d in results["e"].unique() if not (np.isnan(d))] +xlabels = [str(v) for v in xtick_values] +ax.set_xticks(xtick_values, xlabels) +ax.axhline( + np.array(results.loc[results["d"].isnull(), "Performance"].tolist()).mean(), + color="black", + label="Baseline wo DP", +) +ax.set_xlim(0.1, 10.0) +plt.legend() +plt.xlim(0.1, 10.0) +plt.xlabel(r"$\epsilon$") +plt.ylabel("Performance") +plt.savefig("DP_plot_propensity.pdf", dpi=100, bbox_inches="tight") diff --git a/experiments/dp/plot_dp_hydra.py b/experiments/dp/plot_dp_hydra.py new file mode 100644 index 00000000..71055722 --- /dev/null +++ b/experiments/dp/plot_dp_hydra.py @@ -0,0 +1,134 @@ +"""Plot file for the DP experiment.""" +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns + +from fedeca.utils.constants import EXPERIMENTS_PATHS +from fedeca.utils.experiment_utils import load_dataframe_from_pickles + +# TODO use Owkin's palette +# from fedeca.viz.plot import owkin_palette + + +def relative_error(x, y, absolute_error=False): + """Compute the relative error.""" + if absolute_error: + return np.abs(y - x) / np.abs(x) + else: + return np.linalg.norm(y - x) / np.linalg.norm(x) + + +names = { + "Hazard Ratio": "hazard ratio", + "Partial Log likelihood": "likelihood", + "p-values": "p-values", + "Propensity scores": "scores", +} +cmp = sns.color_palette("colorblind") +results = load_dataframe_from_pickles( + EXPERIMENTS_PATHS["dp_results"] + "results_Pooled_equivalent_DP.pkl" +) + +results_fl = 
results.loc[results["method"] == "FedECA", :] +results_pooled = results.loc[results["method"] == "IPTW", :] + +errors = pd.DataFrame( + data=np.abs( + np.array(results_fl["exp(coef)"]) - np.array(results_pooled["exp(coef)"]) + ) + / np.abs(np.array(results_pooled["exp(coef)"])), + columns=["hazard ratio"], +) + +errors["likelihood"] = np.abs( + np.array(results_fl["log_likelihood"]) - np.array(results_pooled["log_likelihood"]) +) / np.abs(np.array(results_pooled["log_likelihood"])) + +errors["p-values"] = np.abs( + np.array(results_fl["p"]) - np.array(results_pooled["p"]) +) / np.abs(np.array(results_pooled["p"])) + +errors["scores"] = np.array( + [ + relative_error( + np.array(results_pooled["propensity_scores"].iloc[i]), + np.array(results_fl["propensity_scores"].iloc[i]), + ) + for i in range(results_fl.shape[0]) + ] +) +errors["epsilon"] = results_fl["dp_target_epsilon"].values +errors["delta"] = results_fl["dp_target_delta"].values + + +# fig, axarr = plt.subplots(1, 1, figsize=(10, 5)) +# sns.boxplot( +# data=errors, palette=sns.color_palette(owkin_palette.values(), 9), width=0.5 +# ) +# ax = sns.swarmplot(data=errors, color=".25", size=4) + +# axarr.hlines(y=1e-2, xmin=-0.5, xmax=3.5, linewidth=2, color="r", linestyle="--") +# axarr.set_yscale("log") +# axarr.set_xticks(np.arange(errors.shape[1]), names) +# axarr.set_title("Pooled IPTW versus FedECA") +# axarr.set_ylabel("Relative error") +# axarr.set_ylim((1e-9, 1)) +# plt.tight_layout() +# plt.savefig("pooled_equivalent.pdf") + + +linestyle_str = [ + ("solid", "solid"), # Same as (0, ()) or '-' + ("dotted", "dotted"), # Same as (0, (1, 1)) or ':' + ("dashed", "dashed"), # Same as '--' + ("dashdot", "dashdot"), +] +linestyle_tuple = [ + ("loosely dotted", (0, (1, 10))), + ("densely dotted", (0, (1, 1))), + ("loosely dashed", (0, (5, 10))), + ("densely dashed", (0, (5, 1))), + ("loosely dashdotted", (0, (3, 10, 1, 10))), + ("densely dashdotted", (0, (3, 1, 1, 1))), + ("dashdotdotted", (0, (3, 5, 1, 5, 1, 
5))), + ("loosely dashdotdotted", (0, (3, 10, 1, 10, 1, 10))), + ("densely dashdotdotted", (0, (3, 1, 1, 1, 1, 1))), +] +linestyles = linestyle_tuple + linestyle_str +deltas = [d for d in errors["delta"].unique() if not (np.isnan(d))] + +for rel_error_name, col_name in names.items(): + fig, ax = plt.subplots() + for i, d in enumerate(deltas): + cdf = errors.loc[errors["delta"] == d] + sns.lineplot( + data=cdf, + x="epsilon", + y=col_name, + label=rf"$\delta={d}$", + linestyle=linestyles[::-1][i][1], + ax=ax, + ) + ax.set_xscale("log") + if col_name == "p-values" or col_name == "likelihood": + ax.set_yscale("log") + xtick_values = np.logspace(-1.0, 1.6989700043360185, 5, base=10) + xlabels = [str(round(v, 2)) for v in xtick_values] + ax.set_xticks(xtick_values, xlabels) + ax.axhline( + 1e-2, + color="red", + label=None, + linestyle="--", + ) + ax.set_xlim(0.1, 50.0) + plt.legend() + plt.xlim(0.1, 50.0) + plt.xlabel(r"$\epsilon$") + plt.ylabel("Relative Errors") + plt.tight_layout() + plt.savefig( + f"DP_relative_error_pooled_{rel_error_name}.pdf", dpi=100, bbox_inches="tight" + ) + plt.clf() diff --git a/experiments/estimate_ate/estimate_ate.py b/experiments/estimate_ate/estimate_ate.py new file mode 100644 index 00000000..42ba312f --- /dev/null +++ b/experiments/estimate_ate/estimate_ate.py @@ -0,0 +1,97 @@ +"""Simulation data for FedECA.""" + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from lifelines import KaplanMeierFitter + +from fedeca import MatchingAjudsted, NaiveComparison, PooledIPTW +from fedeca.utils.survival_utils import CoxData + +coxdata = CoxData( + n_samples=250, + cate="linear", + seed=1234, + percent_ties=None, + ndim=20, + propensity="linear", + cov_corr=0.0, + features_type="indep_gauss", +) +X, times, censoring, treat_alloc = coxdata.generate_data() +col_X = ["X_%i" % i for i in range(X.shape[1])] + +data = np.concatenate( + [X, times[:, np.newaxis], censoring[:, np.newaxis], treat_alloc[:, np.newaxis]], + axis=1, 
+) + +data = pd.DataFrame( + data=data, columns=col_X + ["time", "event", "treatment_allocation"] +) +# define treatment allocation +treatment_allocation = "treatment_allocation" + +print("Computing propensity weights on pooled data") +# Instantiate IPTW class +# We can specify the type of effect we want to estimate + +iptw = PooledIPTW( + treated_col=treatment_allocation, + event_col="event", + duration_col="time", + effect="ATE", +) + +# We can now estimate the treatment effect +iptw.fit(data) + + +naive_comparison = NaiveComparison( + treated_col=treatment_allocation, event_col="event", duration_col="time" +) +naive_comparison.fit(data) + +maic = MatchingAjudsted( + treated_col=treatment_allocation, + event_col="event", + duration_col="time", + effect="ATE", +) + +data = data.drop("weights", axis=1) + +mean_control = data.loc[data["treatment_allocation"] == 0, col_X].mean().to_frame().T +mean_control = mean_control.add_suffix(".mean") +sd_control = data.loc[data["treatment_allocation"] == 0, col_X].std().to_frame().T +sd_control = sd_control.add_suffix(".sd") + +aggregated_control = pd.concat([mean_control, sd_control], axis=1) + +maic.fit( + data.loc[data["treatment_allocation"] == 1, :], + aggregated_control, + data.loc[data["treatment_allocation"] == 0, ["time", "event"]], +) + +plt.clf() +ax = plt.subplot(111) + +kmf_control = KaplanMeierFitter() +ax = kmf_control.fit( + data.loc[data["treatment_allocation"] == 0, "time"], + data.loc[data["treatment_allocation"] == 0, "event"], + label="control", + weights=iptw.weights_[data["treatment_allocation"] == 0], +).plot_survival_function(ax=ax) + +kmf_treated = KaplanMeierFitter() +ax = kmf_treated.fit( + data.loc[data["treatment_allocation"] == 1, "time"], + data.loc[data["treatment_allocation"] == 1, "event"], + label="treated", + weights=iptw.weights_[data["treatment_allocation"] == 1], +).plot_survival_function(ax=ax) + +plt.tight_layout() +plt.savefig("km.png") diff --git 
a/experiments/estimate_ate/hr_estimation.py b/experiments/estimate_ate/hr_estimation.py new file mode 100644 index 00000000..d8e41211 --- /dev/null +++ b/experiments/estimate_ate/hr_estimation.py @@ -0,0 +1,95 @@ +import argparse +import pickle +import re +from pathlib import Path + +import numpy as np +import pandas as pd +import yaml + +from fedeca.competitors import MatchingAjudsted, NaiveComparison, PooledIPTW +from fedeca.utils.experiment_utils import param_grid_from_dict, single_experiment +from fedeca.utils.survival_utils import CoxData + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "-c", + "--config", + help="Name of the config file", + action="store", + ) + parser.add_argument( + "-o", + "--output", + help="Path to the output file", + action="store", + ) + args = parser.parse_args() + if args.config is None: + args.config = "hr_estimation_power.yaml" + if args.output is None: + output = re.sub(r".*/", "", args.config) + args.output = "results_" + re.sub(r"\.yaml$", ".pkl", output) + + with open( + Path(__file__).parent.parent / "config" / args.config, + "r", + encoding="utf-8", + ) as file: + config = yaml.safe_load(file) + + TREATED = "treatment" + EVENT = "event" + TIME = "time" + iptw = PooledIPTW( + treated_col=TREATED, + event_col=EVENT, + duration_col=TIME, + effect="ATE", + ) + maic = MatchingAjudsted( + treated_col=TREATED, + event_col=EVENT, + duration_col=TIME, + ) + naive = NaiveComparison( + treated_col=TREATED, + event_col=EVENT, + duration_col=TIME, + ) + models = { + "IPTW": iptw, + "MAIC": maic, + "Naive": naive, + } + config_experiment = config["parameters"]["experiments"] + df_params = param_grid_from_dict(config_experiment) + seeds = np.random.SeedSequence(config["seed"]).generate_state(df_params.shape[0]) + + results = [] + for i, row in enumerate(df_params.itertuples()): + coxdata = CoxData( + n_samples=1, + cate=getattr(row, "cate"), + propensity="linear", + seed=seeds[i], + 
percent_ties=None, + ndim=getattr(row, "n_covariates"), + standardize_features=False, + ) + results = results + [ + single_experiment( + coxdata, + n_samples=getattr(row, "n_samples"), + models=models, + treated_col=TREATED, + event_col=EVENT, + duration_col=TIME, + ) + for _ in range(getattr(row, "n_repeats")) + ] + results = pd.concat(results) + + with open(args.output, "wb") as f1: + pickle.dump(results, f1) diff --git a/experiments/estimate_ate/plot.py b/experiments/estimate_ate/plot.py new file mode 100644 index 00000000..d62cee33 --- /dev/null +++ b/experiments/estimate_ate/plot.py @@ -0,0 +1,38 @@ +from typing import Optional + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from matplotlib.axes import Axes + + +def plot_power( + df_res: pd.DataFrame, + fit_curve: bool = False, + deg: int = 2, + plot_kwargs: Optional[dict] = None, +) -> Axes: + if plot_kwargs is None: + plot_kwargs = {} + plot_kwargs.setdefault("xlabel", "n_samples") + axis = plot_kwargs.pop("ax", plt.gca()) + + df_power = ( + df_res.groupby(["method", "n_samples"]) + .agg( + power=pd.NamedAgg(column="p", aggfunc=lambda x: (x < 0.05).sum() / x.size), + ) + .reset_index() + ) + for name, group in df_power.groupby("method"): + if (xlabel := plot_kwargs.pop("xlabel", None)) is not None: + axis.set_xlabel(xlabel) + if (ylabel := plot_kwargs.pop("ylabel", None)) is not None: + axis.set_ylabel(ylabel) + if fit_curve: + fit = np.poly1d(np.polyfit(group["n_samples"], group["power"], deg=deg)) + axis.plot(group["n_samples"], fit(group["n_samples"])) + axis.scatter("n_samples", "power", data=group, label=name, **plot_kwargs) + axis.legend() + + return axis diff --git a/experiments/estimate_ate/plot_vary_overlap.py b/experiments/estimate_ate/plot_vary_overlap.py new file mode 100644 index 00000000..a1ba4742 --- /dev/null +++ b/experiments/estimate_ate/plot_vary_overlap.py @@ -0,0 +1,105 @@ +# %% +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import 
seaborn as sns + + +def relative_error(x, y, absolute_error=False): + if absolute_error: + return np.abs(y - x) / np.abs(x) + else: + return np.linalg.norm(y - x) / np.linalg.norm(x) + + +cmp = sns.color_palette() +sns.set_palette(cmp) + +methods = [ + "IPTW", + "MAIC", + # "CovAdj", + "Naive", +] + +results = pd.read_pickle("results_sim_vary_overlap.pkl") + + +# %% +results_grouped = results.groupby(["overlap", "exp_id"]) + + +# Define a function to compute the relative error and ess for each group +def compute_error_ess(group): + oracle_iptw = group[group["method"].str.lower() == "oracleiptw"] + error_oracle = group.apply( + lambda row: relative_error( + oracle_iptw["exp(coef)"].values[0], row["exp(coef)"], absolute_error=True + ), + axis=1, + ) + error_gt = group.apply( + lambda row: relative_error( + oracle_iptw["ate_true"].values[0], row["exp(coef)"], absolute_error=True + ), + axis=1, + ) + + df = pd.DataFrame( + { + "err_oracle": error_oracle, + "err_ground_truth": error_gt, + "method": group["method"], + "exp_id": group["exp_id"], + "overlap": group["overlap"], + "ess": group["ess"], + } + ) + # Drop rows with 'oracleiptw' method + df = df[df["method"].str.lower() != "oracleiptw"] + + return df + + +# Apply the function to each group +df_res = results_grouped.apply(compute_error_ess) + +# Reset index +df_res = df_res.reset_index(drop=True) + +# %% +fig, ax = plt.subplots() +sns.boxplot(x="overlap", y="err_oracle", hue="method", data=df_res) +ax.set_ylabel("Relative error of ATE (compared to Oracle IPTW)") +ax.set_yscale("log") +ax.set_xlabel("Degree of shift b/w arms (0=strong overlap)") +fig.tight_layout() + +plt.savefig("test_overlap_error_oracle.png") +plt.show() +plt.clf() + +# %% +fig, ax = plt.subplots() +sns.boxplot(x="overlap", y="err_ground_truth", hue="method", data=df_res) +ax.set_ylabel("Relative error of ATE (compared to ground truth)") +ax.set_yscale("log") +ax.set_xlabel("Degree of shift b/w arms (0=strong overlap)") +fig.tight_layout() + 
+plt.savefig("test_overlap_error_gt.png") +plt.show() +plt.clf() + +# %% +fig, ax = plt.subplots() +sns.boxplot(x="overlap", y="ess", hue="method", data=df_res) +ax.set_ylabel("ESS (for treated arm)") +ax.set_xlabel("Degree of shift b/w arms (0=strong overlap)") +fig.tight_layout() + +plt.savefig("test_overlap_ess.png") +plt.show() +plt.clf() + +# %% diff --git a/experiments/estimate_ate/sim_vary_overlap.py b/experiments/estimate_ate/sim_vary_overlap.py new file mode 100644 index 00000000..9b118225 --- /dev/null +++ b/experiments/estimate_ate/sim_vary_overlap.py @@ -0,0 +1,90 @@ +"""Simulation data for FedECA.""" +import pickle +from pathlib import Path + +import numpy as np +import pandas as pd +import yaml + +from fedeca.competitors import ( + CovariateAdjusted, + MatchingAjudsted, + NaiveComparison, + PooledIPTW, +) +from fedeca.utils.experiment_utils import single_experiment +from fedeca.utils.survival_utils import CoxData + +if __name__ == "__main__": + with open( + Path(__file__).parent.parent / "config/pooled_vary_overlap.yaml", + "r", + encoding="utf-8", + ) as file: + config = yaml.safe_load(file) + + TREATED = "treatment" + EVENT = "event" + TIME = "time" + PS = "propensity_scores" + iptw = PooledIPTW( + treated_col=TREATED, + event_col=EVENT, + duration_col=TIME, + effect="ATE", + ) + maic = MatchingAjudsted( + treated_col=TREATED, + event_col=EVENT, + duration_col=TIME, + ) + covadjust = CovariateAdjusted( + treated_col=TREATED, + event_col=EVENT, + duration_col=TIME, + ) + naive = NaiveComparison( + treated_col=TREATED, + event_col=EVENT, + duration_col=TIME, + ) + models = { + "IPTW": iptw, + "MAIC": maic, + # "CovAdj": covadjust, + "Naive": naive, + "OracleIPTW": iptw, + } + config_experiment = config["parameters"]["experiments"] + results = [] + list_overlap = config_experiment["overlap"] + seeds = np.random.SeedSequence(config["seed"]).generate_state(len(list_overlap)) + for i, overlap in enumerate(list_overlap): + coxdata = CoxData( + 
n_samples=config_experiment["n_samples"], + cate=0.0, + propensity="linear", + seed=seeds[i], + percent_ties=None, + ndim=config_experiment["n_covariates"], + standardize_features=False, + overlap=overlap, + prop_treated=config_experiment["prop_treated"], + ) + for j in range(config_experiment["n_repeats"]): + res_single_exp = single_experiment( + coxdata, + n_samples=config_experiment["n_samples"], + models=models, + treated_col=TREATED, + event_col=EVENT, + duration_col=TIME, + ps_col=PS, + ) + + results.append(res_single_exp.assign(overlap=overlap, exp_id=j)) + + results = pd.concat(results) + + with open("results_sim_vary_overlap.pkl", "wb") as f1: + pickle.dump(results, f1) diff --git a/experiments/pooled_equivalent/plot_pooled_equivalent.py b/experiments/pooled_equivalent/plot_pooled_equivalent.py new file mode 100644 index 00000000..fe31ac44 --- /dev/null +++ b/experiments/pooled_equivalent/plot_pooled_equivalent.py @@ -0,0 +1,69 @@ +"""Plot file for the pooled equivalent experiment.""" +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns + +from fedeca.utils.constants import EXPERIMENTS_PATHS +from fedeca.utils.experiment_utils import load_dataframe_from_pickles +from fedeca.viz.plot import owkin_palette + + +def relative_error(x, y, absolute_error=False): + """Compute the relative error.""" + if absolute_error: + return np.abs(y - x) / np.abs(x) + else: + return np.linalg.norm(y - x) / np.linalg.norm(x) + + +names = ["Hazard Ratio", "Partial Log likelihood", "p-values", "Propensity scores"] +cmp = sns.color_palette("colorblind") +results = load_dataframe_from_pickles( + EXPERIMENTS_PATHS["pooled_equivalent"] + "results_Pooled_equivalent.pkl" +) + +results_fl = results.loc[results["method"] == "FedECA", :] +results_pooled = results.loc[results["method"] == "IPTW", :] + +errors = pd.DataFrame( + data=np.abs( + np.array(results_fl["exp(coef)"]) - np.array(results_pooled["exp(coef)"]) + ) + / 
np.abs(np.array(results_pooled["exp(coef)"])), + columns=["hazard ratio"], +) + +errors["likelihood"] = np.abs( + np.array(results_fl["log_likelihood"]) - np.array(results_pooled["log_likelihood"]) +) / np.abs(np.array(results_pooled["log_likelihood"])) + +errors["p-values"] = np.abs( + np.array(results_fl["p"]) - np.array(results_pooled["p"]) +) / np.abs(np.array(results_pooled["p"])) + +errors["scores"] = np.array( + [ + relative_error( + np.array(results_pooled["propensity_scores"].iloc[i]), + np.array(results_fl["propensity_scores"].iloc[i]), + ) + for i in range(results_fl.shape[0]) + ] +) + + +fig, axarr = plt.subplots(1, 1, figsize=(10, 5)) +sns.boxplot( + data=errors, palette=sns.color_palette(owkin_palette.values(), 9), width=0.5 +) +ax = sns.swarmplot(data=errors, color=".25", size=4) + +axarr.hlines(y=1e-2, xmin=-0.5, xmax=3.5, linewidth=2, color="r", linestyle="--") +axarr.set_yscale("log") +axarr.set_xticks(np.arange(errors.shape[1]), names) +axarr.set_title("Pooled IPTW versus FedECA") +axarr.set_ylabel("Relative error") +axarr.set_ylim((1e-9, 1)) +plt.tight_layout() +plt.savefig("pooled_equivalent.pdf") diff --git a/experiments/pooled_equivalent/plot_pooled_equivalent_nb_clients.py b/experiments/pooled_equivalent/plot_pooled_equivalent_nb_clients.py new file mode 100644 index 00000000..f0c1ff11 --- /dev/null +++ b/experiments/pooled_equivalent/plot_pooled_equivalent_nb_clients.py @@ -0,0 +1,94 @@ +"""Plot file for the pooled equivalent experiment.""" +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns + +from fedeca.utils.constants import EXPERIMENTS_PATHS +from fedeca.utils.experiment_utils import load_dataframe_from_pickles +from fedeca.viz.plot import owkin_palette + + +def relative_error(x, y, absolute_error=False): + """Compute the relative error.""" + if absolute_error: + return np.abs(y - x) / np.abs(x) + else: + return np.linalg.norm(y - x) / np.linalg.norm(x) + + +names = ["Hazard Ratio", "Partial 
Log likelihood", "p-values", "Propensity scores"] +cmp = sns.color_palette("colorblind") +results = load_dataframe_from_pickles( + EXPERIMENTS_PATHS["pooled_equivalent"] + "results_Pooled_equivalent_nb_clients.pkl" +) + +n_clients = [2, 3, 5, 10] + +errors = {} +for name in names: + errors[name] = pd.DataFrame() +for n_client in n_clients: + results_tmp = results.loc[results["n_clients"] == n_client, :] + results_fl = results_tmp.loc[results_tmp["method"] == "FedECA", :] + results_pooled = results_tmp.loc[results_tmp["method"] == "IPTW", :] + + errors["Hazard Ratio"][n_client] = pd.DataFrame( + data=np.abs( + np.array(results_fl["exp(coef)"]) - np.array(results_pooled["exp(coef)"]) + ) + / np.abs(np.array(results_pooled["exp(coef)"])), + columns=["hazard ratio"], + ) + + errors["Partial Log likelihood"][n_client] = np.abs( + np.array(results_fl["log_likelihood"]) + - np.array(results_pooled["log_likelihood"]) + ) / np.abs(np.array(results_pooled["log_likelihood"])) + + errors["p-values"][n_client] = np.abs( + np.array(results_fl["p"]) - np.array(results_pooled["p"]) + ) / np.abs(np.array(results_pooled["p"])) + + errors["Propensity scores"][n_client] = np.array( + [ + relative_error( + np.array(results_pooled["propensity_scores"].iloc[i]), + np.array(results_fl["propensity_scores"].iloc[i]), + ) + for i in range(results_fl.shape[0]) + ] + ) + +dict_ylim = { + "Hazard Ratio": (1e-8, 1), + "Partial Log likelihood": (1e-10, 1), + "p-values": (1e-6, 1), + "Propensity scores": (1e-6, 1), +} + + +fig, axarr = plt.subplots(2, 2, figsize=(15, 7.5)) +j = 0 +for i, name in enumerate(names): + print(i) + if i > 1: + j = 1 + sns.boxplot( + data=errors[name], + palette=sns.color_palette(owkin_palette.values(), 9), + width=0.5, + ax=axarr[i % 2, j], + ) + sns.swarmplot(data=errors[name], color=".25", size=2, ax=axarr[i % 2, j]) + + axarr[i % 2, j].hlines( + y=1e-2, xmin=-0.5, xmax=3.5, linewidth=2, color="r", linestyle="--" + ) + axarr[i % 2, j].set_yscale("log") + axarr[i % 2, 
j].set_xticks(np.arange(errors[name].shape[1]), n_clients) + axarr[i % 2, j].set_title(f"{name}") + axarr[i % 2, j].set_ylabel("Relative error") + axarr[i % 2, j].set_ylim(dict_ylim[name]) +plt.tight_layout() +plt.savefig("pooled_equivalent_nb_clients.png") diff --git a/experiments/power/plot_power_type_one_error.py b/experiments/power/plot_power_type_one_error.py new file mode 100644 index 00000000..4435dec5 --- /dev/null +++ b/experiments/power/plot_power_type_one_error.py @@ -0,0 +1,119 @@ +"""Script for creating power analysis figure.""" +# %% +import pandas as pd +import seaborn as sns +from matplotlib.patches import Patch +from scipy import stats + +from fedeca.utils.experiment_utils import load_dataframe_from_pickles +from fedeca.viz.plot import owkin_palette +from fedeca.viz.utils import adjust_legend_subtitles + +# %% +# Raw results +BASE_PATH = "/home/owkin/project/results_experiments/" +FILE_OUTPUT = "results_Power_and_type_one_error_analyses.pkl" +df_res_cov_shift = load_dataframe_from_pickles( + BASE_PATH + "power_and_type_one_error_cov_shift/" + FILE_OUTPUT +) +df_res_n_samples = load_dataframe_from_pickles( + BASE_PATH + "power_and_type_one_error_n_samples/" + FILE_OUTPUT +) + +# %% +# Aggregated results +df_plot = pd.concat( + [ + df_res_n_samples.drop(columns="overlap"), + df_res_cov_shift.drop(columns="n_samples"), + ] +) +df_plot["method"] = df_plot["method"].replace(r"_.*", "", regex=True) +method_recoding = { + "IPTW": "FedECA*", +} +df_plot["method"] = df_plot["method"].replace(method_recoding) +df_plot["cov_shift"] = 0.5 * (df_plot["overlap"] + 1) +# Create dataframe for seaborn.FacetGrid +df_plot["y"] = df_plot["cate"].replace({0.4: "power", 1.0: "type_one"}) +df_plot["y_value"] = df_plot["p"].lt(0.05).astype(int) +df_plot = df_plot.melt( + id_vars=["method", "variance_method", "y", "y_value"], + value_vars=["n_samples", "cov_shift"], + value_name="x_value", + var_name="x", +).dropna(how="any") +# Create column to be used as legend labels 
+df_plot["label"] = df_plot["method"].map(str) + " (" + df_plot["variance_method"] + ")" +# Set category dtype, otherwise FacetGrid may bug with legend. +# See https://github.com/mwaskom/seaborn/issues/2916 +df_plot["label"] = df_plot["label"].astype("category") + +# %% +power_labels = [ + "FedECA* (bootstrap)", + "FedECA* (robust)", + "MAIC (robust)", +] +g = sns.FacetGrid( + df_plot.query("y != 'power' | label in @power_labels"), + col="x", + row="y", + row_order=["type_one", "power"], + col_order=["cov_shift", "n_samples"], + height=3, + aspect=1.5, # type: ignore + sharex="col", # type: ignore + sharey="row", # type: ignore + margin_titles=True, +) +g.map_dataframe( + sns.lineplot, + x="x_value", + y="y_value", + errorbar=("se", stats.norm.ppf(1 - 0.05 / 2)), + hue="label", + hue_order=[ + "FedECA* (bootstrap)", + "FedECA* (robust)", + "FedECA* (naive)", + "MAIC (bootstrap)", + "MAIC (robust)", + "Unweighted (naive)", + ], + style="label", + dashes=None, + markers=True, + markersize=8, + err_style="bars", + palette=[list(owkin_palette.values())[i] for i in (3, 5, 1, 0, 4, 2)], +) +g.set_titles(col_template="", row_template="") +n_col = g.axes.shape[1] +for i, ax in enumerate(g.axes.flat): + if i // n_col == 0: + ax.axhline(0.05, color="black", linestyle="dashed", alpha=0.2) + ax.set_ylabel("Type I error") + ax.set(yscale="log") + if i // n_col == 1: + ax.set_ylabel("Statistical power") + if i % n_col == 0: + ax.set_xlabel("Covariate shift") + if i % n_col == 1: + ax.set_xticks(range(300, 1200, 200)) + ax.set_xlabel("Number of samples") + +handles = list(g._legend_data.values()) +labels = list(g._legend_data.keys()) +handles.insert(0, Patch(visible=False)) +handles.insert(4, Patch(visible=False)) +labels.insert(0, "Requires federated learning") +labels.insert(4, "Federated analytics") +g._legend_data = dict(zip(labels, handles)) +g.add_legend() +sns.move_legend(g, "center right", bbox_to_anchor=(0.94, 0.5)) +adjust_legend_subtitles(g.legend) 
+g.figure.set_dpi(300) + +# %% +g.savefig("fedeca_power_and_type_one_error.pdf", bbox_inches="tight", dpi=300) diff --git a/experiments/real-world/plot_real_world_hydra.py b/experiments/real-world/plot_real_world_hydra.py new file mode 100644 index 00000000..4524d056 --- /dev/null +++ b/experiments/real-world/plot_real_world_hydra.py @@ -0,0 +1,100 @@ +"""Plot file for timing experiments.""" +from os.path import join + +import pandas as pd +import seaborn as sns + +from fedeca.utils.constants import EXPERIMENTS_PATHS +from fedeca.utils.experiment_utils import load_dataframe_from_pickles + +# TODO use Owkin's palette +# from fedeca.viz.plot import owkin_palette + + +cmp = sns.color_palette("colorblind") +results = load_dataframe_from_pickles( + join(EXPERIMENTS_PATHS["real_world"], "results_Real-world_experiments.pkl") +) + +fl_results = results.loc[results["backend_type"] != "N/A"] +# A bit ugly but the loop actually launched 10* pooled instead of 5 +pooled_results = results.loc[results["backend_type"] == "N/A"] +pooled_results = pooled_results.groupby(["n_clients"]).head(5) + +results = pd.concat([pooled_results, fl_results], ignore_index=True) + + +assert set(results["backend_type"].unique().tolist()) == set(["N/A", "simu", "remote"]) + +agg_df = results.groupby(["method", "backend_type", "n_clients"], as_index=False)[ + "fit_time" +].agg(["mean", "std", "count"]) +assert all(agg_df["count"] == 5) + + +def e_to_latex(s): + splitted = s.split("e") + if len(splitted) == 2: + before, exponent = splitted + # get the sign of the exponent + if exponent.startswith("+"): + # remove the + sign and trim any leading zero + exponent = exponent[1:].lstrip("0") + elif exponent.startswith("-"): + # keep the -1 sign, but trim any leading zero + exponent = "-" + exponent[1:].lstrip("0") + else: + raise ValueError(f"Unexpected case {exponent}") + if exponent == "": + return before + else: + return before + "\cdot 10^{" + exponent + "}" # noqa: W605 + else: + return s + + +for col 
in ["mean", "std"]: + agg_df[col] = agg_df[col].apply(lambda x: f"{x:.2e}") + agg_df[col] = agg_df[col].apply(e_to_latex) + +agg_df["mean"] = agg_df["mean"].apply(lambda x: "$" + x) +agg_df["std"] = agg_df["std"].apply(lambda x: x + "$") +# agg_df[col] = pd.to_datetime(agg_df[col], unit="s").dt.strftime("%Hh %Mm %Ss.%f") +# # trimming ms +# agg_df[col] = [el[:-4] for el in agg_df[col].tolist()] +# # removing useless hours +# agg_df[col] = [ +# el[4:] if el.startswith("00h ") else el for el in agg_df[col].tolist() +# ] +# # remove useless minutes +# agg_df[col] = [ +# el[4:] if el.startswith("00m ") else el for el in agg_df[col].tolist() +# ] +# # remove useless seconds +# agg_df[col] = [ +# el[3:] if el.startswith("00s") else el for el in agg_df[col].tolist() +# ] + + +agg_df["timing"] = ( + agg_df["mean"].astype(str).str.cat(agg_df["std"].astype(str), sep=r" \pm ") +) +agg_df = agg_df[["method", "backend_type", "n_clients", "timing"]] +agg_df = agg_df.rename( + columns={ + "method": "Method", + "backend_type": "Environment", + "n_clients": "\#clients", # noqa: E501, W605 + "timing": "Runtime (s)", + } +) +agg_df["Environment"] = pd.Categorical(agg_df["Environment"]).rename_categories( + {"remote": "real-world setup", "simu": "in-RAM"} +) +agg_df["Method"] = pd.Categorical(agg_df["Method"]).rename_categories( + {"FedECA": "FedECA"} +) + + +print(agg_df) +print(agg_df.to_latex(index=False)) diff --git a/experiments/results/pooled_equivalent_dp/results_Pooled_equivalent_DP.pkl b/experiments/results/pooled_equivalent_dp/results_Pooled_equivalent_DP.pkl new file mode 100644 index 00000000..6c87b4fc Binary files /dev/null and b/experiments/results/pooled_equivalent_dp/results_Pooled_equivalent_DP.pkl differ diff --git a/experiments/results/pooled_equivalent_ties/results_Pooled_equivalent_ties.pkl b/experiments/results/pooled_equivalent_ties/results_Pooled_equivalent_ties.pkl new file mode 100644 index 00000000..de2d8846 Binary files /dev/null and 
def relative_error(x, y, absolute_error=False):
    """Measure how far ``y`` deviates from the reference ``x``.

    With ``absolute_error=True`` the element-wise quantity
    ``|y - x| / |x|`` is returned; otherwise the ratio of Euclidean norms
    ``||y - x||_2 / ||x||_2`` is computed.
    """
    deviation = y - x
    if absolute_error:
        return np.abs(deviation) / np.abs(x)
    return np.linalg.norm(deviation) / np.linalg.norm(x)
+errors["p-values"] = np.abs( + np.array(results_fl["p"]) - np.array(results_pooled["p"]) +) / np.abs(np.array(results_pooled["p"])) + +errors["scores"] = np.array( + [ + relative_error( + np.array(results_pooled["propensity_scores"].iloc[i]), + np.array(results_fl["propensity_scores"].iloc[i]), + ) + for i in range(results_fl.shape[0]) + ] +) + + +fig, axarr = plt.subplots(1, 1, figsize=(10, 5)) +sns.boxplot( + data=errors, palette=sns.color_palette(owkin_palette.values(), 9), width=0.5 +) +ax = sns.swarmplot(data=errors, color=".25", size=4) + +axarr.hlines(y=1e-2, xmin=-0.5, xmax=3.5, linewidth=2, color="r", linestyle="--") +axarr.set_yscale("log") +axarr.set_xticks(np.arange(errors.shape[1]), names) +axarr.set_title("Pooled IPTW versus FedECA") +axarr.set_ylabel("Relative error") +axarr.set_ylim((1e-9, 1)) +plt.tight_layout() +plt.savefig("robust_pooled_equivalent.pdf") diff --git a/experiments/robust_pooled_equivalent/robust_plot_pooled_equivalent_nb_clients.py b/experiments/robust_pooled_equivalent/robust_plot_pooled_equivalent_nb_clients.py new file mode 100644 index 00000000..9148a615 --- /dev/null +++ b/experiments/robust_pooled_equivalent/robust_plot_pooled_equivalent_nb_clients.py @@ -0,0 +1,95 @@ +"""Plot file for the pooled equivalent experiment.""" +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns + +from fedeca.utils.constants import EXPERIMENTS_PATHS +from fedeca.utils.experiment_utils import load_dataframe_from_pickles +from fedeca.viz.plot import owkin_palette + + +def relative_error(x, y, absolute_error=False): + """Compute the relative error.""" + if absolute_error: + return np.abs(y - x) / np.abs(x) + else: + return np.linalg.norm(y - x) / np.linalg.norm(x) + + +names = ["Hazard Ratio", "Partial Log likelihood", "p-values", "Propensity scores"] +cmp = sns.color_palette("colorblind") +results = load_dataframe_from_pickles( + EXPERIMENTS_PATHS["robust_pooled_equivalence"] + + 
"results_Robust_Pooled_Equivalent_nb_clients.pkl" +) + +n_clients = [2, 3, 5, 10] + +errors = {} +for name in names: + errors[name] = pd.DataFrame() +for n_client in n_clients: + results_tmp = results.loc[results["n_clients"] == n_client, :] + results_fl = results_tmp.loc[results_tmp["method"] == "FedECA", :] + results_pooled = results_tmp.loc[results_tmp["method"] == "IPTW", :] + + errors["Hazard Ratio"][n_client] = pd.DataFrame( + data=np.abs( + np.array(results_fl["exp(coef)"]) - np.array(results_pooled["exp(coef)"]) + ) + / np.abs(np.array(results_pooled["exp(coef)"])), + columns=["hazard ratio"], + ) + + errors["Partial Log likelihood"][n_client] = np.abs( + np.array(results_fl["log_likelihood"]) + - np.array(results_pooled["log_likelihood"]) + ) / np.abs(np.array(results_pooled["log_likelihood"])) + + errors["p-values"][n_client] = np.abs( + np.array(results_fl["p"]) - np.array(results_pooled["p"]) + ) / np.abs(np.array(results_pooled["p"])) + + errors["Propensity scores"][n_client] = np.array( + [ + relative_error( + np.array(results_pooled["propensity_scores"].iloc[i]), + np.array(results_fl["propensity_scores"].iloc[i]), + ) + for i in range(results_fl.shape[0]) + ] + ) + +dict_ylim = { + "Hazard Ratio": (1e-8, 1), + "Partial Log likelihood": (1e-10, 1), + "p-values": (1e-6, 1), + "Propensity scores": (1e-6, 1), +} + + +fig, axarr = plt.subplots(2, 2, figsize=(15, 7.5)) +j = 0 +for i, name in enumerate(names): + print(i) + if i > 1: + j = 1 + sns.boxplot( + data=errors[name], + palette=sns.color_palette(owkin_palette.values(), 9), + width=0.5, + ax=axarr[i % 2, j], + ) + sns.swarmplot(data=errors[name], color=".25", size=2, ax=axarr[i % 2, j]) + + axarr[i % 2, j].hlines( + y=1e-2, xmin=-0.5, xmax=3.5, linewidth=2, color="r", linestyle="--" + ) + axarr[i % 2, j].set_yscale("log") + axarr[i % 2, j].set_xticks(np.arange(errors[name].shape[1]), n_clients) + axarr[i % 2, j].set_title(f"{name}") + axarr[i % 2, j].set_ylabel("Relative error") + axarr[i % 2, 
j].set_ylim(dict_ylim[name]) +plt.tight_layout() +plt.savefig("robust_pooled_equivalent_nb_clients.png") diff --git a/experiments/run_experiment.py b/experiments/run_experiment.py new file mode 100644 index 00000000..e37adf21 --- /dev/null +++ b/experiments/run_experiment.py @@ -0,0 +1,74 @@ +"""Main module for running hydra config based experiments.""" +import pickle +import re +from collections.abc import Mapping + +import hydra +import numpy as np +from hydra.core.hydra_config import HydraConfig +from omegaconf import DictConfig, open_dict + +from fedeca.utils.experiment_utils import single_experiment +from fedeca.utils.survival_utils import BaseSurvivalEstimator, CoxData + + +@hydra.main(version_base=None, config_path="config", config_name="default_config") +def run_experiment(cfg: DictConfig): + """Run experiment with hydra configs.""" + with open_dict(cfg): + models_common = cfg.pop("models_common") + for model in cfg.models.values(): + model.update(models_common) + config_hydra = HydraConfig.get() + job_num = config_hydra.job.num + if (job_range := cfg.get("job_range", None)) is not None and ( + job_num < job_range[0] or job_num >= job_range[1] + ): + return + if (job_list := cfg.get("job_list", None)) is not None and job_num not in job_list: + return + # Set seed for each job in a deterministic manner using the initial_seed + seed_seq = np.random.SeedSequence(cfg.initial_seed).spawn(job_num + 1)[-1] + cfg.data.seed = int(seed_seq.generate_state(1)[0]) + + output = re.sub(r"\s", "_", cfg.name) + if (batch_id := cfg.get("batch_id", None)) is not None: + output = output + f"_batch_{batch_id}" + output = f"{config_hydra.sweep.dir}/results_{output}.pkl" + + data_gen: CoxData = hydra.utils.instantiate(cfg.data) + models: Mapping[str, BaseSurvivalEstimator] = dict( + (name, hydra.utils.instantiate(model)) for name, model in cfg.models.items() + ) + for model in models.values(): + model.set_random_state(data_gen.rng) + if "fit_fedeca" in cfg.keys(): + fedeca_config 
def simulated_fl_benchmark(
    nb_client=2,
    n_samples=500,
    percent_ties=None,
    n_repeat=5,
    n_covariates=10,
    group_treated=False,
    nb_rounds_list=None,
):
    """Run a pooled-vs-federated IPTW equivalence benchmark on simulated data.

    Repeats ``n_repeat`` times: simulate Cox data, fit a pooled IPTW
    estimator and a federated FedECA estimator on the same data, and record
    the relative errors between the two on propensity weights, treatment
    effect, p-value and partial log-likelihood.

    Parameters
    ----------
    nb_client : int, optional
        Number of federated clients the data is split over. Defaults to 2.
    n_samples : int, optional
        Number of simulated samples. Defaults to 500.
    percent_ties : float, optional
        Fraction of tied event times in the simulated data. Defaults to None.
    n_repeat : int, optional
        Number of repetitions, each with its own deterministic seed
        (``123 + k``). Defaults to 5.
    n_covariates : int, optional
        Number of simulated covariates. Defaults to 10.
    group_treated : bool, optional
        If True, split with ``"split_control_over_centers"`` (presumably
        concentrating treated samples on one center — confirm against
        FedECA's split utilities). Defaults to False.
    nb_rounds_list : list, optional
        Number of FL rounds per FedECA training phase. Defaults to None,
        interpreted as ``[10, 10]``. (Was a mutable default argument
        ``[10, 10]``, which is shared across calls.)

    Returns
    -------
    tuple
        ``(nb_client, percent_ties, n_covariates, n_repeat, seeds, error,
        group_treated, nb_rounds_list)`` where ``error`` maps each tracked
        quantity to the list of per-repetition relative errors.
    """
    # Resolve the sentinel instead of using a mutable default argument.
    if nb_rounds_list is None:
        nb_rounds_list = [10, 10]
    error = {"weights": [], "treatment effect": [], "p-values": [], "likelihood": []}
    seeds = []
    for k in range(n_repeat):
        seed = 123 + k
        print("Launching data generation with seed", seed)
        # Simulate data
        coxdata = CoxData(
            n_samples=n_samples,
            cate=1.0,
            seed=seed,
            percent_ties=percent_ties,
            ndim=n_covariates,
            propensity="linear",
        )
        X, times, censoring, treat_alloc = coxdata.generate_data()
        col_X = ["X_%i" % i for i in range(X.shape[1])]

        data = np.concatenate(
            [
                X,
                times[:, np.newaxis],
                censoring[:, np.newaxis],
                treat_alloc[:, np.newaxis],
            ],
            axis=1,
        )

        data = pd.DataFrame(
            data=data, columns=col_X + ["time", "event", "treatment_allocation"]
        )
        # define treatment allocation
        treatment_allocation = "treatment_allocation"

        print("Computing propensity weights on pooled data")
        # Instantiate IPTW class
        # We can specify the type of effect we want to estimate

        iptw = PooledIPTW(
            treated_col=treatment_allocation,
            event_col="event",
            duration_col="time",
            effect="ATE",
        )

        # We can now estimate the treatment effect
        iptw.fit(data)
        # NOTE(review): relies on PooledIPTW.fit adding a "weights" column
        # to `data` in place — confirm against the PooledIPTW implementation.
        df = data.drop(columns=["weights"])
        df["treatment_allocation"] = df["treatment_allocation"].values.astype("uint8")
        print("Computing propensity weights on distributed data")

        fl_iptw = FedECA(
            ndim=n_covariates,
            treated_col="treatment_allocation",
            duration_col="time",
            event_col="event",
            num_rounds_list=nb_rounds_list,
            dtype="float64",
        )
        if group_treated:
            # Fix: a stray trailing comma previously made this a 1-tuple
            # ("split_control_over_centers",) where a method name string
            # is expected.
            split_method = "split_control_over_centers"
            split_method_kwargs = {"treatment_info": "treated"}
        else:
            split_method = "uniform"
            split_method_kwargs = None
        fl_iptw.fit(df, None, nb_client, split_method, split_method_kwargs)

        m = fl_iptw.propensity_model

        # Re-score the pooled covariates with the federated propensity model.
        Xprop = torch.from_numpy(X)
        with torch.no_grad():
            propensity_scores = m(Xprop)

        propensity_scores = propensity_scores.detach().numpy().flatten()
        # Standard IPTW weights: 1/e(x) for treated, 1/(1 - e(x)) for control.
        weights = df["treatment_allocation"] * 1.0 / propensity_scores + (
            1 - df["treatment_allocation"]
        ) * 1.0 / (1.0 - propensity_scores)

        # L2 error || fl_weights - pooled_weights ||_2
        error["weights"].append(relative_error(data["weights"], weights))
        error["p-values"].append(
            relative_error(
                iptw.results_["p"].iloc[0],
                fl_iptw.results_["p"].iloc[0],
                absolute_error=True,
            )
        )
        error["treatment effect"].append(
            relative_error(
                iptw.results_["coef"].iloc[0],
                fl_iptw.results_["coef"].iloc[0],
                absolute_error=True,
            )
        )
        error["likelihood"].append(
            relative_error(iptw.log_likelihood_, fl_iptw.ll, absolute_error=True)
        )

        seeds.append(seed)

    return (
        nb_client,
        percent_ties,
        n_covariates,
        n_repeat,
        np.array(seeds),
        error,
        group_treated,
        nb_rounds_list,
    )
"/home/owkin/project/results_experiments/smd_cov_shift/results_smd_cov_shift.pkl" +) +df_res = load_dataframe_from_pickles(fname) + +df_res["cov_shift"] = 0.5 * (df_res["overlap"] + 1) + +# Preprocess results +df = df_res.filter(regex=r"cov_shift|method|rep_id|smd_\w+_X_") +df = ( + pd.wide_to_long( + df.reset_index(drop=True).reset_index(), + stubnames=["smd_weighted", "smd_raw"], + i="index", + j="covariate", + sep="_", + suffix=r"\w+", + ) + .reset_index() + .drop(columns="index") +) +df = ( + pd.wide_to_long( + df.reset_index(), + stubnames="smd", + i="index", + j="weighted", + sep="_", + suffix=r"\w+", + ) + .reset_index() + .drop(columns="index") +) +df["weighted"] = df["weighted"].replace({"weighted": True, "raw": False}) +method_recoding = { + "FedECA": "FedECA", + "IPTW": "IPTW", + "MAIC": "MAIC", +} +df["method"] = df["method"].replace(method_recoding) + +# Plot +g = sns.FacetGrid( + df[ + df["cov_shift"].isin([0, 2]) + & df["covariate"].isin(["X_0", "X_1", "X_2", "X_3", "X_4"]) + ], + col="method", + col_order=["IPTW", "FedECA", "MAIC"], + row="cov_shift", + height=3.5, # type: ignore + aspect=0.8, # type: ignore + margin_titles=True, +) +g.map_dataframe( + sns.boxplot, + x="smd", + y="covariate", + hue="weighted", + width=0.3, + palette=owkin_palette.values(), +) +g.set_xlabels("Standardized mean difference") +g.set_ylabels("Covariate") +g.set_titles(col_template="{col_name}", row_template="Covariate shift = {row_name}") +for ax in g.axes.flat: + ax.axvline(0, color="black", linestyle="--", alpha=0.2) +g.add_legend(title="Weighted") +g.savefig("smd_cov_shift.pdf", bbox_inches="tight") diff --git a/fedeca/__init__.py b/fedeca/__init__.py new file mode 100644 index 00000000..1c680cc9 --- /dev/null +++ b/fedeca/__init__.py @@ -0,0 +1,3 @@ +"""Top level package for :mod:`fedeca`.""" +from .fedeca_core import FedECA +from .competitors import PooledIPTW, MatchingAjudsted, NaiveComparison diff --git a/fedeca/algorithms/__init__.py 
b/fedeca/algorithms/__init__.py new file mode 100644 index 00000000..9c541a01 --- /dev/null +++ b/fedeca/algorithms/__init__.py @@ -0,0 +1,2 @@ +"""Init file for algorithms.""" +from .torch_webdisco_algo import TorchWebDiscoAlgo diff --git a/fedeca/algorithms/torch_dp_fed_avg_algo.py b/fedeca/algorithms/torch_dp_fed_avg_algo.py new file mode 100644 index 00000000..a10f21b5 --- /dev/null +++ b/fedeca/algorithms/torch_dp_fed_avg_algo.py @@ -0,0 +1,450 @@ +"""Differentially private algorithm to be used with FedAvg strategy.""" +import logging +from typing import Any, Optional + +import numpy as np +import torch +from opacus.accountants.rdp import RDPAccountant +from opacus.accountants.utils import get_noise_multiplier +from opacus.data_loader import DPDataLoader +from opacus.grad_sample.grad_sample_module import GradSampleModule +from opacus.grad_sample.utils import wrap_model +from opacus.optimizers.optimizer import DPOptimizer +from opacus.privacy_engine import PrivacyEngine +from substrafl.algorithms.pytorch import weight_manager +from substrafl.algorithms.pytorch.torch_fed_avg_algo import TorchFedAvgAlgo +from substrafl.remote import remote_data +from substrafl.strategies.schemas import FedAvgAveragedState, FedAvgSharedState + +logger = logging.getLogger(__name__) + + +class TorchDPFedAvgAlgo(TorchFedAvgAlgo): + """To be inherited. + + Wraps the necessary operation so a torch model can be trained in the Federated + Averaging strategy using DP. 
+ """ + + def __init__( + self, + model: torch.nn.Module, + criterion: torch.nn.modules.loss._Loss, + optimizer: torch.optim.Optimizer, + dataset: torch.utils.data.Dataset, + num_updates: int, + batch_size: int, + scheduler: Optional[torch.optim.lr_scheduler._LRScheduler] = None, + with_batch_norm_parameters: bool = False, + seed: Optional[int] = None, + use_gpu: bool = True, + dp_target_epsilon: float = None, + dp_target_delta: float = None, + dp_max_grad_norm: float = None, + num_rounds: int = None, + *args, + **kwargs, + ): + """Instantiate a TorchDPFedAvgAlgo. + + Parameters + ---------- + model : torch.nn.modules.module.Module + A torch model. + criterion : torch.nn.modules.loss._Loss + A torch criterion (loss). + optimizer : torch.optim.Optimizer + A torch optimizer linked to the model. + dataset : torch.utils.data.Dataset + Refer to the doc of the parent class. + This behavior can be changed by re-writing the `_local_train` or + `predict` methods. + num_updates : int + The number of updates to perform. Note that here we do not use + NpIndexGenerators. + batch_size : int + The batch-size to target in expectation (Poisson sampling). + scheduler : torch.optim.lr_scheduler._LRScheduler, Optional + A torch scheduler that will be called at every batch. If None, no + scheduler will be used. Defaults to None. + with_batch_norm_parameters : bool + Whether to include the batch norm layer parameters in the federated + average strategy. Defaults to False. + seed : typing.Optional[int] + Seed set at the algo initialization on each organization. + Defaults to None. + use_gpu : bool + Whether to use the GPUs if they are available. Defaults to True. + dp_target_epsilon : float + The target epsilon for (epsilon, delta)-differential private guarantee. + Defaults to None. + dp_target_delta : float + The target delta for (epsilon, delta)-differential private guarantee. + Defaults to None. 
    def _local_train(
        self,
        train_dataset: torch.utils.data.Dataset,
    ) -> None:
        """Contain the local training loop.

        Train the model on ``num_updates`` minibatches for the torch dataset.

        On the first call, wraps the model/optimizer/dataloader with Opacus'
        ``PrivacyEngine`` (noise level derived from the epsilon/delta targets
        and the total number of steps); on later calls only the dataloader is
        re-wrapped for Poisson sampling, since the wrapped model and
        optimizer persist through checkpointing.

        Parameters
        ----------
        train_dataset : torch.utils.data.Dataset
            train_dataset build from the x and y returned by the opener.
        """
        # Create torch dataloader it is important that it has a self.batch_size
        # batch size as len(train_data_loader) will be called by opacus
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=self.batch_size
        )
        # Remember the dataset size once so later rounds can verify it is stable.
        if not hasattr(self, "size_train_dataset"):
            self.size_train_dataset = len(train_dataset)

        if not hasattr(
            self, "accountant"
        ):  # if the attribute is not already there, need to instantiate the Engine
            # Important to use RDP to be able to use high epsilons
            # see https://github.com/pytorch/opacus/issues/604
            privacy_engine = PrivacyEngine(accountant="rdp")

            if not hasattr(self, "sample_rate"):
                self.sample_rate = self.batch_size / len(train_dataset)
            else:
                assert np.allclose(
                    self.sample_rate, self.batch_size / self.size_train_dataset
                ), "The length of the dataset has changed"

            # We will need it later
            # Noise calibrated so the whole training run (num_updates * num_rounds
            # steps at this sample rate) meets the (epsilon, delta) target.
            self.noise_multiplier = get_noise_multiplier(
                target_epsilon=self.dp_target_epsilon,
                target_delta=self.dp_target_delta,
                sample_rate=self.sample_rate,
                steps=self.num_total_steps,
                accountant=privacy_engine.accountant.mechanism(),
            )

            # Wraps model (per-sample grads), optimizer (clip + noise) and
            # dataloader (Poisson sampling) in their DP counterparts.
            (
                self._model,
                self._optimizer,
                train_data_loader,
            ) = privacy_engine.make_private(
                module=self._model,
                optimizer=self._optimizer,
                data_loader=train_data_loader,
                noise_multiplier=self.noise_multiplier,
                max_grad_norm=self.dp_max_grad_norm,
                poisson_sampling=True,
            )
            self.accountant = privacy_engine.accountant

        else:
            # Engine already attached in a previous round: only the fresh
            # dataloader needs to be converted to Poisson sampling.
            train_data_loader = DPDataLoader.from_data_loader(train_data_loader)

        # NOTE(review): this iterates the full (Poisson-sampled) dataloader,
        # i.e. ~len(dataset)/batch_size batches per call, while the docstring
        # speaks of num_updates minibatches — confirm the two coincide.
        for x_batch, y_batch in train_data_loader:
            x_batch = x_batch.to(self._device)
            y_batch = y_batch.to(self._device)
            # As batch-size is variable sometimes the batch is empty
            if x_batch.nelement() == 0:
                continue
            # Forward pass
            y_pred = self._model(x_batch)

            # Compute Loss
            loss = self._criterion(y_pred, y_batch)

            self._optimizer.zero_grad()
            loss.backward()

            self._optimizer.step()

            if self._scheduler is not None:
                self._scheduler.step()
    def _local_predict(
        self,
        predict_dataset: torch.utils.data.Dataset,
        predictions_path,
        return_predictions: bool = False,
    ):
        """Predict.

        Runs the model in eval mode over the whole dataset, batch by batch,
        and either returns the concatenated predictions or saves them.

        Parameters
        ----------
        predict_dataset : torch.utils.data.Dataset
            Predict dataset built from the `x` returned by the opener.
        predictions_path :
            Destination handed to ``TorchAlgo._save_predictions`` when
            ``return_predictions`` is False (presumably a filesystem path —
            confirm against the substrafl base class).
        return_predictions : bool
            If True, return the predictions tensor instead of persisting it.
            Defaults to False.

        Returns
        -------
        torch.Tensor or None
            The CPU predictions tensor when ``return_predictions`` is True,
            otherwise None (predictions are saved to ``predictions_path``).

        Important
        ---------
        The responsibility is on the user to save the computed predictions.
        Substrafl provides the `TorchAlgo._save_predictions` method for this
        purpose.
        The user can load those predictions from a metric file with the command:
        `y_pred = np.load(inputs['predictions'])`.

        Raises
        ------
        BatchSizeNotFoundError
            No default batch size has been found to perform local prediction.
            Please override the predict function of your algorithm.
        """
        # Note that we don't simply inherit from the method from FedAvgTorchAlgo
        # because it assumes the existence of the NpIndexGenerator

        # Plain sequential loader: no shuffling/dropping so the output order
        # matches the dataset order.
        predict_loader = torch.utils.data.DataLoader(
            predict_dataset, batch_size=self.batch_size, shuffle=False, drop_last=False
        )

        self._model.eval()

        predictions = []
        with torch.no_grad():
            for x in predict_loader:
                x = x.to(self._device)
                predictions.append(self._model(x))
        predictions = torch.cat(predictions, 0)
        predictions = predictions.cpu().detach()
        if return_predictions:
            return predictions
        else:
            self._save_predictions(predictions, predictions_path)
+ """ + checkpoint = super()._get_state_to_save() + + list_attrs_to_save = [ + "dp_max_grad_norm", + "dp_target_epsilon", + "dp_target_delta", + "num_rounds", + "num_updates", + "num_total_steps", + "batch_size", + ] + list_of_attrs_after_train = [ + "noise_multiplier", + "sample_rate", + "size_train_dataset", + ] + # For some reason this method is called before ever calling train so + # at first it doesn't have an accountant + if hasattr(self, "accountant"): + checkpoint["privacy_accountant_state_dict"] = self.accountant.state_dict() + list_attrs_to_save += list_of_attrs_after_train + + for attr in list_attrs_to_save: + checkpoint[attr] = getattr(self, attr) + + return checkpoint + + def _update_from_checkpoint(self, path) -> dict: + """Set self attributes using saved values. + + Parameters + ---------- + path : Path + Path towards the checkpoint to use. + + Returns + ------- + dict + The emptied checkpoint. + """ + # One cannot simply call checkpoint = super()._update_from_checkpoint(path) + # because we have to change the model class if it should be changed + # (and optimizer) aka if we find a specific key in the checkpoint + assert ( + path.is_file() + ), f'Cannot load the model - does not exist {list(path.parent.glob("*"))}' + checkpoint = torch.load(path, map_location=self._device) + # For some reason substrafl save and load client before calling train + if "privacy_accountant_state_dict" in checkpoint: + self.accountant = RDPAccountant() + self.accountant.load_state_dict( + checkpoint.pop("privacy_accountant_state_dict") + ) + self.sample_rate = checkpoint.pop("sample_rate") + self.size_train_dataset = checkpoint.pop("size_train_dataset") + self.noise_multiplier = checkpoint.pop("noise_multiplier") + # The init is messing up the fact that the model has become + # a grad sampler and the optimizer a DPOptimizer, their classes + # do not persist between serializations + # Those lines will allow to load corresponding state_dicts wo errors + if not 
isinstance(self._model, GradSampleModule): + self._model = wrap_model(self._model, grad_sample_mode="hooks") + + if not isinstance(self._optimizer, DPOptimizer): + self._optimizer = DPOptimizer( + self._optimizer, + noise_multiplier=self.noise_multiplier, + max_grad_norm=self.dp_max_grad_norm, + expected_batch_size=self.batch_size, + ) + + self._optimizer.attach_step_hook( + self.accountant.get_optimizer_hook_fn(self.sample_rate) + ) + + self._model.load_state_dict(checkpoint.pop("model_state_dict")) + + if self._optimizer is not None: + self._optimizer.load_state_dict(checkpoint.pop("optimizer_state_dict")) + + if self._scheduler is not None: + self._scheduler.load_state_dict(checkpoint.pop("scheduler_state_dict")) + + self._index_generator = checkpoint.pop("index_generator") + + if self._device == torch.device("cpu"): + torch.set_rng_state(checkpoint.pop("rng_state").to(self._device)) + else: + torch.cuda.set_rng_state(checkpoint.pop("rng_state").to("cpu")) + + attr_names = [ + "dp_max_grad_norm", + "dp_target_epsilon", + "dp_target_delta", + "num_rounds", + "num_updates", + "num_total_steps", + "batch_size", + ] + + for attr in attr_names: + setattr(self, attr, checkpoint.pop(attr)) + + return checkpoint diff --git a/fedeca/algorithms/torch_webdisco_algo.py b/fedeca/algorithms/torch_webdisco_algo.py new file mode 100644 index 00000000..6581b4c0 --- /dev/null +++ b/fedeca/algorithms/torch_webdisco_algo.py @@ -0,0 +1,710 @@ +"""Implement webdisco algorithm with Torch.""" +import copy +import os +from copy import deepcopy +from math import sqrt +from pathlib import Path +from typing import Any, List, Optional + +# hello +import numpy as np +import torch +from autograd import elementwise_grad +from autograd import numpy as anp +from lifelines.utils import StepSizer +from scipy.linalg import norm +from scipy.linalg import solve as spsolve +from substrafl.algorithms.pytorch import weight_manager +from substrafl.algorithms.pytorch.torch_base_algo import TorchAlgo +from 
substrafl.remote import remote_data +from substrafl.strategies.schemas import StrategyName + +from fedeca.schemas import WebDiscoAveragedStates, WebDiscoSharedState +from fedeca.utils.moments_utils import compute_uncentered_moment +from fedeca.utils.survival_utils import MockStepSizer + + +class TorchWebDiscoAlgo(TorchAlgo): + """WebDiscoAlgo class.""" + + def __init__( + self, + model: torch.nn.Module, + batch_size: Optional[int], + *args, + duration_col: str = "T", + event_col: str = "E", + treated_col: str = None, + initial_step_size: float = 0.95, + learning_rate_strategy: str = "lifelines", + standardize_data: bool = True, + tol: float = 1e-16, + penalizer: float = 0.0, + l1_ratio: float = 0.0, + propensity_model: torch.nn.Module = None, + propensity_strategy: str = "iptw", + store_hessian: bool = False, + with_batch_norm_parameters: bool = False, + use_gpu: bool = True, + robust: bool = False, + **kwargs, + ): + """Initialize the TorchWebdiscoAlgo class. + + Parameters + ---------- + model : torch.nn.Module + Model to use internally + batch_size : int, optional + Batch size for training + duration_col : str, optional + Column for the duration. Defaults to "T". + event_col : str, optional + Column for the event. Defaults to "E". + treated_col : str, optional + Column for the treatment. Defaults to None. + initial_step_size : float, optional + Initial step size. Defaults to 0.95. + learning_rate_strategy : str, optional + Strategy to follow for the learning rate. Defaults to "lifelines". + standardize_data : bool, optional + Whether to standardize data. Defaults to True. + tol : float, optional + Precision tolerance. Defaults to 1e-16. + penalizer : float, optional + Strength of the total penalization. Defaults to 0.0. + l1_ratio : float, optional + Ratio of the L1 penalization, should be in [0, 1]. Defaults to 0.0. + propensity_model : torch.nn.Module, optional + Propensity model to use. Defaults to None. 
+ propensity_strategy : str, optional + Which covariates to use for the propensity model. + Both give different results because of non-collapsibility: + https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7986756/ + Defaults to iptw, which will use only the treatment allocation as covariate. + store_hessian : bool, optional + Whether to store the Hessian. Defaults to False. + with_batch_norm_parameters : bool, optional + Whether to use batch norm parameters. Defaults to False. + use_gpu : bool, optional + Whether to use GPU for training. Defaults to True. + robust : bool, optional + Whether or not to store quantities specificallyy needed for robust + estimation. + """ + assert "optimizer" not in kwargs, "WebDisco strategy does not uses optimizers" + assert "criterion" not in kwargs, "WebDisco strategy does not use criterion" + assert propensity_strategy in [ + "iptw", + "aiptw", + ], """propensity strategy not + Implemented""" + + super().__init__( + model=model, + criterion=None, + optimizer=None, + index_generator=None, + use_gpu=use_gpu, + # duration_col=duration_col, + # batch_size=batch_size, + # tol=tol, + # initial_step_size=initial_step_size, + # learning_rate_strategy=learning_rate_strategy, + # penalizer=penalizer, + # l1_ratio=l1_ratio, + # with_batch_norm_parameters=with_batch_norm_parameters, + **kwargs, + ) + self._batch_size = batch_size + self._duration_col = duration_col + self._event_col = event_col + self._target_cols = [self._duration_col, self._event_col] + self._treated_col = treated_col + self._standardize_data = standardize_data + self._tol = tol + self._initial_step_size = initial_step_size + assert learning_rate_strategy in [ + "lifelines", + "constant", + ], "Learning rate strategy not supported" + self._learning_rate_strategy = learning_rate_strategy + self._penalizer = penalizer + self._l1_ratio = l1_ratio + self._propensity_model = propensity_model + if self._propensity_model is not None: + assert ( + self._treated_col is not None + ), "If 
you are using a propensity model you should provide the treated col"
            self._propensity_model.eval()
        self._propensity_strategy = propensity_strategy

        self._store_hessian = store_hessian
        self._with_batch_norm_parameters = with_batch_norm_parameters
        self._robust = robust

        # Per-client mirror of the (stateless) server state; populated during
        # training rounds.
        self.server_state = {}
        self.global_moments = None
        # initialized and used only in the train method
        self._final_gradients = None
        self._final_hessian = None
        self._n_samples_done = None

        # TODO make this as clean as possible but frankly it's hard
        # you want kwargs to be simultaneously empty and not empty
        for k in ["propensity_model", "robust"]:
            self.kwargs[k] = copy.deepcopy(getattr(self, "_" + k))

    @property
    def strategies(self) -> List[StrategyName]:
        """List of compatible strategies.

        Returns
        -------
        typing.List[StrategyName]
            List of compatible strategies.
        """
        return ["WebDisco"]

    @remote_data
    def compute_local_phi_stats(
        self,
        datasamples: Any,
        # Set shared_state to None per default for clarity reasons as
        # the decorator will do it if the arg shared_state is not passed.
        shared_state: Optional[WebDiscoAveragedStates] = None,
    ) -> WebDiscoSharedState:
        """Compute local updates.

        Parameters
        ----------
        datasamples : Any
            Dataframe returned by the opener.
        shared_state : Optional[WebDiscoAveragedStates], optional
            Aggregated state from the server. On the first call it carries the
            global survival statistics (and moments); afterwards it is None and
            the statistics cached in ``self.server_state`` are reused.
            Defaults to None.
+ + Returns + ------- + WebDiscoSharedState + _description_ + """ + if not hasattr(self, "server_state"): + self.server_state = {} + # We either already have global_statistics in the self or we don't + if shared_state is None: + # This is part of the stateless server + global_survival_statistics = self.server_state["global_survival_statistics"] + # We assume moments have already been given once to each client + # so that they updated their self.global_moments if standardize_data + # is true so moments can be None + moments = None + else: + # We initialize the self.server_state["global_survival_statistics"] + # that will be used + # throughout + global_survival_statistics = shared_state["global_survival_statistics"] + self.server_state["global_survival_statistics"] = global_survival_statistics + moments = shared_state["moments"] + + X, y, weights = self.compute_X_y_and_propensity_weights(datasamples, moments) + + distinct_event_times = global_survival_statistics["distinct_event_times"] + + self._model.eval() + # The shape of expbetaTx is (N, 1) + X = torch.from_numpy(X) + expbetaTx = self._model(X).detach().numpy() + X = X.numpy() + betaTx = np.log(expbetaTx) # very inefficient, but whatever + offset = betaTx.max(axis=0) + factor = np.exp(offset) + expbetaTx_stable = np.exp(betaTx - offset) + # for risk_phi each element is a scalar + risk_phi = [] + # for risk_phi_x each element is of the dimension of a feature N, + risk_phi_x = [] + # for risk_phi_x_x each element is of the dimension of a feature squared N, N + risk_phi_x_x = [] + for _, t in enumerate(distinct_event_times): + Rt = np.where(np.abs(y) >= t)[0] + weights_for_rt = weights[Rt] + risk_phi.append( + factor + * (np.multiply(expbetaTx_stable[Rt], weights_for_rt).sum(axis=(0, 1))) + ) + common_block = np.multiply(expbetaTx_stable[Rt] * weights_for_rt, X[Rt]) + risk_phi_x.append(factor * common_block.sum(axis=0)) + risk_phi_x_x.append(factor * np.einsum("ij,ik->jk", common_block, X[Rt])) + local_phi_stats = 
{}
        local_phi_stats["risk_phi"] = risk_phi
        local_phi_stats["risk_phi_x"] = risk_phi_x
        local_phi_stats["risk_phi_x_x"] = risk_phi_x_x

        return {
            "local_phi_stats": local_phi_stats,
            # The server being stateless we need to feed it perpetually
            "global_survival_statistics": global_survival_statistics,
        }

    @remote_data
    def local_uncentered_moments(self, datasamples, shared_state=None):
        """Compute the local uncentered moments.

        This method is transformed by the decorator to meet Substra API,
        and is executed in the training nodes. See build_compute_plan.

        Parameters
        ----------
        datasamples : pd.DataFrame
            Dataframe returned by the opener.
        shared_state : None, optional
            Given by the aggregation node, here nothing, by default None.

        Returns
        -------
        dict
            Local results to be shared via shared_state to the aggregation node.
        """
        del shared_state  # unused
        # We do not have to do the mean on the target columns
        datasamples = datasamples.drop(columns=self._target_cols)
        if self._propensity_model is not None and self._propensity_strategy == "iptw":
            datasamples = datasamples.loc[:, [self._treated_col]]
        # First and second uncentered moments (k = 1, 2)
        results = {
            f"moment{k}": compute_uncentered_moment(datasamples, k) for k in range(1, 3)
        }
        results["n_samples"] = datasamples.select_dtypes(include=np.number).count()
        return results

    @remote_data
    def _compute_local_constant_survival_statistics(self, datasamples, shared_state):
        """Compute local statistics and Dt for all ts in the distinct event times.

        Those statistics are useful to compute the global statistics that will be
        used throughout training. The definition of :math:`\\mathcal{D}_t` (Dt)
        associated to the value t is the set of indices of all the individuals that
        experience an event at time t.

        More formally:

        .. 
math:: + + \\mathcal{D}_{t} = \{ i \in [0, n] | e_i = 0, t_i = t\} # noqa W630 + + + + Parameters + ---------- + tokens_list : list + Normally a list of size one since we should use all samples in one batch. + + Returns + ------- + dict + Where we can find the following keys 'sum_features_on_events', + 'distinct_event_times', 'number_events_by_time' and 'total_number_samples', + where: + - "sum_features_on_events" contains the sum of the features + across samples for all the distinct event times of the given clients, + i.e. a single vector per time stamp + - "distinct_event_times": list of floating values containing the + unique times at which at least 1 death is registered in the + current dataset + - "number_events_by_time": number of events occurring at each + distinct_event_times + - "total_number_samples": total number of samples + """ + X, y, weights = self.compute_X_y_and_propensity_weights( + datasamples, shared_state + ) + distinct_event_times = np.unique(y[y > 0]).tolist() + + sum_features_on_events = np.zeros(X.shape[1:]) + number_events_by_time = [] + weights_counts_on_events = [] + for t in distinct_event_times: + Dt = np.where(y == t)[0] + num_events = len(Dt) + sum_features_on_events += (weights[Dt] * X[Dt, :]).sum(axis=0) + number_events_by_time.append(num_events) + weights_counts_on_events.append(weights[Dt].sum()) + + return { + "sum_features_on_events": sum_features_on_events, + "distinct_event_times": distinct_event_times, + "number_events_by_time": number_events_by_time, + "total_number_samples": X.shape[0], + "moments": shared_state, + "weights_counts_on_events": weights_counts_on_events, + } + + @remote_data + def train( + self, + datasamples: Any, + # Set shared_state to None per default for clarity reason as + # the decorator will do it if the arg shared_state is not passed. + shared_state: Optional[WebDiscoAveragedStates] = None, + ) -> WebDiscoSharedState: + """Local train function. 
+ + Parameters + ---------- + datasamples : Any + _description_ + shared_state : Optional[WebDiscoAveragedStates], optional + description_. Defaults to None. + + Raises + ------ + NotImplementedError + _description_ + + Returns + ------- + WebDiscoSharedState + _description_ + """ + # We either simply update the model with NR update or we compute risk_phi_stats + gradient = shared_state["gradient"] + hessian = shared_state["hessian"] + second_part_ll = shared_state["second_part_ll"] + global_survival_statistics = self.server_state["global_survival_statistics"] + first_part_ll = deepcopy( + global_survival_statistics["global_sum_features_on_events"] + ) + + if "step_sizer" not in self.server_state: + if self._learning_rate_strategy == "lifelines": + self.server_state["step_sizer"] = StepSizer(self._initial_step_size) + else: + # use constant learning rate of 1. + self.server_state["step_sizer"] = MockStepSizer() + self.server_state["count_iter"] = 1 + self.server_state["current_weights"] = np.zeros( + shared_state["gradient_shape"] + ) + + n = global_survival_statistics["total_number_samples"] + + if self._penalizer > 0.0: + if self._learning_rate_strategy == "lifelines": + # This is used to multiply the penalty + # We use a smooth approximation for the L1 norm (for more details + # see docstring of function) + # we use numpy autograd to be able to compute the first and second + # order derivatives of this expression + + def soft_abs(x, a): + return 1 / a * (anp.logaddexp(0, -a * x) + anp.logaddexp(0, a * x)) + + def elastic_net_penalty(beta, a): + l1 = self._l1_ratio * soft_abs(beta, a) + l2 = 0.5 * (1 - self._l1_ratio) * (beta**2) + reg = n * (self._penalizer * (l1 + l2)).sum() + return reg + + # Regularization affects both the gradient and the hessian + # producing a better conditioned hessian. 
+ d_elastic_net_penalty = elementwise_grad(elastic_net_penalty) + dd_elastic_net_penalty = elementwise_grad(d_elastic_net_penalty) + # lifelines trick to progressively sharpen the approximation of + # the l1 regularization. + alpha = 1.3 ** self.server_state["count_iter"] + # We are trying to **maximize** the log-likelihood that is why + # we put a negative sign and not a plus sign on the regularization. + # The fact that we are actually moving towards the maximum and + # not towards the minimum is because -H is psd. + gradient -= d_elastic_net_penalty( + self.server_state["current_weights"], alpha + ) + hessian[ + np.diag_indices(shared_state["gradient_shape"]) + ] -= dd_elastic_net_penalty(self.server_state["current_weights"], alpha) + else: + raise NotImplementedError + + inv_h_dot_g_T = spsolve(-hessian, gradient, assume_a="pos", check_finite=False) + + norm_delta = norm(inv_h_dot_g_T) + + step_size = self.server_state["step_sizer"].update(norm_delta).next() + self.server_state["count_iter"] += 1 + updates = step_size * inv_h_dot_g_T + + # We keep the current version of the weights, because of ll computations + past_ll = (self.server_state["current_weights"] * first_part_ll).sum( + axis=0 + ) + second_part_ll + self.server_state["current_weights"] += updates + + weight_manager.increment_parameters( + model=self._model, + updates=[torch.from_numpy(updates[None, :])], + with_batch_norm_parameters=self._with_batch_norm_parameters, + ) + + # convergence criteria + if norm_delta < 1e-07: + converging, success = False, True + elif step_size <= 0.00001: + converging, success = False, False + else: + converging, success = True, False + + self.server_state["converging"] = converging + self.server_state["success"] = success + self.server_state["past_ll"] = past_ll + # We store the hessian to compute standard deviations of coefficients and + # associated p-values + if self.server_state["count_iter"] > 10 or success or self._store_hessian: + self.server_state["hessian"] = 
hessian + self.server_state["gradient"] = gradient + # This needs to be in the state of the client for complicated reasons due + # to simu mode + if self._robust: + self.server_state["global_robust_statistics"] = {} + self.server_state["global_robust_statistics"][ + "global_risk_phi_list" + ] = shared_state["global_risk_phi_list"] + self.server_state["global_robust_statistics"][ + "global_risk_phi_x_list" + ] = shared_state["global_risk_phi_x_list"] + # TODO this renaming and moving around is useless and inefficient + self.server_state["global_robust_statistics"][ + "global_weights_counts_on_events" + ] = self.server_state["global_survival_statistics"][ + "weights_counts_on_events" + ] + self.server_state["global_robust_statistics"][ + "distinct_event_times" + ] = self.server_state["global_survival_statistics"]["distinct_event_times"] + + return self.compute_local_phi_stats( + datasamples=datasamples, shared_state=None, _skip=True + ) + + @remote_data + def predict( + self, + datasamples: Any, + shared_state: Any = None, + predictions_path: os.PathLike = None, + return_predictions: bool = False, + ) -> Any: + """Predict function. + + Execute the following operations: + + - Create the test torch dataset. + - Execute and return the results of the ``self._local_predict`` method + + Parameters + ---------- + datasamples : typing.Any + Input data + shared_state : typing.Any + Latest train task shared state (output of the train method) + predictions_path : os.PathLike + Destination file to save predictions + return_predictions: bool + Whether or not to make the method return predictions. Useful only + with simu mode. 
+ """ + X, _, _ = self.compute_X_y_and_propensity_weights(datasamples, shared_state) + + X = torch.from_numpy(X) + + self._model.eval() + + predictions = self._model(X).cpu().detach().numpy() + if return_predictions: + return predictions + else: + self._save_predictions(predictions, predictions_path) + + def _get_state_to_save(self) -> dict: + """Create the algo checkpoint: a dictionary saved with ``torch.save``. + + In this algo, it contains the state to save for every strategy. + Reimplement in the child class to add strategy-specific variables. + + Example + ------- + .. code-block:: python + def _get_state_to_save(self) -> dict: + local_state = super()._get_state_to_save() + local_state.update({ + "strategy_specific_variable": self._strategy_specific_variable, + }) + return local_state + Returns + ------- + dict + checkpoint to save + """ + checkpoint = super()._get_state_to_save() + checkpoint.update({"server_state": self.server_state}) + checkpoint.update({"global_moments": self.global_moments}) + return checkpoint + + def _update_from_checkpoint(self, path: Path) -> dict: + """Load the local state from the checkpoint. + + Parameters + ---------- + path : pathlib.Path + Path where the checkpoint is saved + + Returns + ------- + dict + Checkpoint + """ + checkpoint = super()._update_from_checkpoint(path=path) + self.server_state = checkpoint.pop("server_state") + self.global_moments = checkpoint.pop("global_moments") + return checkpoint + + def summary(self): + """Summary of the class to be exposed in the experiment summary file. + + Returns + ------- + dict + A json-serializable dict with the attributes the user wants to store + """ + summary = super().summary() + return summary + + def build_X_y(self, datasamples, shared_state={}): + """Build appropriate X and y times from output of opener. + + This function 1. 
uses the event column to inject the censorship + information present in the duration column (given in absolute values) + in the form of a negative sign. + 2. Drop every covariate except treatment if self.strategy == "iptw". + 3. Standardize the data if self.standardize_data AND if it receives + an outmodel. + 4. Return the (unstandardized) input to the propensity model Xprop if + necessary as well as the treated column to be able to compute the + propensity weights. + + Parameters + ---------- + datasamples : pd.DataFrame + The output of the opener + shared_state : dict, optional + Outmodel containing global means and stds. + by default {} + + Returns + ------- + tuple + standardized X, signed times, treatment column and unstandardized + propensity model input + """ + # We need y to be in the format (2*event-1)*duration + datasamples["time_multiplier"] = [ + 2.0 * e - 1.0 for e in datasamples[self._event_col].tolist() + ] + # No funny business irrespective of the convention used + y = np.abs(datasamples[self._duration_col]) * datasamples["time_multiplier"] + y = y.to_numpy().astype("float64") + datasamples = datasamples.drop(columns=["time_multiplier"]) + + # We drop the targets from X + columns_to_drop = self._target_cols + X = datasamples.drop(columns=columns_to_drop) + if self._propensity_model is not None and self._propensity_strategy == "iptw": + X = X.loc[:, [self._treated_col]] + + # If X is to be standardized we do it + if self._standardize_data: + if shared_state: + # Careful this shouldn't happen apart from the predict + means = shared_state["global_uncentered_moment_1"] + vars = shared_state["global_centered_moment_2"] + # Careful we need to match pandas and use unbiased estimator + bias_correction = (shared_state["total_n_samples"]) / float( + shared_state["total_n_samples"] - 1 + ) + self.global_moments = { + "means": means, + "vars": vars, + "bias_correction": bias_correction, + } + stds = vars.transform(lambda x: sqrt(x * bias_correction + 
self._tol)) + X = X.sub(means) + X = X.div(stds) + else: + X = X.sub(self.global_moments["means"]) + stds = self.global_moments["vars"].transform( + lambda x: sqrt( + x * self.global_moments["bias_correction"] + self._tol + ) + ) + X = X.div(stds) + + X = X.to_numpy().astype("float64") + + # If we have a propensity model we need to build X without the targets AND the + # treated column + if self._propensity_model is not None: + # We do not normalize the data for the propensity model !!! + Xprop = datasamples.drop(columns=columns_to_drop + [self._treated_col]) + Xprop = Xprop.to_numpy().astype("float64") + else: + Xprop = None + + # If WebDisco is used without propensity treated column does not exist + if self._treated_col is not None: + treated = ( + datasamples[self._treated_col] + .to_numpy() + .astype("float64") + .reshape((-1, 1)) + ) + else: + treated = None + + return (X, y, treated, Xprop) + + def compute_X_y_and_propensity_weights(self, datasamples, shared_state): + """Build appropriate X, y and weights from raw output of opener. + + Uses the helper function build_X_y and the propensity model to build the + weights. 
+ + Parameters + ---------- + datasamples : pd.DataFrame + Raw output from opener + shared_state : dict, optional + Outmodel containing global means and stds, by default {} + + Returns + ------- + tuple + _description_ + """ + X, y, treated, Xprop = self.build_X_y(datasamples, shared_state) + if self._propensity_model is not None: + assert ( + treated is not None + ), f"""If you are using a propensity model the {self._treated_col} (Treated) + column should be available""" + assert np.all( + np.in1d(np.unique(treated.astype("uint8"))[0], [0, 1]) + ), "The treated column should have all its values in set([0, 1])" + Xprop = torch.from_numpy(Xprop) + with torch.no_grad(): + propensity_scores = self._propensity_model(Xprop) + + propensity_scores = propensity_scores.detach().numpy() + # We robustify the division + weights = treated * 1.0 / np.maximum(propensity_scores, self._tol) + ( + 1 - treated + ) * 1.0 / (np.maximum(1.0 - propensity_scores, self._tol)) + else: + weights = np.ones((X.shape[0], 1)) + return X, y, weights diff --git a/fedeca/analytics/__init__.py b/fedeca/analytics/__init__.py new file mode 100644 index 00000000..d9a24d2d --- /dev/null +++ b/fedeca/analytics/__init__.py @@ -0,0 +1,3 @@ +"""Facilitate imports.""" +from .robust_variance_estimation import RobustCoxVariance +from .robust_variance_cox_algo import RobustCoxVarianceAlgo diff --git a/fedeca/analytics/robust_variance_cox_algo.py b/fedeca/analytics/robust_variance_cox_algo.py new file mode 100644 index 00000000..7c4ce5ff --- /dev/null +++ b/fedeca/analytics/robust_variance_cox_algo.py @@ -0,0 +1,254 @@ +"""Estimate the variance for mispecified Cox models.""" +import copy +import sys +from math import sqrt +from pathlib import Path + +import numpy as np +import pandas as pd +import torch +from substrafl.remote import remote_data + +from fedeca.algorithms import TorchWebDiscoAlgo +from fedeca.utils import make_substrafl_torch_dataset_class +from fedeca.utils.survival_utils import CoxPHModelTorch, 
compute_q_k + + +class RobustCoxVarianceAlgo(TorchWebDiscoAlgo): + """Implement local client method for robust cox variance estimation.""" + + def __init__( + self, + beta: np.ndarray, + variance_matrix: np.ndarray, + global_robust_statistics: dict[list[np.ndarray]], + propensity_model: torch.nn.Module, + duration_col: str, + event_col: str, + treated_col: str, + standardize_data: bool = True, + propensity_strategy: str = "iptw", + dtype: float = "float64", + tol: float = 1e-16, + ): + """Initialize Robust Cox Variance Algo. + + Parameters + ---------- + beta : np.ndarray + The weights of the trained Cox model. + variance_matrix: np.ndarray + The variance estimated in non robust mode aka H^{-1} rescaled + by computed_stds. + global_robust_statistics: dict[list[np.ndarray]] + The global statistics on risk sets and events needed for FL + computation. + propensity_model: torch.nn.Module + The propensity model trained. + duration_col : str + Column for the duration. + event_col : str, optional + Column for the event. + treated_col : str, optional + Column for the treatment. + standardize_data : bool, optional + Whether to standardize data. Defaults to True. + propensity_strategy : str, optional + Which covariates to use for the propensity model. + Both give different results because of non-collapsibility: + https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7986756/ + Defaults to iptw, which will use only the treatment allocation as covariate. + dtype: str + The type of the data to generate from dataframe. Defaults to float64. + tol: float + The clipping to avoid zero division errors. 
+ """ + self.beta = beta + self.duration_col = duration_col + self.treated_col = treated_col + self.event_col = event_col + self.standardize_data = standardize_data + self.variance_matrix = variance_matrix + self._tol = tol + + assert isinstance(global_robust_statistics, dict) + global_robust_statistics_arg = copy.deepcopy(global_robust_statistics) + + assert all( + [ + attr in global_robust_statistics_arg + for attr in [ + "global_weights_counts_on_events", + "global_risk_phi_list", + "global_risk_phi_x_list", + "distinct_event_times", + "global_moments", + ] + ] + ) + + global_moments = global_robust_statistics_arg.pop("global_moments") + + assert all( + [ + len(global_robust_statistics_arg["global_weights_counts_on_events"]) + == len(v) + for k, v in global_robust_statistics_arg.items() + if k != "global_weights_counts_on_events" + ] + ) + + self.global_robust_statistics = global_robust_statistics_arg + if self.standardize_data: + computed_stds = ( + global_moments["vars"] + .transform( + lambda x: sqrt(x * global_moments["bias_correction"] + self._tol) + ) + .to_numpy() + ) + else: + computed_stds = np.ones((self.variance_matrix.shape[0])).squeeze() + + # We initialize the Cox model to the final parameters from WebDisco + # that we need to unnormalize + fc1_weight = torch.from_numpy(beta * computed_stds) + # We need to scale the variance matrix + self.scaled_variance_matrix = ( + self.variance_matrix + * np.tile(computed_stds, (self.variance_matrix.shape[0], 1)).T + ) + + class InitializedCoxPHModelTorch(CoxPHModelTorch): + def __init__(self): + super().__init__(ndim=1) + self.fc1.weight.data = fc1_weight + + init_cox = InitializedCoxPHModelTorch() + + survival_dataset_class = make_substrafl_torch_dataset_class( + [self.duration_col, self.event_col], + self.event_col, + self.duration_col, + dtype=dtype, + ) + super().__init__( + model=init_cox, + batch_size=sys.maxsize, + dataset=survival_dataset_class, + propensity_model=propensity_model, + 
duration_col=duration_col, + event_col=event_col, + treated_col=treated_col, + standardize_data=standardize_data, + propensity_strategy=propensity_strategy, + tol=tol, + ) + # Now AND ONLY NOW we give it the global mean and weights computed by WebDisco + # otherwise self.global_moments is set to None by + # WebDisco init + # TODO WebDisco init accept global_moments + self.global_moments = global_moments + + @remote_data + def local_q_computation(self, datasamples: pd.DataFrame, shared_state=None): + """Compute Qk. + + Parameters + ---------- + datasamples : pd.DataFrame + Pandas dataframe provided by the opener. + shared_state : None + Unused here as this function only + use local information already present in the datasamples. + Defaults to None. + + Returns + ------- + np.ndarray + dictionary containing the local information on means, counts + and number of sample. This dict will be used as a state to be + shared to an AggregationNode in order to compute the aggregation + of the different analytics. 
+ """ + df = datasamples + + distinct_event_times = self.global_robust_statistics["distinct_event_times"] + weights_counts_on_events = self.global_robust_statistics[ + "global_weights_counts_on_events" + ] + risk_phi = self.global_robust_statistics["global_risk_phi_list"] + risk_phi_x = self.global_robust_statistics["global_risk_phi_x_list"] + + ( + X_norm, + y, + weights, + ) = self.compute_X_y_and_propensity_weights(df, shared_state=shared_state) + + self._model.eval() + # The shape of expbetaTx is (N, 1) + X_norm = torch.from_numpy(X_norm) + score = self._model(X_norm).detach().numpy() + X_norm = X_norm.numpy() + + phi_k, delta_betas_k, Qk = compute_q_k( + X_norm, + y, + self.scaled_variance_matrix, + distinct_event_times, + weights_counts_on_events, + risk_phi, + risk_phi_x, + score, + weights, + ) + + # The attributes below are private to the client + self._client_statistics = {} + self._client_statistics["phi_k"] = phi_k + self._client_statistics["delta_betas_k"] = delta_betas_k + self._client_statistics["Qk"] = Qk + + return Qk + + def _get_state_to_save(self) -> dict: + """Create the algo checkpoint: a dictionary saved with ``torch.save``. + + In this algo, it contains the state to save for every strategy. + Reimplement in the child class to add strategy-specific variables. + + Example + ------- + .. code-block:: python + def _get_state_to_save(self) -> dict: + local_state = super()._get_state_to_save() + local_state.update({ + "strategy_specific_variable": self._strategy_specific_variable, + }) + return local_state + Returns + ------- + dict + checkpoint to save + """ + checkpoint = super()._get_state_to_save() + checkpoint.update({"client_statistics": self._client_statistics}) + return checkpoint + + def _update_from_checkpoint(self, path: Path) -> dict: + """Load the local state from the checkpoint. 
+ + Parameters + ---------- + path : pathlib.Path + Path where the checkpoint is saved + + Returns + ------- + dict + Checkpoint + """ + checkpoint = super()._update_from_checkpoint(path=path) + self._client_statistics = checkpoint.pop("client_statistics") + return checkpoint diff --git a/fedeca/analytics/robust_variance_estimation.py b/fedeca/analytics/robust_variance_estimation.py new file mode 100644 index 00000000..c08cb942 --- /dev/null +++ b/fedeca/analytics/robust_variance_estimation.py @@ -0,0 +1,144 @@ +"""Estimate the variance for mispecified Cox models.""" +from typing import List + +import numpy as np +from substrafl.nodes import AggregationNode, TrainDataNode +from substrafl.remote import remote +from substrafl.strategies.strategy import Strategy + + +class RobustCoxVariance(Strategy): + """Launch robust variance estimation for cox models.""" + + def __init__(self, algo): + """Init robust cox variance estimation. + + Parameters + ---------- + algo : RobustCoxVarianceAlgo + An instance of RobustCoxVarianceAlgo. + """ + super().__init__(algo=algo) + + # States + self._local_states: Optional[List[LocalStateRef]] = None + self._shared_states: Optional[List[SharedStateRef]] = None + + # We have to have instantiated name, perform_predict and performm_round + @property + def name(self): + """Set strategy name. + + Returns + ------- + StrategyName + Name of the strategy + """ + return "Robust Cox Variance" + + def perform_predict(self): + """Do nothing. + + Only there so that the strategy is recognized as such by substrafl. + """ + pass + + def perform_round(self): + """Do nothing. + + Only there so that the strategy is recognized as such by substrafl. + """ + pass + + @remote + def sum(self, shared_states: List[np.ndarray]): + """Compute sum of Qks. + + Parameters + ---------- + shared_states : List[np.ndarray] + list of dictionaries containing Qk. + + Returns + ------- + np.ndarray + Q matrix. 
+ """ + return sum(shared_states) + + def build_compute_plan( + self, + train_data_nodes: List[TrainDataNode], + aggregation_node: AggregationNode, + num_rounds=None, + evaluation_strategy=None, + clean_models=False, + ): + """Build compute plan. + + Method to build and link the different computations to execute with each + other. We will use the ``update_state``method of the nodes given as input to + choose which method to apply. For our example, we will only use TrainDataNodes + and AggregationNodes. + + Parameters + ---------- + train_data_nodes : List[TrainDataNode]) + Nodes linked to the data + samples on which to compute analytics. + aggregation_node : AggregationNode) + Node on which to compute the + aggregation of the analytics extracted from the train_data_nodes. + num_rounds : Optional[int] + Num rounds to be used to iterate on + recurrent part of the compute plan. Defaults to None. + evaluation_strategy : Optional[substrafl.EvaluationStrategy] + Object storing the TestDataNode. Unused in this example. Defaults to None. + clean_models : bool + Clean the intermediary models of this round on + the Substra platform. Default to False. + """ + if self.algo is None: + raise ValueError( + "You should initialize the algo of this strategy with a" + " RobustCoxVarianceAlgo." + ) + + qk_list = [] + + for node in train_data_nodes: + # Call local_first_order_computation on each train data node + next_local_state, next_shared_state = node.update_states( + self.algo.local_q_computation( + node.data_sample_keys, + shared_state=None, + _algo_name=f"Computing local Qk {self.__class__.__name__}", + ), + local_state=None, + round_idx=0, + authorized_ids=set([node.organization_id]), + aggregation_id=aggregation_node.organization_id, + clean_models=False, + ) + + # All local analytics are stored in the first_order_shared_states, + # given as input the the aggregation method. 
+ qk_list.append(next_shared_state) + # Just in case + # self._local_states.append(next_local_state) + + # Call the aggregation method on the first_order_shared_states + self.Q = aggregation_node.update_states( + self.sum( + shared_states=qk_list, + _algo_name="Aggregating Qk into Q", + ), + round_idx=0, + authorized_ids=set( + [ + train_data_node.organization_id + for train_data_node in train_data_nodes + ] + ), + clean_models=False, + ) diff --git a/fedeca/competitors.py b/fedeca/competitors.py new file mode 100644 index 00000000..4177e993 --- /dev/null +++ b/fedeca/competitors.py @@ -0,0 +1,581 @@ +"""Implementation of competitors of FEDECA.""" +from typing import Literal, Optional + +import numpy as np +import pandas as pd +from indcomp import MAIC +from lifelines.fitters.coxph_fitter import CoxPHFitter +from sklearn.linear_model import LogisticRegression + +from fedeca.utils.survival_utils import ( + BaseSurvivalEstimator, + BootstrapMixin, + compute_summary, +) +from fedeca.utils.typing import _SeedType + + +class PooledIPTW(BaseSurvivalEstimator, BootstrapMixin): + """Class for the Pooled IPTW.""" + + def __init__( + self, + treated_col="treated", + event_col="E", + duration_col="T", + ps_col="propensity_scores", + effect="ATE", + variance_method: Literal["naive", "robust", "bootstrap"] = "naive", + n_bootstrap: int = 200, + seed: _SeedType = None, + cox_fit_kwargs=None, + ): + """Initialize Pooled Inverse Probability of Treatment Weighting estimator. + + Parameters + ---------- + treated_col : str, optional + Column name indicating treatment status, by default "treated". + event_col : str, optional + Column name indicating event occurrence, by default "E". + duration_col : str, optional + Column name indicating time to event or censoring, by default "T". + ps_col : str, optional + Column name indicating the propensity scores. + effect : str, optional + Effect type to estimate (ATE, ATC, or ATT), by default "ATE". 
+ variance_method : `{"naive", "robust", "bootstrap"}` + Method for estimating the variance, and therefore the p-value of the + estimated treatment effect. + * "naive": Inverse of the Fisher information. + * "robust": The robust sandwich estimator. Useful when samples are + reweighted. + * "bootstrap": Bootstrap the given data by sampling each patient + with replacement, each time estimate the treatment effect, then + use all repeated estimations to compute the variance. + n_bootstrap : int + Number of bootstrap repetitions, only useful when `variance_method` + is set to "bootstrap", by default 200, as recommended in "Efron B, + Tibshirani RJ. An Introduction to the Bootstrap. Chapman & Hall: + New York, NY, 1993, (page 52)." + seed: {None, int, Sequence[int], SeedSequence, BitGenerator, Generator} + The seed for reproducibility, only useful when `variance_method` is + set to "bootstrap", by default None. + cox_fit_kwargs : dict or None, optional + Additional keyword arguments for Cox model fitting, by default None. 
+ """ + super().__init__(treated_col, event_col, duration_col, ps_col, seed) + self.effect = effect + if cox_fit_kwargs is None: + cox_fit_kwargs = {} + self.cox_fit_kwargs = cox_fit_kwargs + self.variance_method = variance_method + # cox_fit_kwargs takes priority + if variance_method == "naive": + self.cox_fit_kwargs.setdefault("robust", False) + elif variance_method == "robust": + self.cox_fit_kwargs.setdefault("robust", True) + self.n_bootstrap = n_bootstrap + + def _estimate_effect(self, data: pd.DataFrame, weights: np.ndarray): + """Estimate treatment effect.""" + # Estimate the effect from a weighted cox model + # -> Estimand is the hazard ratio + cox_model = CoxPHFitter() + cox_model.fit( + data[[self.duration_col, self.event_col, self.treated_col]].assign( + weights=weights + ), + self.duration_col, + self.event_col, + weights_col="weights", + **self.cox_fit_kwargs, + ) + return cox_model + + def _fit( + self, data: pd.DataFrame, targets: Optional[pd.DataFrame] = None + ) -> tuple[pd.DataFrame, float, np.ndarray, np.ndarray]: + """Estimate the treatment effect via Inverse Probability Treatment Weighting. + + targets: pd.DataFrame, optional + pre-computed propensity scores. + It is possible to pass pre-computed propensity scores to the fit + function to use in the IPTW estimator instead of estimating the + scores using logistic regression. 
+ """ + if targets is None: + # Fit a logistic regression model to predict treatment assignment + # based on the confounding variables + non_cov = [ + self.treated_col, + self.event_col, + self.duration_col, + self.ps_col, + ] + covariates = [x for x in data.columns if x not in non_cov] + X = data[covariates] + logreg = LogisticRegression(solver="lbfgs", penalty=None) # type: ignore + logreg.fit(X, data[self.treated_col]) + + # Compute the inverse probability weights + prob = logreg.predict_proba(X)[:, 1] + else: + prob = targets.to_numpy().flatten() + + treated = data[self.treated_col] == 1 + control = data[self.treated_col] == 0 + weights = np.zeros_like(prob) + if self.effect == "ATE": + weights[treated] = np.divide(1, prob[treated]) + weights[control] = np.divide(1, 1 - prob[control]) + + elif self.effect == "ATT": + weights = data[self.treated_col] + weights += prob * (1 - data[self.treated_col]) / (1 - prob) + + results = self._estimate_effect(data, weights) + return results.summary, results.log_likelihood_, weights, prob + + def point_estimate(self, data: pd.DataFrame) -> np.ndarray: + """Return a point estimate of the treatment effect.""" + results, _, _, _ = self._fit(data) + return results["coef"] + + def fit(self, data: pd.DataFrame, targets: Optional[pd.DataFrame] = None): + """Estimate the treatment effect via Inverse Probability Treatment Weighting. + + Option to estimate the variance of estimation by bootstrapping. + + targets: pd.DataFrame, optional + pre-computed propensity scores. + It is possible to pass pre-computed propensity scores to the fit + function to use in the IPTW estimator instead of estimating the + scores using logistic regression. 
+ """ + self.reset_state() + + results, loglik, weights, ps_scores = self._fit(data, targets) + + if self.variance_method == "bootstrap": + std = self.bootstrap_std(data, self.n_bootstrap, self.rng) + if std is not None: + results = compute_summary(results["coef"], std, index=results.index) + + self.results_ = results + self.log_likelihood_ = loglik + self.propensity_scores_ = ps_scores + self.weights_ = weights + + +# This is a wrapper for an existing implementation +# that can be found here https://github.com/AidanCooper/indcomp +# We have added the possibility to have more than one centers for +# which aggregated data is available and implement the method from +# Bucher et al., The results of direct and indirect treatment comparisons in +# meta-analysis of randomized controlled trials, (1997) + + +class MatchingAjudsted(BaseSurvivalEstimator, BootstrapMixin): + """Implement Matching-Adjusted Indirect Comparisons class. + + We consider that we have access to individual patients data for one of the centers + and that for the other centers we only have access to aggregated data. This method + proposes a way to balance the distribution of the indivual patients data to match + the mean (and std) of a list of covariates available in both studies. + """ + + def __init__( + self, + treated_col="treated", + event_col="E", + duration_col="T", + ps_col="propensity_scores", + variance_method: Literal["naive", "robust", "bootstrap"] = "naive", + n_bootstrap: int = 200, + seed: _SeedType = None, + cox_fit_kwargs=None, + ): + """Initialize Matching-Adjusted Indirect Comparisons estimator. + + Parameters + ---------- + treated_col : str, optional + Column name indicating treatment status, by default "treated". + event_col : str, optional + Column name indicating event occurrence, by default "E". + duration_col : str, optional + Column name indicating time to event or censoring, by default "T". 
+ ps_col : str, optional + Column name indicating propensity scores, by default "propensity_scores". + variance_method : `{"naive", "robust", "bootstrap"}` + Method for estimating the variance, and therefore the p-value of the + estimated treatment effect. + * "naive": Inverse of the Fisher information. + * "robust": The robust sandwich estimator. Useful when samples are + reweighted. + * "bootstrap": Bootstrap the given data, each time estimate the + treatment effect, then use all repeated estimations to compute the + variance. + n_bootstrap : int + Number of bootstrap repetitions, only useful when `variance_method` + is set to "bootstrap", by default 200, as recommended in "Efron B, + Tibshirani RJ. An Introduction to the Bootstrap. Chapman & Hall: + New York, NY, 1993, (page 52)." + seed: {None, int, Sequence[int], SeedSequence, BitGenerator, Generator} + The seed for reproducibility, only useful when `variance_method` is + set to "bootstrap", by default None. + cox_fit_kwargs : dict or None, optional + Additional keyword arguments for Cox model fitting, by default None. + """ + super().__init__(treated_col, event_col, duration_col, ps_col, seed) + if cox_fit_kwargs is None: + cox_fit_kwargs = {} + self.cox_fit_kwargs = cox_fit_kwargs + self.variance_method = variance_method + # cox_fit_kwargs takes priority + if variance_method == "naive": + self.cox_fit_kwargs.setdefault("robust", False) + elif variance_method == "robust": + self.cox_fit_kwargs.setdefault("robust", True) + self.n_bootstrap = n_bootstrap + + # Implementation for only 2 trials for now + # one with IPD and one with aggregated data + + def _fit( + self, data: pd.DataFrame, targets: Optional[pd.DataFrame] = None + ) -> tuple[pd.DataFrame, float, np.ndarray]: + """Fit with reweighting on selected independent patient data. + + Parameters + ---------- + data: pd.DataFrame + Time-to-event datasets with part of rows to be reweighted before + the estimation of treatment effect. 
+ + targets: pd.DataFrame, optional + Dataframe containing marginal statistics of covariates in `data` to + be matched by reweighting. If None, assume a scenario grouped by + `treated_col` in `data`; rows with "treated_col == 0" will be used + to compute the marginal statistics. + """ + non_cov = [ + self.treated_col, + self.event_col, + self.duration_col, + self.ps_col, + ] + covariates = [x for x in data.columns if x not in non_cov] + matching_dict = {} + for col in covariates: + matching_dict[col + ".mean"] = ("mean", col) + matching_dict[col + ".std"] = ("std", col, col + ".mean") + if targets is None: + df_agg = data.groupby(self.treated_col)[covariates].agg(["mean", "std"]) + df_agg.columns = [".".join(x) for x in df_agg.columns] + targets = pd.DataFrame(df_agg.loc[[0]]) + + m_reweight = data[self.treated_col].ne(0) + maic_model = MAIC( + df_index=data[m_reweight], + df_target=targets, + match=matching_dict, + ) + maic_model.calc_weights() + + weights = np.repeat(1.0, data.shape[0]) + weights[data.index[m_reweight]] = maic_model.weights_ + weights[weights <= 0] = 0.01 + + cox_model = CoxPHFitter() + + cox_model.fit( + data[[self.duration_col, self.event_col, self.treated_col]].assign( + weights=weights + ), + self.duration_col, + self.event_col, + weights_col="weights", + **self.cox_fit_kwargs, + ) + + return cox_model.summary, cox_model.log_likelihood_, weights + + def bootstrap_sample( + self, data: pd.DataFrame, seed: _SeedType = None + ) -> pd.DataFrame: + """Resampling only the individual patient data (IPD) with replacement. + + In the setting of an estimation using MAIC, the caller is supposed to have + access only to the individual patient data, assumed here to be marked by non- + zero treatment allocations in the data. Therefore during the resampling, only + accessible data should be resampled. 
+ """ + rng = np.random.default_rng(seed) + is_ipd = data[self.treated_col].ne(0) + # resample individual patient data and concatenate with the rest + data_resampled = data.loc[ + np.concatenate( + [ + rng.choice(data.index[is_ipd], size=is_ipd.sum(), replace=True), + data.index[~is_ipd], + ] + ) + ] + return data_resampled + + def point_estimate(self, data: pd.DataFrame) -> np.ndarray: + """Return a point estimate of the treatment effect.""" + results, _, _ = self._fit(data) + return results["coef"] + + def fit(self, data: pd.DataFrame, targets: Optional[pd.DataFrame] = None) -> None: + """Estimate the treatment effect via Inverse Probability Treatment Weighting. + + Option to estimate the variance of estimation by bootstrapping. + + targets: pd.DataFrame, optional + pre-computed propensity scores. + It is possible to pass pre-computed propensity scores to the fit + function to use in the IPTW estimator instead of estimating the + scores using logistic regression. + """ + self.reset_state() + + results, loglik, weights = self._fit(data, targets) + + if self.variance_method == "bootstrap": + std = self.bootstrap_std(data, self.n_bootstrap, self.rng) + if std is not None: + results = compute_summary(results["coef"], std, index=results.index) + + self.results_ = results + self.log_likelihood_ = loglik + self.weights_ = weights + + +class NaiveComparison(BaseSurvivalEstimator, BootstrapMixin): + """Naive comparison as if in a randomized setting.""" + + def __init__( + self, + treated_col="treated", + event_col="E", + duration_col="T", + ps_col="propensity_scores", + variance_method: Literal["naive", "robust", "bootstrap"] = "naive", + n_bootstrap: int = 200, + seed: _SeedType = None, + cox_fit_kwargs=None, + ): + """Initialize Naive Comparison survival estimator. + + Parameters + ---------- + treated_col : str, optional + Column name indicating treatment status, by default "treated". 
+ event_col : str, optional + Column name indicating event occurrence, by default "E". + duration_col : str, optional + Column name indicating time to event or censoring, by default "T". + ps_col : str, optional + Column name indicating the propensity scores. + variance_method : `{"naive", "robust", "bootstrap"}` + Method for estimating the variance, and therefore the p-value of the + estimated treatment effect. + * "naive": Inverse of the Fisher information. + * "robust": The robust sandwich estimator. Useful when samples are + reweighted. + * "bootstrap": Bootstrap the given data, each time estimate the + treatment effect, then use all repeated estimations to compute the + variance. + n_bootstrap : int + Number of bootstrap repetitions, only useful when `variance_method` + is set to "bootstrap", by default 200, as recommended in "Efron B, + Tibshirani RJ. An Introduction to the Bootstrap. Chapman & Hall: + New York, NY, 1993, (page 52)." + seed: {None, int, Sequence[int], SeedSequence, BitGenerator, Generator} + The seed for reproducibility, only useful when `variance_method` is + set to "bootstrap", by default None. + cox_fit_kwargs : dict or None, optional + Additional keyword arguments for Cox model fitting, by default None. + """ + super().__init__(treated_col, event_col, duration_col, ps_col) + if cox_fit_kwargs is None: + cox_fit_kwargs = {} + self.cox_fit_kwargs = cox_fit_kwargs + self.variance_method = variance_method + # cox_fit_kwargs takes priority + if variance_method == "naive": + self.cox_fit_kwargs.setdefault("robust", False) + elif variance_method == "robust": + self.cox_fit_kwargs.setdefault("robust", True) + self.n_bootstrap = n_bootstrap + + def _fit( + self, data: pd.DataFrame, targets: Optional[pd.DataFrame] = None + ) -> tuple[pd.DataFrame, float, np.ndarray]: + """Fit Naive Comparison estimator. + + Parameters + ---------- + data : pd.DataFrame + Input data as a DataFrame. 
+ targets : pd.DataFrame, optional + Target values associated with the input data, by default None. + In the current implementation targets argument is not used + by the fit function but is needed for the parent class. + """ + cox_model = CoxPHFitter() + cox_model.fit( + data[[self.duration_col, self.event_col, self.treated_col]], + self.duration_col, + self.event_col, + **self.cox_fit_kwargs, + ) + weights = np.repeat(1, data.shape[0]) + return cox_model.summary, cox_model.log_likelihood_, weights + + def point_estimate(self, data: pd.DataFrame) -> np.ndarray: + """Return a point estimate of the treatment effect.""" + results, _, _ = self._fit(data) + return results["coef"] + + def fit(self, data: pd.DataFrame, targets: Optional[pd.DataFrame] = None) -> None: + """Estimate the treatment effect via Inverse Probability Treatment Weighting. + + Option to estimate the variance of estimation by bootstrapping. + + targets: pd.DataFrame, optional + pre-computed propensity scores. + It is possible to pass pre-computed propensity scores to the fit + function to use in the IPTW estimator instead of estimating the + scores using logistic regression. + """ + self.reset_state() + + results, loglik, weights = self._fit(data, targets) + + if self.variance_method == "bootstrap": + std = self.bootstrap_std(data, self.n_bootstrap, self.rng) + if std is not None: + results = compute_summary(results["coef"], std, index=results.index) + + self.results_ = results + self.log_likelihood_ = loglik + self.weights_ = weights + + +class CovariateAdjusted(BaseSurvivalEstimator, BootstrapMixin): + """Covariates adjusted IPTW.""" + + def __init__( + self, + treated_col="treated", + event_col="E", + duration_col="T", + ps_col="propensity_scores", + variance_method: Literal["naive", "robust", "bootstrap"] = "naive", + n_bootstrap: int = 200, + seed: _SeedType = None, + cox_fit_kwargs=None, + ): + """Initialize Covariate-Adjusted survival estimator. 
+ + Parameters + ---------- + treated_col : str, optional + Column name indicating treatment status, by default "treated". + event_col : str, optional + Column name indicating event occurrence, by default "E". + duration_col : str, optional + Column name indicating time to event or censoring, by default "T". + ps_col : str, optional + Column name indicating propensity scores, by default "propensity_scores". + variance_method : `{"naive", "robust", "bootstrap"}` + Method for estimating the variance, and therefore the p-value of the + estimated treatment effect. + * "naive": Inverse of the Fisher information. + * "robust": The robust sandwich estimator. Useful when samples are + reweighted. + * "bootstrap": Bootstrap the given data, each time estimate the + treatment effect, then use all repeated estimations to compute the + variance. + n_bootstrap : int + Number of bootstrap repetitions, only useful when `variance_method` + is set to "bootstrap", by default 200, as recommended in "Efron B, + Tibshirani RJ. An Introduction to the Bootstrap. Chapman & Hall: + New York, NY, 1993, (page 52)." + seed: {None, int, Sequence[int], SeedSequence, BitGenerator, Generator} + The seed for reproducibility, only useful when `variance_method` is + set to "bootstrap", by default None. + cox_fit_kwargs : dict or None, optional + Additional keyword arguments for Cox model fitting, by default None. 
+ """ + super().__init__(treated_col, event_col, duration_col, ps_col) + if cox_fit_kwargs is None: + cox_fit_kwargs = {} + self.cox_fit_kwargs = cox_fit_kwargs + self.variance_method = variance_method + # cox_fit_kwargs takes priority + if variance_method == "naive": + self.cox_fit_kwargs.setdefault("robust", False) + elif variance_method == "robust": + self.cox_fit_kwargs.setdefault("robust", True) + self.n_bootstrap = n_bootstrap + + def _fit( + self, data: pd.DataFrame, targets: Optional[pd.DataFrame] = None + ) -> tuple[pd.DataFrame, float, np.ndarray]: + """Fit Covariate-Adjusted estimator. + + Parameters + ---------- + data : pd.DataFrame + Input data as a DataFrame. + targets : pd.DataFrame, optional + Target values associated with the input data, by default None. + In the current implementation targets argument is not used + by the fit function but is needed for the parent class. + """ + non_cov = [self.treated_col, self.event_col, self.duration_col, self.ps_col] + covariates = [x for x in data.columns if x not in non_cov] + cox_model = CoxPHFitter() + cox_model.fit( + data[[self.treated_col, self.event_col, self.duration_col] + covariates], + self.duration_col, + self.event_col, + **self.cox_fit_kwargs, + ) + weights = np.repeat(1, data.shape[0]) + return cox_model.summary, cox_model.log_likelihood_, weights + + def point_estimate(self, data: pd.DataFrame) -> np.ndarray: + """Return a point estimate of the treatment effect.""" + results, _, _ = self._fit(data) + return results["coef"] + + def fit(self, data: pd.DataFrame, targets: Optional[pd.DataFrame] = None) -> None: + """Estimate the treatment effect via Inverse Probability Treatment Weighting. + + Option to estimate the variance of estimation by bootstrapping. + + targets: pd.DataFrame, optional + pre-computed propensity scores. + It is possible to pass pre-computed propensity scores to the fit + function to use in the IPTW estimator instead of estimating the + scores using logistic regression. 
+ """ + self.reset_state() + + results, loglik, weights = self._fit(data, targets) + + if self.variance_method == "bootstrap": + std = self.bootstrap_std(data, self.n_bootstrap, self.rng) + if std is not None: + results = compute_summary(results["coef"], std, index=results.index) + + self.results_ = results + self.log_likelihood_ = loglik + self.weights_ = weights diff --git a/fedeca/fedeca_core.py b/fedeca/fedeca_core.py new file mode 100644 index 00000000..48c76704 --- /dev/null +++ b/fedeca/fedeca_core.py @@ -0,0 +1,835 @@ +"""Federate causal inference on distributed data.""" +import sys +import time +from collections.abc import Callable +from typing import Optional, Union + +import numpy as np +import pandas as pd +import torch +from scipy.linalg import inv +from substra.sdk.models import ComputePlanStatus +from substrafl.algorithms.pytorch import TorchNewtonRaphsonAlgo +from substrafl.model_loading import download_algo_state +from substrafl.nodes import AggregationNode, TrainDataNode +from substrafl.strategies import FedAvg, NewtonRaphson +from torch import nn +from torch.optim import SGD, Optimizer + +from fedeca.algorithms import TorchWebDiscoAlgo +from fedeca.algorithms.torch_dp_fed_avg_algo import TorchDPFedAvgAlgo +from fedeca.analytics import RobustCoxVariance, RobustCoxVarianceAlgo +from fedeca.strategies import WebDisco +from fedeca.strategies.webdisco_utils import ( + compute_summary_function, + get_final_cox_model_function, +) +from fedeca.utils import ( + Experiment, + make_accuracy_function, + make_c_index_function, + make_substrafl_torch_dataset_class, +) +from fedeca.utils.data_utils import split_dataframe_across_clients +from fedeca.utils.substrafl_utils import get_outmodel_function +from fedeca.utils.survival_utils import BaseSurvivalEstimator, CoxPHModelTorch + + +class FedECA(Experiment, BaseSurvivalEstimator): + """FedECA class tthat performs Federated IPTW.""" + + def __init__( + self, + ndim: int, + ds_client=None, + train_data_nodes: 
Union[list[TrainDataNode], None] = None, + treated_col: str = "treated", + event_col: str = "E", + duration_col: str = "T", + ps_col="propensity_scores", + num_rounds_list: list[int] = [10, 10], + damping_factor_nr: float = 0.8, + l2_coeff_nr: float = 0.0, + standardize_data: bool = True, + penalizer: float = 0.0, + l1_ratio: float = 1.0, + initial_step_size: float = 0.95, + learning_rate_strategy: str = "lifelines", + dtype: float = "float64", + propensity_strategy="iptw", + robust: bool = False, + dp_target_epsilon: Union[float, None] = None, + dp_target_delta: Union[float, None] = None, + dp_max_grad_norm: Union[float, None] = None, + dp_propensity_model_optimizer_class: Optimizer = SGD, + dp_propensity_model_optimizer_kwargs: Union[dict, None] = None, + dp_propensity_model_training_params: Union[dict, None] = None, + seed: int = 42, + aggregation_node: Union[AggregationNode, None] = None, + experiment_folder: str = "./iptw_experiment", + clean_models: bool = False, + dependencies: Union[list, None] = None, + timeout: int = 3600, + sleep_time: int = 30, + fedeca_path: Union[None, str] = None, + evaluation_frequency=None, + ): + """Initialize the Federated IPTW class. + + Implements the FedECA algorithm which combines + an estimation of propensity scores using logistic regression + and the fit of a weighted Cox Model in a federated fashion. + + Parameters + ---------- + client : fl.client.Client + Federated Learning client object. + train_data_nodes : list + List of data nodes participating in the federated training. + ndim : int + Number of dimensions (features) in the dataset. + treated_col : str, optional + Column name indicating treatment status, by default "treated". + event_col : str, optional + Column name indicating event occurrence, by default "E". + duration_col : str, optional + Column name indicating time to event or censoring, by default "T". + num_rounds_list : list, optional + List of number of rounds for each stage, by default [10, 10]. 
+ damping_factor_nr : float, optional + Damping factor for natural gradient regularization, by default 0.8. + l2_coeff_nr : float, optional + L2 regularization coefficient for natural gradient, by default 0.0. + standardize_data : bool, optional + Whether to standardize data before training, by default True. + penalizer : float, optional + Penalizer for IPTW objective, by default 0.0. + l1_ratio : float, optional + L1 ratio for IPTW objective, by default 1.0. + initial_step_size : float, optional + Initial step size for optimization, by default 0.95. + learning_rate_strategy : str, optional + Learning rate strategy, by default "lifelines". + batch_size : int, optional + Batch size for optimization, by default sys.maxsize. + dtype : str, optional + Data type for the model, by default "float64". + propensity_strategy: str, optional + The propensity strategy to use. + robust: bool, optional + Whether or not to use robust estimator of variance as in [1] and + lifelines. + Defaults to False. + [1] David A Binder. Fitting cox’s proportional hazards models from survey data. Biometrika, 79(1):139–147, 1992. # noqa: E501 + dp_target_epsilon: float + The target epsilon for (epsilon, delta)-differential + privacy guarantee. Defaults to None. + dp_target_delta: float + The target delta for (epsilon, delta)-differential + privacy guarantee. Defaults to None. + dp_max_grad_norm: float + The maximum L2 norm of per-sample gradients; + used to enforce differential privacy. Defaults to None. + dp_propensity_model_optimizer_class: torch.optim.Optimizer + The optimizer to use for the training of the propensity model. + Defaults to SGD. + dp_propensity_model_optimizer_kwargs: dict + The params to give to optimizer class. + dp_propensity_model_training_params: dict + A dict with keys batch_size and num_updates for the DP-SGD training. + Defaults to None. + seed : int, optional + Seed for random number generation, by default 42. 
+ aggregation_node : str or None, optional + Node for aggregation, by default None. + experiment_folder : str, optional + Folder path for experiment outputs, by default "./iptw_experiment". + clean_models : bool, optional + Whether to clean models after training, by default False. + dependencies : list, optional + List of dependencies, by default None. + timeout : int, optional + Timeout for a single round of federated learning, by default 3600. + sleep_time : int, optional + Sleep time between rounds, by default 30. + fedeca_path: + Path towards the fedeca repository. + evaluation_frequency: + Evaluation frequency. + **kwargs + Additional keyword arguments. + """ + self.standardize_data = standardize_data + assert dtype in ["float64", "float32", "float16"] + if dtype == "float64": + self.torch_dtype = torch.float64 + elif dtype == "float32": + self.torch_dtype = torch.float32 + else: + self.torch_dtype = torch.float16 + + self.ndim = ndim + self.treated_col = treated_col + self.event_col = event_col + self.duration_col = duration_col + self.ps_col = ps_col + self.seed = seed + self.penalizer = penalizer + self.l1_ratio = l1_ratio + self.initial_step_size = initial_step_size + self.learning_rate_strategy = learning_rate_strategy + self.num_rounds_list = num_rounds_list + self.timeout = timeout + self.sleep_time = sleep_time + self.damping_factor_nr = damping_factor_nr + self.l2_coeff_nr = l2_coeff_nr + self.propensity_strategy = propensity_strategy + self.robust = robust + self.dp_target_delta = dp_target_delta + self.dp_target_epsilon = dp_target_epsilon + self.dp_max_grad_norm = dp_max_grad_norm + self.dp_propensity_model_training_params = dp_propensity_model_training_params + self.dp_propensity_model_optimizer_class = dp_propensity_model_optimizer_class + self.dp_propensity_model_optimizer_kwargs = dp_propensity_model_optimizer_kwargs + self.dependencies = dependencies + self.experiment_folder = experiment_folder + self.fedeca_path = fedeca_path + 
self.evaluation_frequency = evaluation_frequency + self.dtype = dtype + + kwargs = {} + kwargs["algo_dependencies"] = self.dependencies + self.accuracy_metrics_dict = { + "accuracy": make_accuracy_function(self.treated_col) + } + self.cindex_metrics_dict = { + "C-index": make_c_index_function( + event_col=self.event_col, duration_col=self.duration_col + ) + } + self.metrics_dicts_list = [ + self.accuracy_metrics_dict, + self.cindex_metrics_dict, + ] + + # Note that we don't use self attributes because substrafl classes are messed up + # and we don't want confusion + self.logreg_model = LogisticRegressionTorch(self.ndim, self.torch_dtype) + self.logreg_dataset_class = make_substrafl_torch_dataset_class( + [self.treated_col], + self.event_col, + self.duration_col, + dtype=dtype, + return_torch_tensors=True, + ) + # Set propensity model training to DP or not DP mode + self.set_propensity_model_strategy() + + # We use only the treatment variable in the model + cox_model = CoxPHModelTorch(ndim=1, torch_dtype=self.torch_dtype) + survival_dataset_class = make_substrafl_torch_dataset_class( + [self.duration_col, self.event_col], + self.event_col, + self.duration_col, + dtype=dtype, + ) + + # no self attributes in this class !!!!!! 
+ class WDAlgo(TorchWebDiscoAlgo): + def __init__(self, propensity_model, robust): + super().__init__( + model=cox_model, + # TODO make this batch-size argument disappear from + # webdisco algo + batch_size=sys.maxsize, + dataset=survival_dataset_class, + seed=seed, + duration_col=duration_col, + event_col=event_col, + treated_col=treated_col, + standardize_data=standardize_data, + penalizer=penalizer, + l1_ratio=l1_ratio, + initial_step_size=initial_step_size, + learning_rate_strategy=learning_rate_strategy, + store_hessian=True, + propensity_model=propensity_model, + propensity_strategy=propensity_strategy, + robust=robust, + ) + self._propensity_model = propensity_model + + self.webdisco_algo = WDAlgo(propensity_model=None, robust=self.robust) + self.webdisco_strategy = WebDisco( + algo=self.webdisco_algo, standardize_data=self.standardize_data + ) + + kwargs["strategies"] = [self.propensity_model_strategy, self.webdisco_strategy] + if self.robust: + # We prepare robust estimation + class MockAlgo: + def __init__(self): + self.strategies = ["Robust Cox Variance"] + + mock_algo = MockAlgo() + kwargs["strategies"].append( + RobustCoxVariance( + algo=mock_algo, + ) + ) + # We need those two lines for the zip to consider all 3 + # strategies + self.metrics_dicts_list.append({}) + self.num_rounds_list.append(sys.maxsize) + + kwargs["metrics_dicts_list"] = self.metrics_dicts_list + kwargs["ds_client"] = ds_client + kwargs["train_data_nodes"] = train_data_nodes + kwargs["aggregation_node"] = aggregation_node + kwargs["experiment_folder"] = self.experiment_folder + kwargs["clean_models"] = clean_models + kwargs["num_rounds_list"] = self.num_rounds_list + kwargs["fedeca_path"] = self.fedeca_path + kwargs["algo_dependencies"] = self.dependencies + kwargs["evaluation_frequency"] = self.evaluation_frequency + + # TODO: test_data_nodes and evaluation_frequency are not passed + + super().__init__(**kwargs) + + def check_cp_status(self, idx=0): + """Check the status of the 
process.""" + training_type = "training" + if idx == 0: + model_name = "Propensity Model" + elif idx == 1: + model_name = "Weighted Cox Model" + else: + model_name = "Robust Variance" + training_type = "estimation" + + print(f"Waiting on {model_name} {training_type} to finish...") + t1 = time.time() + t2 = t1 + while (t2 - t1) < self.timeout: + status = self.ds_client.get_compute_plan( + self.compute_plan_keys[idx].key + ).status + if status == ComputePlanStatus.done: + print( + f"""Compute plan {self.compute_plan_keys[0].key} of {model_name} has + finished !""" + ) + break + elif ( + status == ComputePlanStatus.failed + or status == ComputePlanStatus.canceled + ): + raise ValueError( + f"""Compute plan {self.compute_plan_keys[0].key} of {model_name} has + failed""" + ) + elif ( + status == ComputePlanStatus.doing + or status == ComputePlanStatus.todo + or status == ComputePlanStatus.waiting + ): + pass + else: + print( + f"""Compute plan status is {status}, this shouldn't happen, sleeping + {self.time_sleep} and retrying until timeout {self.timeout}""" + ) + time.sleep(self.sleep_time) + + def set_propensity_model_strategy(self): + """Set FedECA to use DP. 
+
+        At the end it sets the parameter self.propensity_model_strategy
+        """
+        self.dp_params_given = [
+            self.dp_max_grad_norm is not None,
+            self.dp_target_epsilon is not None,
+            self.dp_target_delta is not None,
+        ]
+
+        if any(self.dp_params_given) and not all(self.dp_params_given):
+            raise ValueError(
+                "To use DP you should provide values for all DP parameters: "
+                "dp_max_grad_norm, dp_target_epsilon and dp_target_delta"
+            )
+        self._apply_dp = all(self.dp_params_given)
+        if self._apply_dp:
+            assert (
+                self.dp_propensity_model_training_params is not None
+            ), "You should give dp_propensity_model_training_params"
+            "={'batch_size': ?, 'num_updates': ?}"
+            assert (
+                "batch_size" in self.dp_propensity_model_training_params
+                and "num_updates" in self.dp_propensity_model_training_params
+            ), "You should fill all fields of dp_propensity_model_training_params"
+            "={'batch_size': ?, 'num_updates': ?}"
+            if self.dp_propensity_model_optimizer_kwargs is None:
+                self.dp_propensity_model_optimizer_kwargs = {}
+            dp_propensity_model_optimizer = self.dp_propensity_model_optimizer_class(
+                params=self.logreg_model.parameters(),
+                **self.dp_propensity_model_optimizer_kwargs,
+            )
+            num_rounds_propensity = self.num_rounds_list[0]
+
+            # no self attributes in this class !!!!!!
+ # fed_iptw_self = self hack doesn't work for serialization issue + logreg_model = self.logreg_model + logreg_dataset_class = self.logreg_dataset_class + seed = self.seed + num_updates = self.dp_propensity_model_training_params["num_updates"] + batch_size = self.dp_propensity_model_training_params["batch_size"] + dp_target_epsilon = self.dp_target_epsilon + dp_target_delta = self.dp_target_delta + dp_max_grad_norm = self.dp_max_grad_norm + + class DPLogRegAlgo(TorchDPFedAvgAlgo): + def __init__(self): + super().__init__( + model=logreg_model, + criterion=nn.BCELoss(), + optimizer=dp_propensity_model_optimizer, + dataset=logreg_dataset_class, + seed=seed, + num_updates=num_updates, + batch_size=batch_size, + num_rounds=num_rounds_propensity, + dp_target_epsilon=dp_target_epsilon, + dp_target_delta=dp_target_delta, + dp_max_grad_norm=dp_max_grad_norm, + ) + + self.dp_algo = DPLogRegAlgo() + self.dp_strategy = FedAvg(algo=self.dp_algo) + self.propensity_model_strategy = self.dp_strategy + else: + # no self attributes in this class + # fed_iptw_self = self hack doesn't work for serialization issue + logreg_model = self.logreg_model + logreg_dataset_class = self.logreg_dataset_class + seed = self.seed + l2_coeff_nr = self.l2_coeff_nr + + class NRAlgo(TorchNewtonRaphsonAlgo): + def __init__(self): + super().__init__( + model=logreg_model, + batch_size=sys.maxsize, + criterion=nn.BCELoss(), + dataset=logreg_dataset_class, + seed=seed, + l2_coeff=l2_coeff_nr, + ) + + self.nr_algo = NRAlgo() + self.nr_strategy = NewtonRaphson( + damping_factor=self.damping_factor_nr, algo=self.nr_algo + ) + self.propensity_model_strategy = self.nr_strategy + + def reset_experiment(self): + """Remove the propensity model just in case.""" + super().reset_experiment() + if hasattr(self, "propensity_model"): + self.propensity_model = None + + def fit( + self, + data: pd.DataFrame, + targets: Optional[pd.DataFrame] = None, + n_clients: Union[int, None] = None, + split_method: Union[Callable, 
None] = None, + split_method_kwargs: Union[Callable, None] = None, + data_path: Union[str, None] = None, + robust: Union[bool, None] = None, + dp_target_epsilon: Union[float, None] = None, + dp_target_delta: Union[float, None] = None, + dp_max_grad_norm: Union[float, None] = None, + dp_propensity_model_training_params: Union[dict, None] = None, + dp_propensity_model_optimizer_class: Union[Optimizer, None] = None, + dp_propensity_model_optimizer_kwargs: Union[dict, None] = None, + backend_type: str = "subprocess", + urls: Union[list[str], None] = None, + server_org_id: Union[str, None] = None, + tokens: Union[list[str], None] = None, + ): + """Fit strategies on global data split across clients. + + For test if provided we use test_data_nodes from int or the + train_data_nodes in the latter train=test. + + Parameters + ---------- + data : pd.DataFrame + The global data to be split has to be a dataframe as we only support + one opener type. + targets : Optional[pd.DataFrame], optional + A dataframe with propensity score or nothing. + nb_clients : Union[int, None], optional + The number of clients used to split data across, by default None + split_method : Union[Callable, None], optional + How to split data across the nb_clients, by default None + split_method_kwargs : Union[Callable, None], optional + Argument of the function used to split data, by default None + data_path : Union[str, None] + Where to store the data on disk when backend is not remote. + robust: Union[None, bool], optional + Whether or not to use robust estimator of variance as in [1] and + lifelines. + Defauts to False. + [1] David A Binder. Fitting cox’s proportional hazards models from survey data. Biometrika, 79(1):139–147, 1992. # noqa: E501 + dp_target_epsilon: float + The target epsilon for (epsilon, delta)-differential + private guarantee. Defaults to None. + dp_target_delta: float + The target delta for (epsilon, delta)-differential + private guarantee. Defaults to None. 
+        dp_max_grad_norm: float
+            The maximum L2 norm of per-sample gradients;
+            used to enforce differential privacy. Defaults to None.
+        dp_propensity_model_optimizer_class: torch.optim.Optimizer
+            The optimizer to use for the training of the propensity model.
+            Defaults to Adam.
+        dp_propensity_model_optimizer_kwargs: dict
+            The params to give to optimizer class.
+        dp_propensity_model_training_params: dict
+            A dict with keys batch_size and num_updates for the DP-SGD training.
+            Defaults to None.
+        backend_type: str
+            The backend to use for substra. Can be either:
+            ["subprocess", "docker", "remote"]. Defaults to "subprocess".
+        urls: Union[list[str], None]
+            Urls corresponding to clients API if using remote backend_type.
+            Defaults to None.
+        server_org_id: Union[str, None]
+            Url corresponding to server API if using remote backend_type.
+            Defaults to None.
+        tokens: Union[list[str], None]
+            Tokens necessary to authenticate each client API if backend_type
+            is remote. Defaults to None.
+        """
+        # Reset experiment so that it can fit on a new dataset
+        self.reset_experiment()
+        if backend_type != "remote" and (
+            urls is not None or server_org_id is not None or tokens is not None
+        ):
+            print(
+                "urls, server_org_id and tokens are ignored if backend_type is "
+                "not remote; Make sure that you launched the fit with the right"
+                " combination of parameters."
+ ) + + # We first have to create the TrainDataNodes objects for this we split + # the data into nb_clients using split_method + ( + self.clients, + self.train_data_nodes, + test_data_nodes, + _, + _, + ) = split_dataframe_across_clients( + df=data, + n_clients=n_clients, + split_method=split_method, + split_method_kwargs=split_method_kwargs, + backend_type=backend_type, + data_path=data_path, + urls=urls, + tokens=tokens, + ) + if server_org_id is not None: + # Curiously we don't need to identify the server with its own token + # it's basically a passive entity + kwargs_agg_node = { + "organization_id": server_org_id, + } + self.aggregation_node = AggregationNode(**kwargs_agg_node) + # Overwrites test_data_nodes + if self.test_data_nodes is None: + self.test_data_nodes = test_data_nodes + else: + raise ValueError( + "You should not use the fit method if you already provided" + " test_data_nodes" + ) + + # So there is a tension between every param is given at instantiation or + # everything is given to fit + dp_params_given = False + for dp_param_name in [ + "dp_target_epsilon", + "dp_target_delta", + "dp_max_grad_norm", + "dp_propensity_model_training_params", + "dp_propensity_model_optimizer_class", + "dp_propensity_model_optimizer_kwargs", + ]: + param = eval(dp_param_name) + if param is not None: + dp_params_given = True + setattr(self, dp_param_name, param) + + if dp_params_given: + # We need to reset the training mode more deeply + self.set_propensity_model_strategy() + # Allow for robust=True + self.strategies[0] = self.propensity_model_strategy + self.strategies[1] = self.webdisco_strategy + + if robust != self.robust: + self.robust = robust + if robust: + + class MockAlgo: + def __init__(self): + self.strategies = ["Robust Cox Variance"] + + mock_algo = MockAlgo() + self.strategies.append( + RobustCoxVariance( + algo=mock_algo, + ) + ) + # We put WebDisco in "robust" mode in the sense that we ask it + # to store all needed quantities for robust variance 
estimation + self.strategies[ + 1 + ].algo._robust = True # not sufficient for serialization + # possible only because we added robust as a kwargs + self.strategies[1].algo.kwargs.update({"robust": True}) + # We need those two lines for the zip to consider all 3 + # strategies + self.metrics_dicts_list.append({}) + self.num_rounds_list.append(sys.maxsize) + else: + self.strategies = self.strategies[:2] + + self.run(targets=targets) + self.propensity_scores_, self.weights_ = self.compute_propensity_scores(data) + + def run(self, targets: Union[pd.DataFrame, None] = None): + """Run the federated iptw algorithms.""" + del targets + print("Careful for now the argument target is ignored completely") + # We first run the propensity model + print("Fitting the propensity model...") + t1 = time.time() + super().run(1) + + if not (self.simu_mode): + self.check_cp_status() + self.performances_propensity_model = pd.DataFrame( + self.ds_client.get_performances(self.compute_plan_keys[0].key).dict() + ) + else: + self.performances_propensity_model = self.performances_strategies[0] + print(self.performances_propensity_model) + t2 = time.time() + self.propensity_model_fit_time = t2 - t1 + print(f"Time to fit Propensity model {self.propensity_model_fit_time}s") + print("Finished, recovering the final propensity model from substra") + # TODO to add the opportunity to use the targets you have to either: + # give the full targets to every client as a kwargs of their Algo + # so effectively one would need to reinstantiate algos objects or to + # modify the API to do it in the run (cleaner) + # or to rebuild the data on disk with an additional column that would be + # the propensity score, aka rerun split_dataframes after having given it + # an additional column and modify the algo so that it uses this column as + # a score. Both schemes are quite cumbersome to implement. 
+ # We retrieve the model and pass it to the strategy + # we run the IPTW Cox + if not (self.simu_mode): + algo = download_algo_state( + client=self.ds_client, + compute_plan_key=self.compute_plan_keys[0].key, + round_idx=None, + ) + + self.propensity_model = algo.model + else: + # The algos are stored in the nodes + self.propensity_model = self.train_data_nodes[0].algo.model + # TODO check with webdisco as well + # Do not touch the two lines below this is dark dark magic + self.strategies[1].algo._propensity_model = self.propensity_model + self.strategies[1].algo.kwargs.update( + {"propensity_model": self.propensity_model} + ) + # We need to save intermediate outputs now + for t in self.train_data_nodes: + t.keep_intermediate_states = True + + print("Fitting propensity weighted Cox model...") + t1 = time.time() + super().run(1) + + if not self.simu_mode: + self.check_cp_status(idx=1) + t2 = time.time() + self.webdisco_fit_time = t2 - t1 + print(f"Time to fit WebDisco {self.webdisco_fit_time}s") + print("Finished fitting weighted Cox model.") + self.total_fit_time = self.propensity_model_fit_time + self.webdisco_fit_time + self.print_summary() + + def print_summary(self): + """Print a summary of the FedECA estimation.""" + assert ( + len(self.compute_plan_keys) == 2 + ), "You need to run the run method before getting the summary" + print("Evolution of performance of propensity model:") + print(self.performances_propensity_model) + print("Checking if the Cox model has converged:") + self.get_final_cox_model() + print("Computing summary...") + self.compute_summary() + print("Final partial log-likelihood:") + print(self.ll) + print(self.results_) + + def get_final_cox_model(self): + """Retrieve final cox model.""" + print("Retrieving final hessian and log-likelihood") + if not self.simu_mode: + cp = self.compute_plan_keys[1].key + else: + cp = self.compute_plan_keys[1] + + ( + self.hessian, + self.ll, + self.final_params, + self.computed_stds, + 
self.global_robust_statistics, + ) = get_final_cox_model_function( + self.ds_client, + cp, + self.num_rounds_list[1], + self.standardize_data, + self.duration_col, + self.event_col, + simu_mode=self.simu_mode, + robust=self.robust, + ) + + def compute_propensity_scores(self, data: pd.DataFrame): + """Compute propensity scores and corresponding weights.""" + X = data.drop([self.duration_col, self.event_col, self.treated_col], axis=1) + Xprop = torch.from_numpy(np.array(X)).type(self.torch_dtype) + with torch.no_grad(): + propensity_scores = self.propensity_model(Xprop) + + propensity_scores = propensity_scores.detach().numpy().flatten() + weights = data[self.treated_col] * 1.0 / propensity_scores + ( + 1 - data[self.treated_col] + ) * 1.0 / (1.0 - propensity_scores) + + return np.array(propensity_scores), np.array(weights) + + def compute_summary(self, alpha=0.05): + """Compute summary for a given threshold. + + Parameters + ---------- + alpha: float, (default=0.05) + Confidence level for computing CIs + """ + self.variance_matrix = -inv(self.hessian) / np.outer( + self.computed_stds, self.computed_stds + ) + if self.robust: + assert self.global_robust_statistics + beta = self.final_params + variance_matrix = self.variance_matrix + global_robust_statistics = self.global_robust_statistics + propensity_model = self.propensity_model + duration_col = self.duration_col + event_col = self.event_col + treated_col = self.treated_col + + # no self attributes in this class !!!!!! 
+ class MyRobustCoxVarianceAlgo(RobustCoxVarianceAlgo): + def __init__(self, **kwargs): + super().__init__( + beta=beta, + variance_matrix=variance_matrix, + global_robust_statistics=global_robust_statistics, + propensity_model=propensity_model, + duration_col=duration_col, + event_col=event_col, + treated_col=treated_col, + ) + + my_robust_cox_algo = MyRobustCoxVarianceAlgo() + # Now we need to make sure strategy has the right algo + self.strategies[2].algo = my_robust_cox_algo + super().run(1) + + if not self.simu_mode: + self.check_cp_status(idx=2) + self.variance_matrix = get_outmodel_function( + "Aggregating Qk into Q", + self.ds_client, + compute_plan_key=self.compute_plan_keys[2].key, + idx_task=0, + ) + + else: + # Awful but hard to hack better + self.variance_matrix = sum( + [e.algo._client_statistics["Qk"] for e in self.compute_plan_keys[2]] + ) + + summary = compute_summary_function( + self.final_params, self.variance_matrix, alpha + ) + summary["exp(coef)"] = np.exp(summary["coef"]) + summary["exp(coef) lower 95%"] = np.exp(summary["coef lower 95%"]) + summary["exp(coef) upper 95%"] = np.exp(summary["coef upper 95%"]) + + self.results_ = summary.copy() + + +class LogisticRegressionTorch(nn.Module): + """Pytorch logistic regression class.""" + + def __init__(self, ndim, torch_dtype=torch.float64): + """Initialize Logistic Regression model in PyTorch. + + Parameters + ---------- + ndim : int + Number of input dimensions. + torch_dtype : torch.dtype, optional + Data type for PyTorch tensors, by default torch.float64. + """ + self.torch_dtype = torch_dtype + self.ndim = ndim + super(LogisticRegressionTorch, self).__init__() + self.fc1 = nn.Linear(self.ndim, 1).to(self.torch_dtype) + # Zero-init as in sklearn + self.fc1.weight.data.fill_(0.0) + self.fc1.bias.data.fill_(0.0) + + def forward(self, x, eval=False): + """Perform a forward pass through the Logistic Regression model. 
+ + Parameters + ---------- + x : torch.Tensor + Input tensor of shape (batch_size, ndim). + eval : bool, optional + Set to True during evaluation, by default False. + + Returns + ------- + torch.Tensor + Predicted probabilities after passing through sigmoid activation. + """ + x = self.fc1(x) + return torch.sigmoid(x) diff --git a/fedeca/metrics/metrics.py b/fedeca/metrics/metrics.py new file mode 100644 index 00000000..18fb8ca5 --- /dev/null +++ b/fedeca/metrics/metrics.py @@ -0,0 +1,58 @@ +"""Define metrics for ECA analysis.""" + +import numpy as np +import pandas as pd + + +def standardized_mean_diff(confounders, treated): + """Compute the Standardized Mean Differences (SMD). + + Compute the Standardized Mean Differences between + treated and control patients. + + Parameters + ---------- + confounders : np.ndarray + confounders array. + treated : np.ndarray + mask of booleans giving information about treated patients. + + Returns + ------- + smd: np.ndarray + standardized mean differences of the confounders. 
+
+    """
+    n_unique = confounders.nunique()
+    cat_variables = n_unique == 2
+    continuous_variables = n_unique != 2
+
+    smd_continuous = (
+        confounders.loc[treated, continuous_variables].mean()
+        - confounders.loc[~treated, continuous_variables].mean()
+    )
+    smd_continuous /= np.sqrt(
+        (
+            confounders.loc[treated, continuous_variables].var()
+            + confounders.loc[~treated, continuous_variables].var()
+        )
+        / 2
+    )
+    smd_continuous *= 100
+
+    smd_cat = (
+        confounders.loc[treated, cat_variables].mean()
+        - confounders.loc[~treated, cat_variables].mean()
+    )
+    smd_cat /= np.sqrt(
+        (
+            confounders.loc[treated, cat_variables].mean()
+            * (1 - confounders.loc[treated, cat_variables]).mean()
+            + confounders.loc[~treated, cat_variables].mean()
+            * (1 - confounders.loc[~treated, cat_variables]).mean()
+        )
+        / 2
+    )
+    smd_cat *= 100
+
+    smd = pd.concat([smd_continuous, smd_cat])
+    return smd
diff --git a/fedeca/schemas.py b/fedeca/schemas.py
new file mode 100644
index 00000000..726f4828
--- /dev/null
+++ b/fedeca/schemas.py
@@ -0,0 +1,54 @@
+"""Schemas used in the application."""
+from enum import Enum
+from typing import List
+
+import numpy as np
+import pydantic
+
+
+class StrategyName(str, Enum):
+    """Strategy name class."""
+
+    FEDERATED_AVERAGING = "Federated Averaging"
+    SCAFFOLD = "Scaffold"
+    ONE_ORGANIZATION = "One organization"
+    NEWTON_RAPHSON = "Newton Raphson"
+
+
+class _Model(pydantic.BaseModel):
+    """Base model configuration."""
+
+    class Config:
+        arbitrary_types_allowed = True
+
+
+class WebDiscoAveragedStates(_Model):
+    """Shared state sent by the aggregate_organization in the WebDisco strategy.
+
+    Args
+    ----
+    risk_phi (list[numpy.ndarray]): aggregated risk statistics sent to the clients
+    """
+
+    risk_phi: List[np.ndarray]
+    risk_phi_x: List[np.ndarray]
+    risk_phi_x_x: List[np.ndarray]
+
+
+class WebDiscoSharedState(_Model):
+    r"""WebDisco shared state class. 
+ + Shared state returned by the train method of the algorithm for each client, + received by the aggregate function in the Newton Raphson strategy. + + Args + ---- + n_samples (int): number of samples of the client dataset. + gradients (numpy.ndarray): gradients of the model parameters :math:`\\theta`. + hessian (numpy.ndarray): second derivative of the loss function regarding + the model parameters :math:`\\theta`. + """ + + risk_phi: List[np.ndarray] + risk_phi_x: List[np.ndarray] + risk_phi_x_x: List[np.ndarray] diff --git a/fedeca/scripts/__init__.py b/fedeca/scripts/__init__.py new file mode 100644 index 00000000..78a12edf --- /dev/null +++ b/fedeca/scripts/__init__.py @@ -0,0 +1 @@ +"""A module grouping :mod:`torch` neural network models.""" diff --git a/fedeca/scripts/dp_logreg.py b/fedeca/scripts/dp_logreg.py new file mode 100644 index 00000000..361284a9 --- /dev/null +++ b/fedeca/scripts/dp_logreg.py @@ -0,0 +1,210 @@ +"""Runs the propensity model training part with DP.""" +import sys +from itertools import product + +import numpy as np +import pandas as pd +import torch +import torch.nn as nn +from sklearn.metrics import accuracy_score +from substrafl.algorithms.pytorch import TorchNewtonRaphsonAlgo +from substrafl.model_loading import download_algo_state +from substrafl.strategies import FedAvg, NewtonRaphson +from torch.optim import SGD + +from fedeca.algorithms.torch_dp_fed_avg_algo import TorchDPFedAvgAlgo +from fedeca.fedeca_core import LogisticRegressionTorch +from fedeca.utils import ( + Experiment, + make_accuracy_function, + make_substrafl_torch_dataset_class, +) +from fedeca.utils.survival_utils import CoxData, make_categorical + +if __name__ == "__main__": + epsilons = [0.1, 1.0, 5.0, 10.0][::-1] + deltas = [10 ** (-i) for i in range(1, 3)] + START_SEED = 42 + NDIM = 10 + NUM_ROUNDS = 10 + NUM_UPDATES = 100 + N_REPETITIONS = 5 + BACKEND_TYPE = "subprocess" + BATCH_SIZE = 32 + na_proportion = 0.0 + seeds = np.arange(START_SEED, START_SEED + 
N_REPETITIONS).tolist() + + rng = np.random.default_rng(seeds[0]) + # Generating data with strong linear relationship + simu_coxreg = CoxData( + n_samples=300, + ndim=NDIM, + prop_treated=0.5, + propensity="linear", + dtype="float32", + overlap=100.0, + seed=rng, + random_censoring=True, + censoring_factor=0.3, + standardize_features=False, + ) + X, T, C, treated, _ = simu_coxreg.generate_data() + # Will make first columns to be categorical + Xcat, Xcont = make_categorical(X, up_to=0) + # Build the final dataframe using appropriate column names and adding missing values + cols_dict = {} + X = np.concatenate((Xcat, Xcont), axis=1) + for i in range(Xcat.shape[1] + Xcont.shape[1]): + currentX = X[:, i].astype("float32") + mask_na = rng.uniform(0, 1, X.shape[0]) > (1.0 - na_proportion) + currentX[mask_na] = np.nan + if i < Xcat.shape[1]: + colname = "cat_col" + else: + colname = "col" + i -= Xcat.shape[1] + cols_dict[f"{colname}_{i}"] = currentX + + # The absolute value is superfluous but just to be sure + cols_dict["T"] = np.abs(T) + cols_dict["E"] = (1.0 - C).astype("uint8") + cols_dict["treated"] = treated + + df = pd.DataFrame(cols_dict) + # Final cast of categorical columns that was impossible due to nan in numpy + for i in range(Xcat.shape[1]): + df[f"cat_col_{i}"] = df[f"cat_col_{i}"].astype("Int64") + + results_all_reps = [] + edelta_list = list(product(epsilons, deltas)) + accuracy_metrics_dict = {"accuracy": make_accuracy_function("treated")} + # We set model and dataloaders to be the same for each rep + logreg_model = LogisticRegressionTorch(NDIM, torch.float32) + logreg_dataset_class = make_substrafl_torch_dataset_class( + ["treated"], "E", "T", dtype="float32", return_torch_tensors=True + ) + + for se in seeds: + # We run NewtonRaphson wo DP + class NRAlgo(TorchNewtonRaphsonAlgo): + """Newton-Raphson algo. 
+ + Parameters + ---------- + TorchNewtonRaphsonAlgo : _type_ + _description_ + """ + + def __init__(self): + """Instantiate NRAlgo wo DP.""" + super().__init__( + model=logreg_model, + batch_size=sys.maxsize, + criterion=nn.BCELoss(), + dataset=logreg_dataset_class, + seed=se, # shouldn't have any effect + ) + + nr_algo = NRAlgo() + nr_strategy = NewtonRaphson(damping_factor=0.8, algo=nr_algo) + regular_xp = Experiment( + strategies=[nr_strategy], + num_rounds_list=[10], + metrics_dicts_list=[accuracy_metrics_dict], + ) + + regular_xp.fit(df, nb_clients=3, backend_type=BACKEND_TYPE) + if regular_xp.ds_client.is_simu: + final_model = regular_xp.train_data_nodes[0].algo.model + else: + final_algo = download_algo_state( + client=regular_xp.ds_client, + compute_plan_key=regular_xp.compute_plan_keys[0].key, + round_idx=None, + ) + + final_model = final_algo.model + final_pred = ( + final_model( + torch.from_numpy( + df.drop(columns=["treated", "T", "E"]).to_numpy().astype("float32") + ) + ) + .detach() + .numpy() + ) + y_true = df["treated"].to_numpy() + mean_perf = accuracy_score(y_true, final_pred > 0.5) + + print(f"Mean performance without DP, Perf={mean_perf}") + results_all_reps.append({"perf": mean_perf, "e": None, "d": None, "seed": se}) + + for e, d in edelta_list: + # We init an algo with the right target epsilon and delta + # The init (zero init) is the same for all models but batching seeding + # is controlled by se. + logreg_model = LogisticRegressionTorch(NDIM, torch.float32) + optimizer = SGD(logreg_model.parameters(), lr=0.01) + + class DPLogRegAlgo(TorchDPFedAvgAlgo): + """DP FedAvg algo. 
+ + Parameters + ---------- + TorchDPFedAvgAlgo : _type_ + _description_ + """ + + def __init__(self): + """Instantiate FedAvg algo with DP.""" + super().__init__( + model=logreg_model, + criterion=nn.BCELoss(), + optimizer=optimizer, + dataset=logreg_dataset_class, + seed=se, + num_updates=NUM_UPDATES, + batch_size=BATCH_SIZE, + num_rounds=NUM_ROUNDS, + dp_target_epsilon=e, + dp_target_delta=d, + dp_max_grad_norm=1.0, + ) + + dp_algo = DPLogRegAlgo() + dp_fedavg_strategy = FedAvg(algo=dp_algo) + dp_xp = Experiment( + strategies=[dp_fedavg_strategy], + num_rounds_list=[NUM_ROUNDS], + metrics_dicts_list=[accuracy_metrics_dict], + ) + dp_xp.fit(df, nb_clients=3, backend_type=BACKEND_TYPE) + if dp_xp.ds_client.is_simu: + final_model = dp_xp.train_data_nodes[0].algo.model + else: + final_algo = download_algo_state( + client=dp_xp.ds_client, + compute_plan_key=dp_xp.compute_plan_keys[0].key, + round_idx=None, + ) + final_model = final_algo.model + final_pred = ( + final_model( + torch.from_numpy( + df.drop(columns=["treated", "T", "E"]) + .to_numpy() + .astype("float32") + ) + ) + .detach() + .numpy() + ) + y_true = df["treated"].to_numpy() + mean_perf = accuracy_score(y_true, final_pred > 0.5) + + print(f"Mean performance eps={e}, delta={d}, Perf={mean_perf}") + # mean_perf = float(np.random.uniform(0, 1.)) + results_all_reps.append({"perf": mean_perf, "e": e, "d": d, "seed": se}) + + results = pd.DataFrame.from_dict(results_all_reps) + results.to_csv("results_logreg_dp_training.csv", index=False) diff --git a/fedeca/scripts/dp_plot.py b/fedeca/scripts/dp_plot.py new file mode 100644 index 00000000..daa861b0 --- /dev/null +++ b/fedeca/scripts/dp_plot.py @@ -0,0 +1,56 @@ +"""Plot difference between DP training and normal training.""" +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns + +sns.set_theme(style="darkgrid") + +if __name__ == "__main__": + results = pd.read_csv("results_logreg_dp_training.csv") + results = 
results.rename(columns={"perf": "Performance"}) + linestyle_str = [ + ("solid", "solid"), # Same as (0, ()) or '-' + ("dotted", "dotted"), # Same as (0, (1, 1)) or ':' + ("dashed", "dashed"), # Same as '--' + ("dashdot", "dashdot"), + ] + linestyle_tuple = [ + ("loosely dotted", (0, (1, 10))), + ("densely dotted", (0, (1, 1))), + ("loosely dashed", (0, (5, 10))), + ("densely dashed", (0, (5, 1))), + ("loosely dashdotted", (0, (3, 10, 1, 10))), + ("densely dashdotted", (0, (3, 1, 1, 1))), + ("dashdotdotted", (0, (3, 5, 1, 5, 1, 5))), + ("loosely dashdotdotted", (0, (3, 10, 1, 10, 1, 10))), + ("densely dashdotdotted", (0, (3, 1, 1, 1, 1, 1))), + ] + linestyles = linestyle_tuple + linestyle_str + deltas = [d for d in results["d"].unique() if not (np.isnan(d))] + fig, ax = plt.subplots() + for i, d in enumerate(deltas): + cdf = results.loc[results["d"] == d] + sns.lineplot( + data=cdf, + x="e", + y="Performance", + label=rf"$\delta={d}$", + linestyle=linestyles[::-1][i][1], + ax=ax, + ) + ax.set_xscale("log") + xtick_values = [d for d in results["e"].unique() if not (np.isnan(d))] + xlabels = [str(v) for v in xtick_values] + ax.set_xticks(xtick_values, xlabels) + ax.axhline( + np.array(results.loc[results["d"].isnull(), "Performance"].tolist()).mean(), + color="black", + label="Baseline wo DP", + ) + ax.set_xlim(0.1, 10.0) + plt.legend() + plt.xlim(0.1, 10.0) + plt.xlabel(r"$\epsilon$") + plt.ylabel("Performance") + plt.savefig("DP_plot_propensity.pdf", dpi=100, bbox_inches="tight") diff --git a/fedeca/scripts/fl_iptw.py b/fedeca/scripts/fl_iptw.py new file mode 100644 index 00000000..d65aac16 --- /dev/null +++ b/fedeca/scripts/fl_iptw.py @@ -0,0 +1,58 @@ +"""Federated IPTW script.""" +import torch + +from fedeca import FedECA +from fedeca.utils.data_utils import generate_survival_data + +if __name__ == "__main__": + seed = 42 + torch.manual_seed(seed) + N_CLIENTS = 2 + NDIM = 10 + URLS = [] + TOKENS = [] + # Choose BACKEND_TYPE between subprocess, remote and docker + 
+ BACKEND_TYPE = "subprocess" + if BACKEND_TYPE == "remote": + # If you use BACKEND_TYPE="remote", download your API key with SSO login then + # copy-paste it in a file called api_key inside the tokens folder otherwise + # comment the following two lines + URLS = [f"https://api.org-{i + 1}.demo.cg.owkin.tech" for i in range(N_CLIENTS)] + TOKENS = [open(f"tokens/api_key{i + 1}", "r").read() for i in range(N_CLIENTS)] + + df, cox_model_coeffs = generate_survival_data( + na_proportion=0.0, + ncategorical=0, + ndim=NDIM, + seed=seed, + n_samples=1000, + use_cate=False, + censoring_factor=0.3, + ) + + # We can choose not to give any clients or data of any kind to FedECA + # they will be given to it by the fit method + IPTW = FedECA( + ndim=NDIM, + treated_col="treated", + duration_col="T", + event_col="E", + num_rounds_list=[2, 4], + dp_target_epsilon=10.0, + dp_max_grad_norm=1.1, + dp_target_delta=0.1, + dp_propensity_model_training_params={"batch_size": 50, "num_updates": 200}, + dp_propensity_model_optimizer_kwargs={"lr": 0.001}, + ) + IPTW.fit( + df, + None, + N_CLIENTS, + split_method="split_control_over_centers", + split_method_kwargs={"treatment_info": "treated"}, + backend_type=BACKEND_TYPE, + urls=URLS, + tokens=TOKENS, + robust=True, + ) diff --git a/fedeca/scripts/iptw_experiment/.gitkeep b/fedeca/scripts/iptw_experiment/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/fedeca/scripts/results_logreg_dp_training.csv b/fedeca/scripts/results_logreg_dp_training.csv new file mode 100644 index 00000000..05e1337a --- /dev/null +++ b/fedeca/scripts/results_logreg_dp_training.csv @@ -0,0 +1,106 @@ +perf,e,d,seed +0.98,,,42 +0.84,10.0,0.1,42 +0.8433333333333334,10.0,0.01,42 +0.8433333333333334,10.0,0.001,42 +0.8466666666666667,10.0,0.0001,42 +0.8433333333333334,5.0,0.1,42 +0.8466666666666667,5.0,0.01,42 +0.8433333333333334,5.0,0.001,42 +0.8466666666666667,5.0,0.0001,42 +0.8466666666666667,1.0,0.1,42 +0.8466666666666667,1.0,0.01,42 +0.82,1.0,0.001,42 
+0.8033333333333333,1.0,0.0001,42 +0.8133333333333334,0.1,0.1,42 +0.69,0.1,0.01,42 +0.6433333333333333,0.1,0.001,42 +0.6366666666666667,0.1,0.0001,42 +0.7866666666666666,0.01,0.1,42 +0.6133333333333333,0.01,0.01,42 +0.6,0.01,0.001,42 +0.6,0.01,0.0001,42 +0.98,,,43 +0.8433333333333334,10.0,0.1,43 +0.8433333333333334,10.0,0.01,43 +0.84,10.0,0.001,43 +0.84,10.0,0.0001,43 +0.84,5.0,0.1,43 +0.8333333333333334,5.0,0.01,43 +0.8333333333333334,5.0,0.001,43 +0.8333333333333334,5.0,0.0001,43 +0.83,1.0,0.1,43 +0.8066666666666666,1.0,0.01,43 +0.7866666666666666,1.0,0.001,43 +0.78,1.0,0.0001,43 +0.7833333333333333,0.1,0.1,43 +0.6766666666666666,0.1,0.01,43 +0.65,0.1,0.001,43 +0.6366666666666667,0.1,0.0001,43 +0.7766666666666666,0.01,0.1,43 +0.6066666666666667,0.01,0.01,43 +0.6,0.01,0.001,43 +0.6,0.01,0.0001,43 +0.98,,,44 +0.84,10.0,0.1,44 +0.84,10.0,0.01,44 +0.8333333333333334,10.0,0.001,44 +0.8333333333333334,10.0,0.0001,44 +0.8333333333333334,5.0,0.1,44 +0.83,5.0,0.01,44 +0.8266666666666667,5.0,0.001,44 +0.8266666666666667,5.0,0.0001,44 +0.8133333333333334,1.0,0.1,44 +0.8066666666666666,1.0,0.01,44 +0.8066666666666666,1.0,0.001,44 +0.7966666666666666,1.0,0.0001,44 +0.8033333333333333,0.1,0.1,44 +0.7233333333333334,0.1,0.01,44 +0.68,0.1,0.001,44 +0.67,0.1,0.0001,44 +0.79,0.01,0.1,44 +0.6566666666666666,0.01,0.01,44 +0.6433333333333333,0.01,0.001,44 +0.6433333333333333,0.01,0.0001,44 +0.98,,,45 +0.8333333333333334,10.0,0.1,45 +0.83,10.0,0.01,45 +0.8266666666666667,10.0,0.001,45 +0.8266666666666667,10.0,0.0001,45 +0.8233333333333334,5.0,0.1,45 +0.82,5.0,0.01,45 +0.8266666666666667,5.0,0.001,45 +0.8233333333333334,5.0,0.0001,45 +0.81,1.0,0.1,45 +0.7866666666666666,1.0,0.01,45 +0.78,1.0,0.001,45 +0.7733333333333333,1.0,0.0001,45 +0.7766666666666666,0.1,0.1,45 +0.7033333333333334,0.1,0.01,45 +0.6733333333333333,0.1,0.001,45 +0.66,0.1,0.0001,45 +0.7533333333333333,0.01,0.1,45 +0.6333333333333333,0.01,0.01,45 +0.6233333333333333,0.01,0.001,45 +0.6233333333333333,0.01,0.0001,45 
+0.98,,,46 +0.8266666666666667,10.0,0.1,46 +0.83,10.0,0.01,46 +0.8366666666666667,10.0,0.001,46 +0.8333333333333334,10.0,0.0001,46 +0.8366666666666667,5.0,0.1,46 +0.8333333333333334,5.0,0.01,46 +0.8366666666666667,5.0,0.001,46 +0.8333333333333334,5.0,0.0001,46 +0.8266666666666667,1.0,0.1,46 +0.8066666666666666,1.0,0.01,46 +0.7466666666666667,1.0,0.001,46 +0.7166666666666667,1.0,0.0001,46 +0.73,0.1,0.1,46 +0.54,0.1,0.01,46 +0.4866666666666667,0.1,0.001,46 +0.4633333333333333,0.1,0.0001,46 +0.69,0.01,0.1,46 +0.45,0.01,0.01,46 +0.44,0.01,0.001,46 +0.43666666666666665,0.01,0.0001,46 diff --git a/fedeca/scripts/substra_assets/csv_opener.py b/fedeca/scripts/substra_assets/csv_opener.py new file mode 100644 index 00000000..13751b88 --- /dev/null +++ b/fedeca/scripts/substra_assets/csv_opener.py @@ -0,0 +1,357 @@ +"""CSV opener for substra.""" +import pathlib + +import numpy as np +import pandas as pd +import substratools as tools +from scipy.linalg.special_matrices import toeplitz +from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler + + +class CSVOpener(tools.Opener): + """CSV opener class.""" + + def fake_data(self, n_samples=None): + """Generate simulated survival data. + + Parameters + ---------- + n_samples : int or None, optional + Number of samples, by default None. + + Returns + ------- + pd.DataFrame + Fake survival data. + """ + N_SAMPLES = n_samples if n_samples and n_samples <= 100 else 100 + return generate_survival_data( + nsample=N_SAMPLES, + na_proportion=0.0, + ndim=10, + seed=np.random.randint(0, 10000), + ) + + def get_data(self, folders): + """Get data from CSV files. + + Parameters + ---------- + folders : list + List of folder paths. + + Returns + ------- + pd.DataFrame + Loaded data from CSV files. + """ + # get npy files + p = pathlib.Path(folders[0]) + csv_data_path = p / "data.csv" + # load data + data = pd.read_csv(csv_data_path) + return data + + +class CoxData: + """Simulate Cox data. 
+ + This class simulates survival data following Cox model assumptions. + """ + + def __init__( + self, + n_samples=1000, + ndim=10, + features_type="cov_toeplitz", + cov_corr=0.5, + scale_t=0.1, + shape_t=3.0, + censoring_factor=0.5, + random_censoring=False, + seed=42, + standardize_features=True, + dtype="float64", + ): + r"""Cox Data generator class. + + This class generates data according to a Cox proportional hazards model + in continuous time as follows: + .. math:: + S(t|x) = P(T > t | X=x) + \\lambda(t|x) = \\frac{d \\log S(t|x)}{dt} + \\lambda(t|x) = \\lambda_0(t)e^{\\beta^T x} + \\Lambda_0(t|x) = \\int_0^t \\lambda_0(u)du = (s \\times t)^k + X \\sim \\mathcal{N}(0, C) + \\beta \\sim \\mathcal{N}(0, I) + + Parameters + ---------- + n_samples: int, optional + Number of samples to generate. Defaults to 1000 + ndim: int, optional + Number of features, defaults to 10. + features_type: str, optional + Accepted values: `"cov_toeplitz"`, `"cov_uniform"`. + cov_corr: float, optional + The correlation of the covariance matrix. + scale_t: float, optional + Scale parameter `s` in the equations above. Defaults to `1`. + shape_t: float, optional + Shape parameter `k` in the equations above. Defaults to `1`. + censoring_factor: float, optional + Parameter used to determine the probability of being censored + (with respect to the median). Defaults to `0.5`. + random_censoring: bool, optional + Whether to censor completely independently of the rest or not. + When true, censors samples with probability censoring_factor. + When false, samples are censored if the drawn event times + (drawn from the Cox model) is smaller than an independent + exponential variable with scale factor + `censoring_factor * mean_time`, where `mean_time` + is the empirical mean of drawn event times. + Defaults to False. + seed: int, otional + The seed for reproducibility. + standardize_features: bool, optional + Whether to standardize features or not. Defaults to True. 
+ dtype : `{'float64', 'float32'}`, default='float64' + Type of the arrays used. + """ + self.n_samples = n_samples + self.ndim = ndim + self.features_type = features_type + self.cov_corr = cov_corr + self.scale = scale_t + self.shape = shape_t + self.censoring_factor = censoring_factor + self.random_censoring = random_censoring + self.standardize_features = standardize_features + self.dtype = dtype + self.coeffs = None + np.random.seed(seed) + + def standardize_data(self, features: np.ndarray): + """Standardize data. Make data reduced centered. + + Parameters + ---------- + features : np.ndarray + Features to standardize. + + Returns + ------- + np.ndarray + Normalized features. + """ + features -= features.mean(axis=0) + features /= features.std(axis=0) + return features + + def generate_data(self): + """Generate final survival data. + + Use the collection of methods of the class to + generate data following Cox assumptions. + + Returns + ------- + tuple + A tuple of np.ndarrays. + """ + if self.features_type == "cov_uniform": + X = features_normal_cov_uniform(self.n_samples, self.ndim, dtype=self.dtype) + elif self.features_type == "indep_gauss": + X = np.random.randn(self.n_samples, self.ndim).astype(self.dtype) + else: + X = features_normal_cov_toeplitz( + self.n_samples, self.ndim, self.cov_corr, dtype=self.dtype + ) + if self.standardize_features: + X = self.standardize_data(X) + + self.coeffs = np.random.normal(size=(self.ndim,)).astype(self.dtype) + u = X.dot(self.coeffs) + # Simulation of true times + time_hazard_baseline = -np.log( + np.random.uniform(0, 1.0, size=self.n_samples).astype(self.dtype) + ) + time_cox_unscaled = time_hazard_baseline * np.exp(-u) + times = 1.0 / self.scale * time_cox_unscaled ** (1.0 / self.shape) + avg_time = times.mean() + # Simulation of the censoring + if self.random_censoring: + censoring = np.random.rand(self.n_samples) < self.censoring_factor + times[censoring] = [ + -np.random.uniform(0, t) for t in 
times[censoring].tolist() + ] + censoring = censoring.astype("uint8") + else: + c = self.censoring_factor + c_sampled = np.random.exponential( + scale=c * avg_time, size=self.n_samples + ).astype(self.dtype) + censoring = (times <= c_sampled).astype("uint8") + times[censoring] = [ + -np.random.uniform(0, t) for t in times[censoring].tolist() + ] + return X, times, censoring + + +def features_normal_cov_uniform( + n_samples: int = 200, n_features: int = 30, dtype="float64" +): + """Generate Normal features with uniform covariance. + + An example of features obtained as samples of a centered Gaussian + vector with a specific covariance matrix given by 0.5 * (U + U.T), + where U is uniform on [0, 1] and diagonal filled by ones. + + Parameters + ---------- + n_samples : int + Number of samples. Default=200. + n_features : int + Number of features. Default=30. + dtype : str + `{'float64', 'float32'}`, + Type of the arrays used. Default='float64' + Returns + ------- + output : numpy.ndarray, shape=(n_samples, n_features) + n_samples realization of a Gaussian vector with the described + covariance + """ + pre_cov = np.random.uniform(size=(n_features, n_features)).astype(dtype) + np.fill_diagonal(pre_cov, 1.0) + cov = 0.5 * (pre_cov + pre_cov.T) + features = np.random.multivariate_normal(np.zeros(n_features), cov, size=n_samples) + if dtype != "float64": + return features.astype(dtype) + return features + + +def features_normal_cov_toeplitz( + n_samples: int = 200, n_features: int = 30, cov_corr: float = 0.5, dtype="float64" +): + """Generate normal features with toeplitz covariance. + + An example of features obtained as samples of a centered Gaussian + vector with a toeplitz covariance matrix. + + Parameters + ---------- + n_samples : int + Number of samples. Default=200. + n_features : int + Number of features. Default=30. + cov_corr : float + correlation coefficient of the Toeplitz correlation matrix. Default=0.5. 
+ dtype : str + `{'float64', 'float32'}`, + Type of the arrays used. Default='float64' + Returns + ------- + output : numpy.ndarray, shape=(n_samples, n_features) + n_samples realization of a Gaussian vector with the described + covariance + """ + cov = toeplitz(cov_corr ** np.arange(0, n_features)) + features = np.random.multivariate_normal(np.zeros(n_features), cov, size=n_samples) + if dtype != "float64": + return features.astype(dtype) + return features + + +def make_categorical(X, up_to=25): + """Create categorical data. + + Parameters + ---------- + X: nd.array + Matrix from which to build the categorical features. + up_to: int + Takes up_to first columns to transform them into + categorical data. + """ + Xleft = X[:, :up_to] + Xright = X[:, up_to:] + mm_normalizer = MinMaxScaler() + nbins_vector = np.random.randint(2, 10, size=up_to) + for j, nbins in enumerate(nbins_vector): + discretizer = KBinsDiscretizer(n_bins=nbins, encode="ordinal") + Xleft[:, j] = mm_normalizer.fit_transform(Xleft[:, j][:, None])[:, 0] + Xleft[:, j] = discretizer.fit_transform(Xleft[:, j][:, None])[:, 0] + return Xleft, Xright + + +def generate_survival_data( + n_samples=100, + ndim=50, + censoring_factor=0.7, + seed=42, + ncategorical=25, + na_proportion=0.1, + dtype="float32", +): + """Generate synthetic survival data. + + Parameters + ---------- + n_samples : int, optional + Number of samples, by default 100. + ndim : int, optional + Number of dimensions (features), by default 50. + censoring_factor : float, optional + Factor controlling censoring rate, by default 0.7. + seed : int, optional + Seed for random number generation, by default 42. + ncategorical : int, optional + Number of categorical features, by default 25. + na_proportion : float, optional + Proportion of missing values, by default 0.1. + dtype : str, optional + Data type for the generated data, by default "float32". + + Returns + ------- + pd.DataFrame + Synthetic survival data. 
+ """ + assert ncategorical <= ndim + simu_coxreg = CoxData( + n_samples, + ndim=ndim, + dtype=dtype, + seed=seed, + random_censoring=True, + censoring_factor=censoring_factor, + standardize_features=False, + ) + X, T, C = simu_coxreg.generate_data() + # Will make first columns to be categorical + Xcat, Xcont = make_categorical(X, up_to=ndim // 2) + # Build the final dataframe using appropriate column names and adding missing values + cols_dict = {} + X = np.concatenate((Xcat, Xcont), axis=1) + for i in range(Xcat.shape[1] + Xcont.shape[1]): + currentX = X[:, i].astype("float32") + mask_na = np.random.uniform(0, 1, X.shape[0]) > (1.0 - na_proportion) + currentX[mask_na] = np.nan + if i < Xcat.shape[1]: + colname = "cat_col" + else: + colname = "col" + i -= Xcat.shape[1] + cols_dict[f"{colname}_{i}"] = currentX + # T is multiplied by -1 if censored and continuous, + # we make it so that the first time is 10 and other times are integers + cols_dict["T"] = np.array(np.abs(T) - np.abs(T).min() + 10).astype("int") + cols_dict["C"] = C + + df = pd.DataFrame(cols_dict) + # Final cast of categorical columns that was impossible due to nan in numpy + for i in range(Xcat.shape[1]): + df[f"cat_col_{i}"] = df[f"cat_col_{i}"].astype("Int64") + + return df diff --git a/fedeca/scripts/substra_assets/description.md b/fedeca/scripts/substra_assets/description.md new file mode 100644 index 00000000..89626620 --- /dev/null +++ b/fedeca/scripts/substra_assets/description.md @@ -0,0 +1 @@ +whatever \ No newline at end of file diff --git a/fedeca/strategies/__init__.py b/fedeca/strategies/__init__.py new file mode 100644 index 00000000..19c0a481 --- /dev/null +++ b/fedeca/strategies/__init__.py @@ -0,0 +1,2 @@ +"""Init file for strategies.""" +from .webdisco import WebDisco diff --git a/fedeca/strategies/webdisco.py b/fedeca/strategies/webdisco.py new file mode 100644 index 00000000..0b3d0401 --- /dev/null +++ b/fedeca/strategies/webdisco.py @@ -0,0 +1,695 @@ +"""File for webdisco 
strategy.""" +from copy import deepcopy +from enum import Enum +from typing import List, Optional + +import numpy as np +from substrafl.algorithms.algo import Algo +from substrafl.nodes.aggregation_node import AggregationNode +from substrafl.nodes.references.local_state import LocalStateRef +from substrafl.nodes.references.shared_state import SharedStateRef +from substrafl.nodes.test_data_node import TestDataNode +from substrafl.nodes.train_data_node import TrainDataNode +from substrafl.remote import remote + +# from substrafl.schemas import WebDiscoAveragedStates +# from substrafl.schemas import WebDiscoSharedState +from substrafl.strategies.strategy import Strategy + +from fedeca.utils.moments_utils import aggregation_mean, compute_centered_moment + + +class StrategyName(str, Enum): + """Class for the strategy name.""" + + WEBDISCO = "WebDisco" + + +class WebDisco(Strategy): + """WebDisco strategy class. + + It can only be used with traditional Cox models on pandas.DataFrames. + This strategy is one of its kind because it can only be used with + Linear CoxPH models defined in fedeca.utils.survival_utils. Therefore all models are + initialized with zeroed weights (as in lifelines), tested and we cover all possible + use cases with the dtype and ndim arguments. This strategy splits the computations + of gradient and Hessian between workers to compute a centralized batch Newton- + Raphson update on Breslow's partial log-likelihod (to handle tied events it uses + Breslow's approximation unlike lifelines which uses Efron's by default but Efron is + not separable). This strategy uses lifeline's adaptive step-size to converge faster + starting from initial_ste_size and use lifelines safe way of inverting the hessian. + As lifelines standardizes the data by default we allow the user to do it optionally. 
+ + Reference + ---------- + - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5009917/ + + Parameters + ---------- + statistics_computed: bool, + If the statistics that we can find in each gradient, hessian are already + computed and given as attribute to the server or not. + initial_step_size: float, otional + The initial step size of the Newton-Raphson algorithm at the server side. + The following steps will use lifelines heuristics to adapt the step-size. + Defaults to 0.9. + tol: float, optional + Capping every division to avoid dividing by 0. Defaults to 1e-16. + standardize_data: bool, + Whether or not to standardize the data before comuting updates. + Defaults to False. + penalizer: float, optional + Add a regularizer in case of ill-conditioned hessians, which happen quite + often with large covariates. + Defaults to 0. + l1_ratio: float, optional + When using a penalizer the ratio between L1 and L2 regularization as in + sklearn. + Defaults to 0. + """ + + def __init__( + self, + algo: Algo, + standardize_data: bool = True, + tol: float = 1e-16, + ): + """Initialize the Webdisco class. + + Parameters + ---------- + algo: Algo + Algorithm needed to perform the optimization. + standardize_data: bool, + Whether or not to standardize each features + (if True involves more tasks in order to compute + global means and stds) + tol: float, + Epsilon used to ensure no ZeroDivision Errors due to finite + numerical precision. + """ + # !!! You actually need to pass all arguments explicitly through this init + # function so that kwargs is instantiated with the correct arguments !!! 
+ super().__init__( + algo=algo, + standardize_data=standardize_data, + tol=tol, + ) + + # States + self._local_states: Optional[List[LocalStateRef]] = None + self._shared_states: Optional[List[SharedStateRef]] = None + + self._standardize_data = standardize_data + self._tol = tol + self._survival_statistics_computed = False + self._gs_statistics_given = False + self._server_state = None + self.n_clients = None + self.count = 1 + + @property + def name(self) -> StrategyName: + """The name of the strategy. + + Returns + ------- + StrategyName: Name of the strategy + """ + return StrategyName.WEBDISCO + + def build_compute_plan( + self, + train_data_nodes: List[TrainDataNode], + aggregation_node: Optional[List[AggregationNode]], + evaluation_strategy, + num_rounds: int, + clean_models: Optional[bool], + ): + """Build the computation graph of the strategy. + + It removes initialization round, + which is useless in this case as all models start at 0. + + Parameters + ---------- + train_data_nodes: typing.List[TrainDataNode], + list of the train organizations + aggregation_node: typing.Optional[AggregationNode], + aggregation node, necessary for centralized strategy, unused otherwise + evaluation_strategy: Optional[EvaluationStrategy], + When and how to compute performance. + num_rounds: int, + The number of rounds to perform. + clean_models: bool (default=True), + Clean the intermediary models on the Substra platform. + Set it to False if you want to download or re-use + intermediary models. This causes the disk space to + fill quickly so should be set to True unless needed. + Defaults to True. + """ + additional_orgs_permissions = ( + evaluation_strategy.test_data_nodes_org_ids + if evaluation_strategy is not None + else set() + ) + # create computation graph. 
+ for round_idx in range(0, num_rounds + 1): + self.perform_round( + train_data_nodes=train_data_nodes, + aggregation_node=aggregation_node, + additional_orgs_permissions=additional_orgs_permissions, + round_idx=round_idx, + clean_models=clean_models, + ) + + if evaluation_strategy is not None and next(evaluation_strategy): + self.perform_predict( + train_data_nodes=train_data_nodes, + test_data_nodes=evaluation_strategy.test_data_nodes, + round_idx=round_idx, + ) + + def _global_standardization( + self, + local_computation_fct, + aggregation_fct, + train_data_nodes: List[TrainDataNode], + aggregation_node: AggregationNode, + clean_models: bool, + ): + local_moments = [] + for node in train_data_nodes: + # define composite tasks (do not submit yet) + # for each composite task give description of + # Algo instead of a key for an algo + _, local_moment = node.update_states( + local_computation_fct( + node.data_sample_keys, + shared_state=None, + _algo_name=local_computation_fct.__doc__.split("\n")[0], + ), + local_state=None, + round_idx=self.count, + authorized_ids=set([node.organization_id]), + aggregation_id=aggregation_node.organization_id, + clean_models=False, + ) + # keep the states in a list: one/organization + local_moments.append(local_moment) + + aggregated_moments = aggregation_node.update_states( + aggregation_fct( + shared_states=local_moments, + _algo_name=aggregation_fct.__doc__.split("\n")[0], + ), + round_idx=self.count, + authorized_ids=set( + train_data_node.organization_id for train_data_node in train_data_nodes + ), + clean_models=clean_models, + ) + self.count += 1 + return aggregated_moments + + def _global_statistics( + self, + train_data_nodes: List[TrainDataNode], + aggregation_node: AggregationNode, + clean_models: bool, + ): + self._local_states = [] + survival_statistics_list = [] + for node in train_data_nodes: + # define composite tasks (do not submit yet) + # for each composite task give description of + # Algo instead of a key for an 
algo + local_state, next_shared_state = node.update_states( + self.algo._compute_local_constant_survival_statistics( + node.data_sample_keys, + shared_state=self._aggregated_moments, + _algo_name="Compute Local Statistics", + ), + local_state=None, + round_idx=self.count, + authorized_ids=set([node.organization_id]), + aggregation_id=aggregation_node.organization_id, + clean_models=clean_models, + ) + # keep the states in a list: one/organization + survival_statistics_list.append(next_shared_state) + self._local_states.append(local_state) + + global_survival_statistics = aggregation_node.update_states( + self._compute_global_survival_statistics( + shared_states=survival_statistics_list, + _algo_name="Compute global statistics from local quantities", + ), + round_idx=self.count, + authorized_ids=set( + train_data_node.organization_id for train_data_node in train_data_nodes + ), + clean_models=clean_models, + ) + self.count += 1 + self._survival_statistics_computed = True + return global_survival_statistics + + def perform_round( + self, + train_data_nodes: List[TrainDataNode], + aggregation_node: AggregationNode, + round_idx: int, + clean_models: bool, + additional_orgs_permissions: Optional[set] = None, + ): + """Perform one round of webdisco. + + One round of the WebDisco strategy consists in: + - optionally compute global means and stds for all features if + standardize_data is True + - compute global survival statistics that will be reused at each round + - build building blocks of the gradient and hessian based on global risk + sets + - perform a Newton-Raphson update on each train data nodes + Parameters + ---------- + train_data_nodes: typing.List[TrainDataNode], + List of the nodes on which to train + aggregation_node: AggregationNode + node without data, used to perform operations on the + shared states of the models + round_idx :int, + Round number, it starts at 0. 
+ clean_models: bool, + Clean the intermediary models of this round on the + Substra platform. Set it to False if you want to + download or re-use intermediary models. This causes the + disk space to fill quickly so should be set to True unless needed. + additional_orgs_permissions: typing.Optional[set], + Additional permissions to give to the model outputs after training, + in order to test the model on an other organization. + """ + if aggregation_node is None: + raise ValueError("In WebDisco strategy aggregation node cannot be None") + + # Since algo and FL strategies are split we need to add this ugly assert here, + # note that we could force the algo. + # params to respect the strategies and the other way around + assert ( + self.algo._standardize_data == self._standardize_data + ), f"""Algo and strategy standardize_data param differ + {self.algo._standardize_data}!={self._standardize_data}""" + + # All models are initialized at + if self._standardize_data and (not hasattr(self, "_aggregated_moments")): + for _, (local_computation_fct, aggregation_fct) in enumerate( + zip([self.algo.local_uncentered_moments], [self.aggregate_moments]) + ): + self._aggregated_moments = self._global_standardization( + local_computation_fct=local_computation_fct, + aggregation_fct=aggregation_fct, + train_data_nodes=train_data_nodes, + aggregation_node=aggregation_node, + clean_models=clean_models, + ) + + else: + self._aggregated_moments = None + + if not (self._survival_statistics_computed): + # Uses self._aggregated_moments internally + global_survival_statistics = self._global_statistics( + train_data_nodes=train_data_nodes, + aggregation_node=aggregation_node, + clean_models=clean_models, + ) + # !!! 
The reason we are doing compute_local_phi_stats only once is + # subtle this is because to optimize we do it in train + + risk_phi_stats_list = [] + for i, node in enumerate(train_data_nodes): + # define composite tasks (do not submit yet) + # for each composite task give description of Algo instead of a key + # for an algo + local_state, risk_phi_stats = node.update_states( + self.algo.compute_local_phi_stats( # type: ignore + node.data_sample_keys, + shared_state=global_survival_statistics, + _algo_name=f"Compute gradients and hessian bricks locally using algo {self.algo.__class__.__name__}", # noqa: E501 + ), + local_state=self._local_states[i], + round_idx=self.count, + authorized_ids=set([node.organization_id]) + | additional_orgs_permissions, + aggregation_id=aggregation_node.organization_id, + clean_models=clean_models, + ) + # keep the states in a list: one/organization + risk_phi_stats_list.append(risk_phi_stats) + self._local_states[i] = local_state + + self._shared_states = risk_phi_stats_list + + # Now this assumes that in self._shared_states we have both: + # - the current risk_phi_stats computed from train + # - global_survival_statistics that should be perpetually given to + # the server which is stateless + # - the server state (parameters, log-likelihood and convergence stuff) + self._global_gradient_and_hessian = aggregation_node.update_states( + self._build_global_gradient_and_hessian( + shared_states=self._shared_states, + _algo_name="Compute gradient and hessian", + ), + round_idx=self.count, + authorized_ids=set( + train_data_node.organization_id for train_data_node in train_data_nodes + ), + clean_models=clean_models, + ) + self.count += 1 + + # We now need local states cause we'll update the model + next_local_states = [] + next_shared_states = [] + + for i, node in enumerate(train_data_nodes): + # define composite tasks (do not submit yet) + # for each composite task give description of Algo instead of a key for an + # algo + 
next_local_state, local_risk_phi_stats = node.update_states( + # This does a compute_local_phi_stats with skip=True this explains + # why there is no + # need to call it explicitly after self._survival_statistics_computed + # becomes True + self.algo.train( # type: ignore + node.data_sample_keys, + shared_state=self._global_gradient_and_hessian, + _algo_name=f"Training with {self.algo.__class__.__name__}", # noqa: E501 + ), + local_state=self._local_states[i], + round_idx=self.count, + authorized_ids=set([node.organization_id]) + | additional_orgs_permissions, + aggregation_id=aggregation_node.organization_id, + clean_models=clean_models, + ) + # keep the states in a list: one/node + next_local_states.append(next_local_state) + next_shared_states.append(local_risk_phi_stats) + + self.count += 1 + + # Now that the models are updated we'll use them in the next round + self._local_states = next_local_states + self._shared_states = next_shared_states + + @remote + def _compute_global_survival_statistics(self, shared_states): + """Aggregate different needed statistics. + + Compute aggregated statistics such as distinct event times, sum of + covariates occuring on each event, total weights of parameters on all events, + etc. + + Parameters + ---------- + shared_states : list[dict] + A list of dicts of covariate statistics and distinct event times from each + center. + The expected keys are 'sum_features_on_events' which is a vector of the same + shape as a feature, + 'distinct_event_times', which is a list with distinct event times. + 'number_events_by_time' which is a list for all events with the number of + features that have the same event. + 'total_number_of_samples', which is an integer, + 'weights_counts_on_events', the weights of all covariates on + each event match number_events_by_time if no propensity model is used. 
+ + + Returns + ------- + list + The global list of distinct values + """ + global_sum_features_on_events = np.zeros_like( + shared_states[0]["sum_features_on_events"] + ) + + # find out all distinct values while avoiding duplicates + distinct_event_times = [] + for ls_and_dv in shared_states: + distinct_event_times += ls_and_dv["distinct_event_times"] + distinct_event_times = list(set(distinct_event_times)) + distinct_event_times.sort() + # count them + num_global_event_times = len(distinct_event_times) + # aggregate statistics by suming + for ls_and_dv in shared_states: + global_sum_features_on_events += ls_and_dv["sum_features_on_events"] + # Count the number of tied event times for each client + list_number_events_by_time = [] + total_number_samples = sum( + [ls_and_dv["total_number_samples"] for ls_and_dv in shared_states] + ) + + # Very weird to double check that it cannot be written in a more readable way + for ls_and_dv in shared_states: + global_ndt = [] + for i, e in enumerate(distinct_event_times): + if e in ls_and_dv["distinct_event_times"]: + idx = ls_and_dv["distinct_event_times"].index(e) + global_ndt.append(ls_and_dv["number_events_by_time"][idx]) + else: + global_ndt.append(0) + list_number_events_by_time.append(global_ndt) + + # We add what should amount at number events by time if weights=1 + weights_counts_on_events = [] + for d in distinct_event_times: + weights_counts_on_event = 0.0 + for ls_and_dv in shared_states: + if d in ls_and_dv["distinct_event_times"]: + idx = ls_and_dv["distinct_event_times"].index(d) + weights_counts_on_event += ls_and_dv["weights_counts_on_events"][ + idx + ] + weights_counts_on_events.append(weights_counts_on_event) + + results = {} + results["global_survival_statistics"] = {} + results["global_survival_statistics"][ + "distinct_event_times" + ] = distinct_event_times + results["global_survival_statistics"][ + "global_sum_features_on_events" + ] = global_sum_features_on_events + 
results["global_survival_statistics"][ + "list_number_events_by_time" + ] = list_number_events_by_time + results["global_survival_statistics"][ + "num_global_events_time" + ] = num_global_event_times + results["global_survival_statistics"][ + "total_number_samples" + ] = total_number_samples + results["global_survival_statistics"][ + "weights_counts_on_events" + ] = weights_counts_on_events + results["moments"] = shared_states[0]["moments"] + + return results + + @remote + def _build_global_gradient_and_hessian( + self, + shared_states, + ): + r"""Compute global gradient and Hessian. + + Use the gradient and hessian local blocks from clients to compute + the global gradient and hessian and use them to compute Newton-Raphson + update. + Regarding the use of an L1 regularization we match lifelines and use the + following coefficient by coefficient approximation of the absolute value (see + https://www.cs.ubc.ca/sites/default/files/tr/2009/TR-2009-19_0.pdf + page 7 equation 3) with t the index of the round: + .. math:: + |x| = (x)_{+} + (-x)_{+} + (x)_{+} \\approx x + \\frac{1}{\\alpha} \\cdot \\log(1 + \\exp(-\\alpha x)) + |x| \\approx \\frac{1}{\\alpha} \\cdot \\left(\\log(1 + \\exp(-\\alpha x) + \\log(1 + \\exp(\\alpha x)\\right) # noqa: E501 + \\alpha = 1.3^{t} + + Parameters + ---------- + risk_phi_stats_list : list + A list of blocks necessary to compute the gradients and hessian. + save_hessian_and_gradients : bool, optional + Wether or not to save the value of the gradient and the hessian as attribute + of the server, by default False + forced_step_size: float, optional + If not none, force the step size to be equal to the given value. Default + None. + Useful for tests. 
+ + Returns + ------- + list + list of size 1 with NR update for the weights + """ + # Server is stateless need to continuously feed it with + # global_survival_statistics + + global_survival_statistics = shared_states[0]["global_survival_statistics"] + # It is important to use deepcopy to avoid side effect + # Otherwise, the value of self.global_sum_features_on_events will change + # This is already weighted + gradient = deepcopy(global_survival_statistics["global_sum_features_on_events"]) + ll = 0.0 + try: + gradient_shape = [e for e in gradient.shape if e > 1][0] + except IndexError: + gradient_shape = 1 + + risk_phi_stats_list = [e["local_phi_stats"] for e in shared_states] + risk_phi_list = [e["risk_phi"] for e in risk_phi_stats_list] + risk_phi_x_list = [e["risk_phi_x"] for e in risk_phi_stats_list] + risk_phi_x_x_list = [e["risk_phi_x_x"] for e in risk_phi_stats_list] + + distinct_event_times = global_survival_statistics["distinct_event_times"] + + hessian = np.zeros((gradient_shape, gradient_shape)) + + # Needed for robust estimation of SE + global_risk_phi_list = [] + global_risk_phi_x_list = [] + # We first sum over each event + for idxd, _ in enumerate(distinct_event_times): + # This factor amounts to d_i the number of events per time i if no weights + # otherwise it's the sum of the score of all d_i events + weighted_average = global_survival_statistics["weights_counts_on_events"][ + idxd + ] + + # We initialize both tensors at zeros for numerators (all denominators are + # scalar) + numerator = np.zeros(risk_phi_x_list[0][0].shape) + # The hessian has several terms due to deriving quotient of functions u/v + first_numerator_hessian = np.zeros((gradient_shape, gradient_shape)) + denominator = 0.0 + if np.allclose(weighted_average, 0.0): + continue + for i in range(len(risk_phi_stats_list)): + numerator += risk_phi_x_list[i][idxd] + denominator += risk_phi_list[i][idxd] + first_numerator_hessian += risk_phi_x_x_list[i][idxd] + + 
global_risk_phi_list.append(denominator) + global_risk_phi_x_list.append(numerator) + # denominator being a sum of exponential it's always positive + + assert denominator >= 0.0, "the sum of exponentials is negative..." + denominator = max(denominator, self._tol) + denominator_squared = max(denominator**2, self._tol) + c = numerator / denominator + ll -= weighted_average * np.log(denominator) + gradient -= weighted_average * np.squeeze(c) + hessian -= weighted_average * ( + (first_numerator_hessian / denominator) + - (np.multiply.outer(numerator, numerator) / denominator_squared) + ) + + return { + "hessian": hessian, + "gradient": gradient, + "second_part_ll": ll, + "gradient_shape": gradient_shape, + "global_risk_phi_list": global_risk_phi_list, + "global_risk_phi_x_list": global_risk_phi_x_list, + } + + @remote + def aggregate_moments(self, shared_states): + """Compute the global centered moments given the local results. + + Parameters + ---------- + shared_states : List + List of results (local_m1, local_m2, n_samples) from training nodes. + + Returns + ------- + dict + Global results to be shared with train nodes via shared_state. + """ + # aggregate the moments. + + tot_uncentered_moments = [ + aggregation_mean( + [s[f"moment{k}"] for s in shared_states], + [s["n_samples"] for s in shared_states], + ) + for k in range(1, 2 + 1) + ] + n_samples = sum([s["n_samples"].iloc[0] for s in shared_states]) + results = { + f"global_centered_moment_{k}": compute_centered_moment( + tot_uncentered_moments[:k] + ) + for k in range(1, 2 + 1) + } + results.update( + { + f"global_uncentered_moment_{k+1}": moment + for k, moment in enumerate(tot_uncentered_moments) + } + ) + results.update({"total_n_samples": n_samples}) + return results + + def perform_predict( + self, + test_data_nodes: List[TestDataNode], + train_data_nodes: List[TrainDataNode], + round_idx: int, + ): + """Predict function for test_data_nodes on which the model have been trained on. 
+ + Parameters + ---------- + test_data_nodes: List[TestDataNode]), + test data nodes to intersect with train data nodes to evaluate the + model on. + train_data_nodes: List[TrainDataNode], + train data nodes the model has been trained on. + round_idx: int, + round index. + + Raises + ------ + NotImplementedError: Cannot test on a node we did not train on for now. + """ + for test_data_node in test_data_nodes: + matching_train_nodes = [ + train_node + for train_node in train_data_nodes + if train_node.organization_id == test_data_node.organization_id + ] + if len(matching_train_nodes) == 0: + node_index = 0 + else: + node_index = train_data_nodes.index(matching_train_nodes[0]) + + assert ( + self._local_states is not None + ), "Cannot predict if no training has been done beforehand." + local_state = self._local_states[node_index] + + test_data_node.update_states( + operation=self.algo.predict( + data_samples=test_data_node.test_data_sample_keys, + _algo_name=f"Testing with {self.algo.__class__.__name__}", + ), + traintask_id=local_state.key, + round_idx=round_idx, + ) # Init state for testtask diff --git a/fedeca/strategies/webdisco_utils.py b/fedeca/strategies/webdisco_utils.py new file mode 100644 index 00000000..a4e115f2 --- /dev/null +++ b/fedeca/strategies/webdisco_utils.py @@ -0,0 +1,196 @@ +"""Webdisco utils.""" +import tempfile +from math import sqrt +from typing import Union + +import numpy as np +import pandas as pd +from scipy import stats +from substra import Client +from substrafl.algorithms import Algo +from substrafl.model_loading import _load_from_files + +from fedeca.utils.substrafl_utils import download_train_task_models_by_round + + +def get_final_cox_model_function( + client: Client, + compute_plan_key: Union[str, Algo], + num_rounds: int, + standardize_data: bool, + duration_col: str, + event_col: str, + simu_mode: bool = False, + robust: bool = False, +): + """Retreive first converged Cox model and corresponding hessian. 
+ + Parameters + ---------- + Client : Client + The susbtrafl Client that registered the CP. + compute_plan_key : Union[str, Algo] + The key of the CP. + num_rounds : int + The number of rounds of the CP. + standardize_data : float, optional + Whether or not the data was standadized, by default 0.05 + duration_col : str + The name of the duration column. + event_col : str + The name of the event column. + simu_mode : bool + Whether or not we are using simu mode. Note this could be inferred from + the Client. + robust : bool, optional + Retreive global statistics for robust variance estimation. + + Returns + ------- + tuple + Returns hessian, log-likelihood, Cox model's weights, global moments + """ + found_params = False + found_hessian = False + + for i in range(0, num_rounds): + actual_round = get_last_algo_from_round_count(i, standardize_data, simu_mode) + if not simu_mode: + # We have to use a custom function instead of download_algo_state + # to bypass substrafl limitation on number of local tasks per + # round + with tempfile.TemporaryDirectory() as temp_dir: + download_train_task_models_by_round( + client=client, + dest_folder=temp_dir, + compute_plan_key=compute_plan_key, + round_idx=actual_round, + ) + algo = _load_from_files(input_folder=temp_dir) + else: + algo = compute_plan_key.intermediate_states[actual_round] + + if algo.server_state["success"]: + if not found_params: + found_params = True + algo_params = algo + # Unfortunately finding params is only half of the job as we + # need the hessian computed on those params + else: + found_hessian = True + algo_hessian = algo + break + + if i == max(num_rounds - 2, 0) and not found_params: + print( + """Cox model did not converge ! 
def compute_summary_function(final_params, variance_matrix, alpha=0.05):
    """Compute a lifelines-style summary table for Cox coefficients.

    Parameters
    ----------
    final_params : np.ndarray
        The estimated values of Cox model coefficients.
    variance_matrix : np.ndarray
        Computed variance matrix whether using robust estimation or not.
    alpha : float, optional
        The quantile level to test, by default 0.05.

    Returns
    -------
    pd.DataFrame
        Summary of IPTW analysis as in lifelines.
    """
    se = np.sqrt(variance_matrix.diagonal())
    ci = 100 * (1 - alpha)
    z = stats.norm.ppf(1 - alpha / 2)
    Z = final_params / se
    U = Z**2
    # Wald test: Z^2 follows a chi-squared distribution with 1 dof under H0.
    pvalues = stats.chi2.sf(U, 1)
    summary = pd.DataFrame()
    summary["coef"] = final_params
    summary["se(coef)"] = se
    summary[f"coef lower {round(ci)}%"] = final_params - z * se
    summary[f"coef upper {round(ci)}%"] = final_params + z * se
    summary["z"] = Z
    summary["p"] = pvalues

    return summary


def get_last_algo_from_round_count(num_rounds, standardize_data=True, simu_mode=False):
    """Translate a user-facing round count into the internal task round index.

    Parameters
    ----------
    num_rounds : int
        Number of optimization rounds requested by the user.
    standardize_data : bool, optional
        Whether the data-standardization aggregation round was run,
        by default True.
    simu_mode : bool
        Whether or not we are in simu mode.

    Returns
    -------
    int
        The actual number of rounds executed by the compute plan.
    """
    # One count for each aggregation starting at 1 (init round): +1 for
    # standardization +1 for global_survival_statistics
    if not simu_mode:
        actual_number_of_rounds = 2 * (num_rounds + 1) + 2
    else:
        # Minus 1 stems from the fact that simu mode is peculiar
        # and that we start adding to it only in the build_compute_plan
        # aka 1 before
        actual_number_of_rounds = (num_rounds + 1) + 2
    if not standardize_data:
        # Without standardization there is one fewer aggregation round.
        actual_number_of_rounds -= 1
    return actual_number_of_rounds
class TestTempDir(unittest.TestCase):
    """Base class for tests.

    Base class which should be used for every test that needs a temporary
    directory (to store data, logs etc.). The directory is shared across
    the tests of a TestCase, and it is removed at the end of the TestCase
    (not after each individual test!).

    Attributes
    ----------
    test_dir: str
        the path to the temporary directory of the TestCase.

    Notes
    -----
    If the class methods setUpClass or tearDownClass are overridden,
    please make sure to call `super()...``
    """

    _test_dir = None
    test_dir = None

    @classmethod
    def setUpClass(cls):
        """Create the shared temporary directory for the TestCase."""
        super().setUpClass()
        cls._test_dir = tempfile.TemporaryDirectory()
        # Keep the plain path handy for subclasses.
        cls.test_dir = cls._test_dir.name

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory and leftover local-worker folders."""
        super().tearDownClass()
        # Deletes the temporary directory and everything under it.
        cls._test_dir.cleanup()
        rmdir(Path(fedeca.__file__).parent.parent)
StepSizer, concordance_index +from scipy.linalg import inv, norm +from scipy.linalg import solve as spsolve +from substrafl.algorithms.pytorch import weight_manager +from torch import nn + +from fedeca.algorithms import TorchWebDiscoAlgo +from fedeca.strategies import WebDisco +from fedeca.strategies.webdisco_utils import ( + compute_summary_function, + get_final_cox_model_function, +) +from fedeca.tests.common import TestTempDir +from fedeca.utils.data_utils import generate_cox_data_and_substra_clients +from fedeca.utils.substrafl_utils import ( + Experiment, + get_outmodel_function, + make_c_index_function, + make_substrafl_torch_dataset_class, +) +from fedeca.utils.survival_utils import ( + CoxPHModelTorch, + analytical_gradient_cox_partial_loglikelihood_breslow_torch, + cox_partial_loglikelihood_breslow_torch, + hessian_torch, +) + +DTYPES_TORCH = {"float32": torch.float, "float64": torch.double} + + +class TestWebDisco(TestTempDir): + """Webdisco tests class.""" + + @classmethod + def tearDownClass(cls): + """Tear down the class.""" + super(TestWebDisco, cls).tearDownClass() + # We need to avoid persistence of DB in between TestCases, this is an obscure + # hack but it's working + first_client = cls.clients[list(cls.clients.keys())[0]] + database = first_client._backend._db._db._data + if len(database.keys()) > 1: + for k in list(database.keys()): + database.pop(k) + + @classmethod + def get_lifelines_results(cls): + """Get lifelines results.""" + # Fitting pooled data with lifelines and check + # that lifelines give decent results + cls.cphf = lifelines.fitters.coxph_fitter.CoxPHFitter( + penalizer=cls.penalizer, l1_ratio=cls.l1_ratio + ) + lifelines_kwargs = { + "duration_col": cls._duration_col, + "event_col": cls._event_col, + "robust": cls.robust, + } + if cls.use_propensity: + lifelines_kwargs["weights_col"] = "weights" + + cls.cphf.fit(cls.df, **lifelines_kwargs) + + # Removing lifelines specific preprocessing + if "weights" in cls.df: + cls.df = 
cls.df.drop(columns=["weights"]) + + # If we used propensity then I guess coefficients might differ + if not cls.use_propensity and np.allclose(cls.penalizer, 0.0): + # To get closer to the true Cox coefficients one has to reduce + # censorship + add more data points. With 100000 data points and no + # censorship one can get pretty good estimates (but still not + # perfect) approx rtol=1e-2 + assert ( + np.linalg.norm(cls.coeffs - np.array(cls.cphf.params_)) < 1.0 + ), "Lifelines could not fit the data." + + # This is very long and thus should be only executed once that is why we use + # setUpClass unlike setUp wich would otherwise get executed for each method + @classmethod + def setUpClass( + cls, + backend="pytorch", + n_clients=3, + n_samples_per_client=100, + ndim=10, + dtype="float64", + initial_step_size=0.95, + seed=43, + standardize_data=True, + l1_ratio=0.0, + penalizer=0.0, + use_propensity=False, + learning_rate_strategy="lifelines", + robust=False, + run=True, + ): + """Set up the test class for experiment comparison. + + Parameters + ---------- + cls : TestCase class + Test class instance. + backend : str, optional + Backend type, by default "pytorch". + n_clients : int, optional + Number of clients, by default 3. + n_samples_per_client : int, optional + Number of samples per client, by default 100. + ndim : int, optional + Number of dimensions, by default 10. + dtype : str, optional + Data type, by default "float64". + initial_step_size : float, optional + Initial step size, by default 0.95. + seed : int, optional + Random seed, by default 43. + standardize_data : bool, optional + Standardize data, by default True. + l1_ratio : float, optional + L1 ratio, by default 0.0. + penalizer : float, optional + Penalizer, by default 0.0. + use_propensity : bool, optional + Use propensity scores, by default False. + learning_rate_strategy : str, optional + Learning rate strategy, by default "lifelines". 
+ robust: bool, optional + Whether to perform robust variance estimation. + run : bool, optional + Whether to run WebDisco or not. + """ + super().setUpClass() + cls.n_clients = n_clients + cls.backend = backend + # Data creation, we use appropriate data to avoid + # ill conditionned hessian + cls.ndim = ndim + cls.dtype = dtype + cls.penalizer = penalizer + cls.l1_ratio = l1_ratio + cls.standardize_data = standardize_data + cls.initial_step_size = initial_step_size + cls.seed = seed + cls.use_propensity = use_propensity + cls.learning_rate_strategy = learning_rate_strategy + cls.robust = robust + + # Creating pooled data from parameters + # Note that this could probably be factorized across TestCases + ( + cls.clients, + cls.train_data_nodes, + cls.test_data_nodes, + cls.dfs, + cls.df, + cls.coeffs, + ) = generate_cox_data_and_substra_clients( + n_clients=cls.n_clients, + ndim=cls.ndim, + backend_type="subprocess", + data_path=cls.test_dir, + seed=seed, + n_per_client=n_samples_per_client, + add_treated=cls.use_propensity, + ) + + assert cls.coeffs.shape == (cls.ndim,) + + cls.ds_client = cls.clients[list(cls.clients)[0]] + + assert len(cls.df.index) == len( + cls.df["T"].unique() + ), "There are ties, lifelines and webdisco will differ" + + cls._target_cols = ["T", "E"] + cls._duration_col = "T" + cls._event_col = "E" + if cls.use_propensity: + cls._treated_col = "treated" + else: + cls._treated_col = None + + # We order the dataframe to use lifelines get_efron function + sort_by = [cls._duration_col, cls._event_col] + cls.df = cls.df.sort_values(by=sort_by) + + # We compute lots of reference quantities in "pooled" setting + cls.X = cls.df.drop(columns=cls._target_cols).to_numpy(cls.dtype) + cls.standardize_data = standardize_data + if cls.standardize_data: + cls.true_means = cls.X.mean(axis=0) + # Very important to match pandas + cls.true_stds = cls.X.std(axis=0, ddof=1) + cls.X -= cls.true_means + cls.X /= cls.true_stds + + if cls.use_propensity: + 
propensity_strategy = "aiptw" + + class LogisticRegressionTorch(nn.Module): + def __init__(self): + super(LogisticRegressionTorch, self).__init__() + self.fc1 = nn.Linear(cls.ndim, 1).to(torch.float64) + + def forward(self, x, eval=False): + x = self.fc1(x) + return torch.sigmoid(x) + + torch.manual_seed(cls.seed) + propensity_model = LogisticRegressionTorch() + propensity_model.eval() + # Xprop is neither standardized nor contains treated + cls.Xprop = cls.df.drop( + columns=cls._target_cols + [cls._treated_col] + ).to_numpy(cls.dtype) + with torch.no_grad(): + propensity_scores = ( + propensity_model(torch.from_numpy(cls.Xprop)).detach().numpy() + ) + treated = cls.df[cls._treated_col].to_numpy().reshape((-1, 1)) + cls.weights = treated * 1.0 / propensity_scores + (1 - treated) * 1.0 / ( + 1.0 - propensity_scores + ) + # This is only for lifelines then we need to remove it + cls.df["weights"] = cls.weights + else: + propensity_model = None + cls.weights = None + propensity_strategy = "aiptw" # None is not supported + + cls.E = cls.df["E"].to_numpy(cls.dtype) + cls.df["time_multiplier"] = [2.0 * e - 1.0 for e in cls.df["E"].tolist()] + cls.t = (cls.df["T"] * cls.df["time_multiplier"]).to_numpy(cls.dtype) + cls.df = cls.df.drop(columns=["time_multiplier"]) + + cls.get_lifelines_results() + + # A round is defined by a local training step followed by an aggregation + # operation + # This was hand-tuned don't change + cls.NUM_ROUNDS = 8 + torch_dtype = DTYPES_TORCH[cls.dtype] + if cls.use_propensity: + if propensity_strategy == "aiptw": + ndim = cls.X.shape[1] + else: + ndim = 1 + else: + ndim = cls.X.shape[1] + + cls.model = CoxPHModelTorch(ndim=ndim, torch_dtype=torch_dtype) + + cls.dataset = make_substrafl_torch_dataset_class( + cls._target_cols, cls._event_col, cls._duration_col, dtype=cls.dtype + ) + + # Needed for MyAlgo to avoid confusing the cls of this class and of MyAlgo + # (bug in substrafl) + model = cls.model + dataset = cls.dataset + duration_col = 
cls._duration_col + event_col = cls._event_col + treated_col = cls._treated_col + + class MyAlgo(TorchWebDiscoAlgo): + def __init__(self, *args, **kwargs): + del args + del kwargs + super().__init__( + model=model, + batch_size=sys.maxsize, + dataset=dataset, + seed=seed, + duration_col=duration_col, + event_col=event_col, + treated_col=treated_col, + standardize_data=standardize_data, + penalizer=penalizer, + l1_ratio=l1_ratio, + initial_step_size=initial_step_size, + learning_rate_strategy=learning_rate_strategy, + propensity_model=propensity_model, + propensity_strategy=propensity_strategy, + store_hessian=True, + ) + + cls.strategy = WebDisco(algo=MyAlgo(), standardize_data=cls.standardize_data) + cls.cindex = make_c_index_function(cls._duration_col, cls._event_col) + cls.webdisco_experiment = Experiment( + ds_client=cls.clients[list(cls.clients.keys())[0]], + strategies=[cls.strategy], + train_data_nodes=cls.train_data_nodes, + num_rounds_list=[cls.NUM_ROUNDS], + metrics_dicts_list=[{"C-index": cls.cindex}], + experiment_folder=cls.test_dir, + ) + if cls.run: + cls.webdisco_experiment.run() + + def test_aggregate_statistics(self): + """Test the aggregated statistics.""" + if not self.use_propensity: + # We retrieve the global survival statistics computed by WebDisco + global_survival_statistics = get_outmodel_function( + "Compute global statistics from local quantities", + client=self.ds_client, + compute_plan_key=self.webdisco_experiment.compute_plan_keys[0].key, + )["global_survival_statistics"] + computed_distinct_event_times = global_survival_statistics[ + "distinct_event_times" + ] + + computed_list_nds = global_survival_statistics["list_number_events_by_time"] + computed_statistics = global_survival_statistics[ + "global_sum_features_on_events" + ] + computed_number_of_distinct_values = global_survival_statistics[ + "num_global_events_time" + ] + # True statistics and distinct values + true_distinct_event_times = np.unique(self.t[self.t > 0]) + 
true_n_distinct_event_times = len(true_distinct_event_times) + true_statistics = np.zeros_like(self.X[0]) + true_nds = [] + for v in true_distinct_event_times: + indices = np.where(self.t == v)[0] + true_statistics += self.X[indices].sum(axis=0) + true_nds.append(len(indices)) + assert np.allclose(true_distinct_event_times, computed_distinct_event_times) + assert np.allclose( + true_n_distinct_event_times, computed_number_of_distinct_values + ) + assert np.allclose(true_nds, [sum(e) for e in zip(*computed_list_nds)]) + assert np.allclose( + true_statistics, computed_statistics + ), computed_statistics + + def test_compute_true_moments(self): + """Test the computation of the moments.""" + if self.standardize_data: + # We retrieve the global moments computed by WebDisco + aggregated_moments = get_outmodel_function( + "Compute the global centered moments given the local results.", + client=self.ds_client, + compute_plan_key=self.webdisco_experiment.compute_plan_keys[0].key, + ) + computed_means = aggregated_moments["global_uncentered_moment_1"].to_numpy() + computed_vars = aggregated_moments["global_centered_moment_2"] + bias_correction = (aggregated_moments["total_n_samples"]) / float( + aggregated_moments["total_n_samples"] - 1 + ) + computed_stds = computed_vars.transform( + lambda x: sqrt(x * bias_correction + 1e-16) + ) + + assert np.allclose(computed_means, self.true_means) + assert np.allclose(computed_stds, self.true_stds) + + def test_newton_raphson(self): + """Test newton raphson algorithm.""" + # We use the initial model as a starting point + coxmodel = copy.deepcopy(self.model) + n = self.X.shape[0] + + # We do batch Gradient Newton-Raphson with lifelines tricks and compare it back + # to WebDisco + stepsizer = StepSizer(self.initial_step_size) + for i in range(self.NUM_ROUNDS): + coxmodel.zero_grad() + coxpl = cox_partial_loglikelihood_breslow_torch(coxmodel, self.X, self.t) + coxpl.backward() + # We compute the analytical gradient in the pooled case + 
ana_grad = analytical_gradient_cox_partial_loglikelihood_breslow_torch( + coxmodel, self.X, self.t + ) + # We compare it to torch autodiff just to be sure + assert torch.allclose(coxmodel.fc1.weight.grad, ana_grad) + true_gradient = coxmodel.fc1.weight.grad + # We compute the hessian + true_hessian = hessian_torch( + cox_partial_loglikelihood_breslow_torch(coxmodel, self.X, self.t), + coxmodel.fc1.weight, + ).squeeze() + # Check hessian and gradients + webdisco_gradient_and_hessian = get_outmodel_function( + "Compute gradient and hessian", + client=self.ds_client, + compute_plan_key=self.webdisco_experiment.compute_plan_keys[0].key, + idx_task=i, + ) + if self.penalizer > 0.0: + webdisco_gradient_and_hessian_client = get_outmodel_function( + "Training with MyAlgo", + client=self.ds_client, + compute_plan_key=self.webdisco_experiment.compute_plan_keys[0].key, + idx_task=i * self.n_clients, + ) + webdisco_gradient = torch.from_numpy( + webdisco_gradient_and_hessian["gradient"] + ) + webdisco_hessian = torch.from_numpy( + webdisco_gradient_and_hessian["hessian"] + ) + + # We test the resulting hessian and gradients + if not self.use_propensity: + # We test against "true" gradient when it's not weighted + assert torch.allclose(true_gradient, -webdisco_gradient, atol=1e-4) + assert torch.allclose( + true_hessian.squeeze(), -webdisco_hessian, atol=1e-4 + ) + + class FakeDF: + def __init__(self, values): + self.values = values + + # We always test against lifelines + if self.weights is None: + self.weights = np.ones((self.X.shape[0],)) + + ( + lifelines_hessian, + lifelines_gradient, + _, + ) = self.cphf._get_efron_values_single( + FakeDF(self.X), + self.df[self._duration_col], + self.df[self._event_col], + pd.Series(self.weights.squeeze()), + entries=None, + beta=coxmodel.fc1.weight.data.detach().numpy().squeeze(), + ) + + if self.penalizer > 0.0: + # We use a smooth approximation for the L1 norm (for more details + # see docstring of function) + # we use numpy autograd 
to be able to compute the first and second + # order derivatives of this expression + current_weights = coxmodel.fc1.weight.data.detach().numpy() + alpha = 1.3 ** (i + 1) + + def soft_abs(x, a): + return 1 / a * (anp.logaddexp(0, -a * x) + anp.logaddexp(0, a * x)) + + def elastic_net_penalty(beta, a): + l1 = self.l1_ratio * soft_abs(beta, a) + l2 = 0.5 * (1 - self.l1_ratio) * (beta**2) + reg = n * (self.penalizer * (l1 + l2)).sum() + return reg + + # Regularization affects both the gradient and the hessian + # producing a better conditioned hessian. + d_elastic_net_penalty = elementwise_grad(elastic_net_penalty) + dd_elastic_net_penalty = elementwise_grad(d_elastic_net_penalty) + # lifelines trick to progressively sharpen the approximation of + # the l1 regularization. + # We are trying to **maximize** the log-likelihood that is why + # we put a negative sign and not a plus sign on the regularization. + # The fact that we are actually moving towards the maximum and + # not towards the minimum is because -H is psd. 
+ true_gradient += d_elastic_net_penalty(current_weights, alpha) + true_hessian[ + np.diag_indices(max(true_hessian.shape)) + ] += dd_elastic_net_penalty(current_weights, alpha) + lifelines_gradient -= d_elastic_net_penalty( + current_weights, alpha + ).squeeze() + lifelines_hessian[ + np.diag_indices(max(lifelines_hessian.shape)) + ] -= dd_elastic_net_penalty(current_weights, alpha).squeeze() + # WebDisco does that internally but it only appears in the client's side + webdisco_hessian = webdisco_gradient_and_hessian_client["server_state"][ + "hessian" + ] + webdisco_gradient = webdisco_gradient_and_hessian_client[ + "server_state" + ]["gradient"] + + if not self.use_propensity: + assert np.allclose(webdisco_hessian, -true_hessian, atol=1e-4) + assert np.allclose(webdisco_gradient, -true_gradient, atol=1e-4) + assert np.allclose(webdisco_hessian, lifelines_hessian, atol=1e-4) + assert np.allclose(webdisco_gradient, lifelines_gradient, atol=1e-4) + + # Update parameters as in WebDisco using lifelines stepsizer + true_inv_h_dot_g_T = spsolve( + -webdisco_hessian, + webdisco_gradient.reshape((-1, 1)), + assume_a="pos", + check_finite=False, + ) + norm_delta = norm(true_inv_h_dot_g_T) + if self.learning_rate_strategy == "lifelines": + step_size = stepsizer.update(norm_delta).next() + else: + step_size = 1.0 + + updates = step_size * true_inv_h_dot_g_T + weight_manager.increment_parameters( + model=coxmodel, + updates=[torch.from_numpy(updates.reshape((1, -1)))], + with_batch_norm_parameters=False, + ) + + @pytest.mark.slow + def test_descent(self): + """Test descent.""" + # We measure the accuracy of the final fit + ( + self.hessian, + self.ll, + self.final_params, + self.computed_stds, + _, + ) = get_final_cox_model_function( + self.ds_client, + self.webdisco_experiment.compute_plan_keys[0].key, + self.NUM_ROUNDS, + self.standardize_data, + self._duration_col, + self._event_col, + ) + m = copy.deepcopy(self.model) + # We unnormalize the weights as self.X is normalized 
+ m.fc1.weight.data = torch.from_numpy( + self.final_params * self.computed_stds.to_numpy() + ) + m.eval() + with torch.no_grad(): + ypred = m(torch.from_numpy(self.X)).detach().numpy() + # Penalizer and propensity score affect ability to achieve good C-index + # or to retrieve exact original Cox coeff + if (not self.use_propensity) and np.allclose(self.penalizer, 0.0): + # We validate the fit wrt the c-index of hazard ratios ranking + assert ( + concordance_index(np.abs(self.t), -ypred, self.E) > 0.85 + ), "WebDiscoTorch model could not rank the pairs well enough." + # We validate the fit wrt the real Cox model used to generate the data + assert ( + np.linalg.norm(m.fc1.weight.data.numpy().squeeze() - self.coeffs) < 1.0 + ), "WebDiscoTorch could not retrieve the true Cox model." + + # We match lifelines in all cases (including use of propensity) + # except when there is a penalizer as lifelines + # does like 50 iterations, which would take days in Substra + # if we could afford to change cls.NUM_ROUNDS to say 60 isntead of 8 + # we could get rid of the if + if np.allclose(self.penalizer, 0.0): + assert np.allclose(self.cphf.params_, self.final_params, atol=1e-4) + + @pytest.mark.slow + def test_standard_deviations(self): + """Test standard deviations.""" + ( + self.hessian, + self.ll, + self.final_params, + self.computed_stds, + _, + ) = get_final_cox_model_function( + self.ds_client, + self.webdisco_experiment.compute_plan_keys[0].key, + self.NUM_ROUNDS, + self.standardize_data, + self._duration_col, + self._event_col, + ) + self.scaled_variance_matrix = -inv(self.hessian) / np.outer( + self.computed_stds, self.computed_stds + ) + summary = compute_summary_function( + self.final_params, self.scaled_variance_matrix, self.cphf.alpha + ) + ground_truth_df = self.cphf.summary + ground_truth_df = ground_truth_df[summary.columns] + # In case index are not matching + summary.index = ground_truth_df.index + gt_ll = self.cphf.log_likelihood_ + assert 
class TestWebDiscoUnstandardized(TestWebDisco):
    """Test WebDisco without data standardization."""

    @classmethod
    def setUpClass(cls):
        """Set up the class with standardization disabled."""
        super().setUpClass(standardize_data=False)


class TestWebDiscoWithWeights(TestWebDisco):
    """Test WebDisco with propensity weighting."""

    @classmethod
    def setUpClass(cls):
        """Set up the class with propensity scores enabled."""
        super().setUpClass(use_propensity=True)

    def test_compute_true_moments(self):
        """Skip moment checks in the weighted setting."""

    def test_aggregate_statistics(self):
        """Skip aggregated-statistics checks in the weighted setting."""


class TestWebDiscoWithPenalizer(TestWebDisco):
    """Test WebDisco with an elastic-net penalizer."""

    @classmethod
    def setUpClass(cls):
        """Set up the class with a non-zero penalizer."""
        super().setUpClass(penalizer=0.1, l1_ratio=0.5)

    def test_compute_true_moments(self):
        """Skip moment checks in the penalized setting."""

    def test_aggregate_statistics(self):
        """Skip aggregated-statistics checks in the penalized setting."""

    def test_standard_deviations(self):
        """Skip standard-deviation checks in the penalized setting.

        It's a pity but lifelines just does too many iterations to be able
        to test it in a reasonable amount of time due to the slowness of
        this implementation.
        TODO test it in simu mode
        """
columns=[f"X_{i}" for i in range(n_features)]) + data["time"] = np.array([5, 6, 7, 8, 9]) + data["event"] = np.array([1, 1, 0, 1, 1]) + data["treatment_allocation"] = np.array([1, 0, 0, 1, 1]) + data["propensity_scores"] = np.array([0.55, 0.34, 0.76, 0.29, 0.32]) + data["weights"] = np.array([1.2, 1.4, 1.6, 1.8, 1.1]) + + return data + + +class FakeModel(BootstrapMixin): + """Fake model that supports bootstrapping.""" + + def __init__(self, seed): + """Initialize with seed.""" + self.rng = np.random.default_rng(seed) + + def point_estimate(self, data: pd.DataFrame) -> npt.ArrayLike: + """Return a point estimate of the treatment effect.""" + return self.rng.random(size=2) + + +@pytest.mark.parametrize("seed", seeds) +def test_bootstrap(test_settings, seed: int): + """Test BootstrapMixin.""" + model = FakeModel(seed) + + # Test resampling + data = test_settings + data_resampled = data.sample( + data.shape[0], replace=True, random_state=np.random.default_rng(seed) + ) + data_bootstrapped = model.bootstrap_sample(data, seed=seed) + pd.testing.assert_frame_equal(data_resampled, data_bootstrapped) + + # Test bootstrapping + std_est = model.bootstrap_std(test_settings, n_bootstrap=10) + rng = np.random.default_rng(seed) + std_true = np.std(rng.random(size=(10, 2)), axis=0) + + assert std_est is not None + np.testing.assert_array_equal(std_est, std_true) diff --git a/fedeca/tests/test_cate.py b/fedeca/tests/test_cate.py new file mode 100644 index 00000000..b0c446d3 --- /dev/null +++ b/fedeca/tests/test_cate.py @@ -0,0 +1,55 @@ +"""Tests for cate related functionality.""" +import numpy as np +import pytest +from scipy.stats import binomtest + +from fedeca.utils.survival_utils import CoxData + +list_constant_cate = [0.5, 1.0, 1.5] +list_random_cate = ["linear", "random"] +list_features_type = ["cov_toeplitz", "cov_uniform", "indep_gauss"] + + +@pytest.mark.parametrize("cate", list_constant_cate) +def test_constant_cate(cate: float): + """Tests of data generation with 
constant cate.""" + data_gen = CoxData( + n_samples=1, + ndim=10, + cate=cate, + seed=42, + ) + + n_samples = 1001 + data_gen.generate_data(n_samples=n_samples) + + cate_vector = data_gen.probability_treated + assert cate_vector is not None + np.testing.assert_allclose(cate_vector, cate) + + +@pytest.mark.parametrize("features_type", list_features_type) +@pytest.mark.parametrize("cate", list_random_cate) +def test_linear_cate(features_type, cate): + """Tests of data generation with linear cate.""" + data_gen = CoxData( + n_samples=1, + ndim=10, + cate=cate, + features_type=features_type, + seed=42, + ) + + n_samples = 1001 + data_gen.generate_data(n_samples=n_samples) + + cate_vector = data_gen.probability_treated + assert cate_vector is not None + # * linear_cate + # features are multivariate normal variables, whose linear combination is + # also normal, cate_vector is therefore log-normal, with median exp(0) = 1 + # * random_cate + # cate_vector is by definition log-normal, with median exp(0) = 1 + # test median + conf_int = binomtest(np.sum(cate_vector > 1), cate_vector.size, 0.5).proportion_ci() + assert conf_int.low <= 0.5 <= conf_int.high diff --git a/fedeca/tests/test_competitors.py b/fedeca/tests/test_competitors.py new file mode 100644 index 00000000..2683518f --- /dev/null +++ b/fedeca/tests/test_competitors.py @@ -0,0 +1,148 @@ +"""Test file for the competitors.""" +import numpy as np +import pandas +import pytest +from indcomp import MAIC +from lifelines import CoxPHFitter +from sklearn.linear_model import LogisticRegression + +from fedeca import MatchingAjudsted, NaiveComparison, PooledIPTW + + +@pytest.fixture +def test_settings(): + """Set up for the tests of the competitors class.""" + rng = np.random.default_rng(123) + n_samples = 5 + n_features = 3 + + X = rng.normal(size=(n_samples, n_features)) + data = pandas.DataFrame(data=X, columns=[f"X_{i}" for i in range(n_features)]) + data["time"] = np.array([5, 6, 7, 8, 9]) + data["event"] = 
np.array([1, 1, 0, 1, 1]) + data["treatment_allocation"] = np.array([1, 0, 0, 1, 1]) + data["propensity_scores"] = np.array([0.55, 0.34, 0.76, 0.29, 0.32]) + data["weights"] = np.array([1.2, 1.4, 1.6, 1.8, 1.1]) + + return data + + +def test_naive_comparison(test_settings): + """Tests for naive comparison class.""" + data = test_settings + + naive_comparison = NaiveComparison( + treated_col="treatment_allocation", event_col="event", duration_col="time" + ) + naive_comparison.fit(data) + + cox_model = CoxPHFitter() + cox_model.fit( + data[["time", "event", "treatment_allocation"]], + "time", + "event", + ) + + pandas.testing.assert_frame_equal( + left=cox_model.summary, right=naive_comparison.results_ + ) + + +def test_iptw(test_settings): + """Tests for the PooledIPTW class.""" + data = test_settings + + # tests weights computation + + data = data.drop(["weights"], axis=1) + logreg = LogisticRegression(solver="lbfgs", penalty=None) + mask_col = data.columns.isin( + ["treatment_allocation", "event", "time", "propensity_scores"] + ) + logreg.fit(np.array(data.loc[:, ~mask_col]), data["treatment_allocation"]) + + propensity_scores = logreg.predict_proba(np.array(data.loc[:, ~mask_col]))[:, 1] + weights = np.divide(1, propensity_scores) * np.array(data["treatment_allocation"]) + weights += np.divide(1, 1 - propensity_scores) * ( + 1 - np.array(data["treatment_allocation"]) + ) + + pooled_iptw = PooledIPTW( + treated_col="treatment_allocation", + event_col="event", + duration_col="time", + variance_method="robust", + ) + pooled_iptw.fit(data) + + np.testing.assert_allclose( + pooled_iptw.weights_, + weights, + ) + + # tests that weighted cox is well performed + data = test_settings + + results = pooled_iptw._estimate_effect( + data.drop(["weights"], axis=1), data["weights"] + ) + + weighted_cox = CoxPHFitter() + weighted_cox.fit( + data[["time", "event", "treatment_allocation", "weights"]], + "time", + "event", + weights_col="weights", + robust=True, + ) + + 
pandas.testing.assert_frame_equal(left=weighted_cox.summary, right=results.summary) + + +def test_maic(test_settings): + """Test for MAIC class.""" + data = test_settings + data = data.drop(["weights"], axis=1) + treated_col = "treatment_allocation" + maic = MatchingAjudsted( + treated_col=treated_col, + event_col="event", + duration_col="time", + variance_method="robust", + ) + maic.fit(data) + + # MAIC model tested here + # https://github.com/AidanCooper/indcomp/blob/main/tests/test_maic.py + df_agg = data.groupby(treated_col) + df_agg = df_agg[["X_0", "X_1", "X_2"]].agg(["mean", "std"]) + df_agg.columns = [".".join(x) for x in df_agg.columns] + targets = df_agg.loc[[0]] + + matching_dict = {} + for col in ["X_0", "X_1", "X_2"]: + matching_dict[col + ".mean"] = ("mean", col) + matching_dict[col + ".std"] = ("std", col, col + ".mean") + + true_maic = MAIC( + df_index=data.loc[data[treated_col] == 1], + df_target=targets, + match=matching_dict, + ) + true_maic.calc_weights() + + true_weights = true_maic.weights_ + + data.loc[data[treated_col].eq(1), "weights"] = true_weights + data.loc[~data[treated_col].eq(1), "weights"] = 1 + + weighted_cox = CoxPHFitter() + weighted_cox.fit( + data[["time", "event", treated_col, "weights"]], + "time", + "event", + weights_col="weights", + robust=True, + ) + + pandas.testing.assert_frame_equal(left=weighted_cox.summary, right=maic.results_) diff --git a/fedeca/tests/test_distributed_robust_var_npy.py b/fedeca/tests/test_distributed_robust_var_npy.py new file mode 100644 index 00000000..4639f755 --- /dev/null +++ b/fedeca/tests/test_distributed_robust_var_npy.py @@ -0,0 +1,112 @@ +"""Script to test robust variance.""" +import lifelines +import numpy as np +from lifelines import CoxPHFitter + +from fedeca.tests.common import TestTempDir +from fedeca.utils.survival_utils import ( # robust_sandwich_variance_pooled, + CoxData, + robust_sandwich_variance_distributed, +) + + +class TestRobustDistVarNumpy(TestTempDir): + """Webdisco 
tests class.""" + + @classmethod + def setUpClass( + cls, + ndim=10, + nsamples=1000, + seed=1, + ): + """Initialize tests with data and FedIPTW object. + + Parameters + ---------- + nsamples : int, optional + The number of patients in total. + ndim : int, optional + The number of dimensions, by default 10 + seed : int, optional + The seed, by default 43 + """ + super().setUpClass() + cls.ndim = ndim + cls.nsamples = nsamples + cls.seed = seed + rdm_state = np.random.default_rng(seed=cls.seed) + # Generate data + + data = CoxData( + seed=cls.seed, + n_samples=cls.nsamples, + ndim=cls.ndim, + scale_t=10.0, + shape_t=3.0, + propensity="linear", + standardize_features=False, + ) + df = data.generate_dataframe() + df = df.drop(columns=["propensity_scores", "treatment"], axis=1) + df["weights"] = np.abs(rdm_state.normal(size=cls.nsamples)) + cls.df = df + + # Fit simple cox model with robust = False + cls.lifelines_cph = CoxPHFitter() + cls.lifelines_cph.fit( + cls.df, + duration_col="time", + event_col="event", + robust=False, + weights_col="weights", + ) + + cls.non_robust_lifelines_variance = cls.lifelines_cph.variance_matrix_ + + # Create labels as in FedECA setup + cls.y = np.array(df["time"]) + cls.y[df["event"] == 0] = -1 * cls.y[df["event"] == 0] + cls.X = df.drop(columns=["time", "event", "weights"], axis=1) + + # need to normalize as in lifelines + cls.X = lifelines.utils.normalize(cls.X, cls.X.mean(0), cls.X.std(0)) + + cls.lifelines_cph.fit( + cls.df, + duration_col="time", + event_col="event", + robust=True, + weights_col="weights", + ) + cls.true_variance = cls.lifelines_cph.variance_matrix_ + + cls.beta = cls.lifelines_cph.params_ * cls.lifelines_cph._norm_std + cls.weights = np.array(df["weights"].copy()) + cls.scaled_variance_matrix = ( + cls.non_robust_lifelines_variance + * np.tile(cls.lifelines_cph._norm_std.values, (cls.ndim, 1)).T + ) + + def test_distributed_se_computation(self, n_clients=2): + """Test equivalence with lifelines. 
+ + Parameters + ---------- + n_clients : int, optional + The number of clients, by default 10 + """ + se = robust_sandwich_variance_distributed( + np.array(self.X), + self.y, + self.beta, + self.weights, + self.scaled_variance_matrix, + n_clients=n_clients, + ) + + np.testing.assert_allclose( + se, + self.lifelines_cph.summary["se(coef)"], + rtol=1e-5, + ) diff --git a/fedeca/tests/test_dp_end2end.py b/fedeca/tests/test_dp_end2end.py new file mode 100644 index 00000000..f934fc4b --- /dev/null +++ b/fedeca/tests/test_dp_end2end.py @@ -0,0 +1,178 @@ +"""Tests for DP training.""" + +import numpy as np +import pandas as pd +import torch +import torch.nn as nn + +# from substrafl.model_loading import download_algo_state +from substrafl.strategies import FedAvg +from torch.optim import SGD + +from fedeca.algorithms.torch_dp_fed_avg_algo import TorchDPFedAvgAlgo +from fedeca.fedeca_core import LogisticRegressionTorch +from fedeca.tests.common import TestTempDir +from fedeca.utils import ( + Experiment, + make_accuracy_function, + make_substrafl_torch_dataset_class, +) +from fedeca.utils.survival_utils import CoxData, make_categorical + + +# TODO increase rounds and an an assert to pooled equivalence as in +# aper simulations +class TestDPPropensityEnd2End(TestTempDir): + """Webdisco tests class.""" + + @classmethod + def setUpClass( + cls, + n_clients=3, + ndim=10, + nsamples=300, + seed=43, + ): + """Initialize tests with data and FedIPTW object. + + Parameters + ---------- + n_clients : int, optional + The number of clients, by default 3 + nsamples : int, optional + The number of patients in total. 
+ ndim : int, optional + The number of dimensions, by default 10 + initial_step_size : float, optional + The first step size of NR descent, by default 0.95 + seed : int, optional + The seed, by default 43 + standardize_data : bool, optional + Whether or not to standardize data, by default True + l1_ratio : float, optional + The l1 ratio wrt L2., by default 0.0 + penalizer : float, optional + The weight for the elasticnet penalty, by default 0.0 + learning_rate_strategy : str, optional + How do we decrease the lr, by default "lifelines" + """ + super().setUpClass() + cls.n_clients = n_clients + rng = np.random.default_rng(seed) + # Generating data with strong linear relationship + simu_coxreg = CoxData( + n_samples=nsamples, + ndim=ndim, + prop_treated=0.5, + propensity="linear", + dtype="float32", + # Strong linearity + overlap=100.0, + seed=rng, + random_censoring=True, + censoring_factor=0.3, + standardize_features=False, + ) + X, T, C, treated, _ = simu_coxreg.generate_data() + # Will make first columns to be categorical + Xcat, Xcont = make_categorical(X, up_to=0) + # Build the final dataframe using appropriate column names and adding + # missing values + cols_dict = {} + X = np.concatenate((Xcat, Xcont), axis=1) + for i in range(Xcat.shape[1] + Xcont.shape[1]): + currentX = X[:, i].astype("float32") + mask_na = rng.uniform(0, 1, X.shape[0]) > (1.0 - 0.0) + currentX[mask_na] = np.nan + if i < Xcat.shape[1]: + colname = "cat_col" + else: + colname = "col" + i -= Xcat.shape[1] + cols_dict[f"{colname}_{i}"] = currentX + + # The absolute value is superfluous but just to be sure + cols_dict["T"] = np.abs(T) + cols_dict["E"] = (1.0 - C).astype("uint8") + cols_dict["treated"] = treated + + df = pd.DataFrame(cols_dict) + # Final cast of categorical columns that was impossible due to nan in numpy + for i in range(Xcat.shape[1]): + df[f"cat_col_{i}"] = df[f"cat_col_{i}"].astype("Int64") + + cls.df = df + accuracy_metrics_dict = {"accuracy": 
make_accuracy_function("treated")} + logreg_dataset_class = make_substrafl_torch_dataset_class( + ["treated"], "E", "T", dtype="float32", return_torch_tensors=True + ) + cls.dp_trainings = [] + for i in range(2): + num_rounds = 10 + logreg_model = LogisticRegressionTorch(ndim, torch.float32) + optimizer = SGD(logreg_model.parameters(), lr=0.01) + + # Do not put self attributes in this class + class DPLogRegAlgo(TorchDPFedAvgAlgo): + def __init__(self): + super().__init__( + model=logreg_model, + criterion=nn.BCELoss(), + optimizer=optimizer, + dataset=logreg_dataset_class, + seed=seed, + num_updates=100, + batch_size=32, + num_rounds=num_rounds, + # industry standard + dp_target_epsilon=10.0, + # around 1/nsamples aroximately + dp_target_delta=0.001, + dp_max_grad_norm=1.0, + ) + + dp_algo = DPLogRegAlgo() + dp_fedavg_strategy = FedAvg(algo=dp_algo) + dp_xp = Experiment( + strategies=[dp_fedavg_strategy], + num_rounds_list=[num_rounds], + metrics_dicts_list=[accuracy_metrics_dict], + ) + cls.dp_trainings.append(dp_xp) + + def test_fit(self): + """Test end2end aplication of DP FL to synthetic data.""" + dp_kwargs = { + "data": self.df, + "nb_clients": self.n_clients, + "data_path": self.test_dir, + } + + dp_kwargs["backend_type"] = "subprocess" + self.dp_trainings[0].fit(**dp_kwargs) + # final_algo = download_algo_state( + # client=self.dp_trainings[0].ds_client, + # compute_plan_key=self.dp_trainings[0].compute_plan_keys[0].key, + # round_idx=None, + # ) + + # final_model_subprocess = final_algo.model + + dp_kwargs["backend_type"] = "simu" + self.dp_trainings[1].fit(**dp_kwargs) + # final_model_simu = self.dp_trainings[1].train_data_nodes[0].algo.model + + # assert np.allclose(final_model_subprocess.fc1.weight.detach().numpy(), + # final_model_simu.fc1.weight.detach().numpy()) + + @classmethod + def tearDownClass(cls): + """Tear down the class.""" + super(TestDPPropensityEnd2End, cls).tearDownClass() + # We need to avoid persistence of DB in between TestCases, this 
is an obscure + # hack but it's working + first_client = cls.dp_trainings[0].ds_client + database = first_client._backend._db._db._data + if len(database.keys()) > 1: + for k in list(database.keys()): + database.pop(k) diff --git a/fedeca/tests/test_fliptw_end2end.py b/fedeca/tests/test_fliptw_end2end.py new file mode 100644 index 00000000..ebda2323 --- /dev/null +++ b/fedeca/tests/test_fliptw_end2end.py @@ -0,0 +1,113 @@ +"""Tests for webdisco.""" +from pandas.testing import assert_frame_equal + +from fedeca.fedeca_core import FedECA +from fedeca.tests.common import TestTempDir +from fedeca.utils.data_utils import generate_survival_data + + +# TODO increase rounds and an an assert to pooled equivalence as in +# aper simulations +class TestFLIPTWEnd2End(TestTempDir): + """Webdisco tests class.""" + + @classmethod + def setUpClass( + cls, + n_clients=3, + ndim=10, + initial_step_size=0.95, + seed=43, + standardize_data=True, + l1_ratio=0.0, + penalizer=0.0, + use_propensity=False, + learning_rate_strategy="lifelines", + ): + """Initialize tests with data and FedIPTW object. 
+ + Parameters + ---------- + n_clients : int, optional + The number of clients, by default 3 + ndim : int, optional + The number of dimensions, by default 10 + initial_step_size : float, optional + The first step size of NR descent, by default 0.95 + seed : int, optional + The seed, by default 43 + standardize_data : bool, optional + Whether or not to standardize data, by default True + l1_ratio : float, optional + The l1 ratio wrt L2., by default 0.0 + penalizer : float, optional + The weight for the elasticnet penalty, by default 0.0 + learning_rate_strategy : str, optional + How do we decrease the lr, by default "lifelines" + """ + super().setUpClass() + cls.n_clients = n_clients + cls.df, _ = generate_survival_data( + na_proportion=0.0, + ncategorical=0, + ndim=ndim, + seed=seed, + n_samples=1000, + use_cate=False, + censoring_factor=0.3, + ) + # We can choose not to give any clients or data of any kind to FedECA + # they will be given to it by the fit method + cls.IPTWs = [ + FedECA( + ndim=ndim, + treated_col="treated", + duration_col="T", + event_col="E", + num_rounds_list=[10, 10], + initial_step_size=initial_step_size, + seed=seed, + standardize_data=standardize_data, + l1_ratio=l1_ratio, + penalizer=penalizer, + learning_rate_strategy=learning_rate_strategy, + ) + for _ in range(2) + ] + + def test_fit(self): + """Test end2end aplication of IPTW to synthetic data.""" + iptw_kwargs = { + "data": self.df, + "targets": None, + "n_clients": self.n_clients, + "split_method": "split_control_over_centers", + "split_method_kwargs": {"treatment_info": "treated"}, + "data_path": self.test_dir, + # "dp_target_epsilon": 2., + # "dp_max_grad_norm": 1., + # "dp_target_delta": 0.001, + # "dp_propensity_model_training_params": {"batch_size": 100, "num_updates": 100}, # noqa: E501 + # "dp_propensity_model_optimizer_kwargs": {"lr": 0.01}, + } + + iptw_kwargs["backend_type"] = "subprocess" + self.IPTWs[0].fit(**iptw_kwargs) + iptw_kwargs["backend_type"] = "simu" + 
self.IPTWs[1].fit(**iptw_kwargs) + # TODO verify propensity model training wrt sklearn and full chain + # vs iptw pooled implementation with sklearn and lifelines + assert_frame_equal(self.IPTWs[0].results_, self.IPTWs[1].results_) + assert self.IPTWs[0].ll[0] == self.IPTWs[1].ll[0] + + @classmethod + def tearDownClass(cls): + """Tear down the class.""" + super(TestFLIPTWEnd2End, cls).tearDownClass() + # We need to avoid persistence of DB in between TestCases, this is an obscure + # hack but it's working + first_client = cls.IPTWs[0].ds_client + database = first_client._backend._db._db._data + if len(database.keys()) > 1: + for k in list(database.keys()): + database.pop(k) diff --git a/fedeca/tests/test_propensity.py b/fedeca/tests/test_propensity.py new file mode 100644 index 00000000..67723bb7 --- /dev/null +++ b/fedeca/tests/test_propensity.py @@ -0,0 +1,51 @@ +"""Tests for propensity score related functionality.""" +import numpy as np +import pytest +from scipy.stats import binomtest + +from fedeca.utils.survival_utils import CoxData + +list_prop_treated = np.arange(0.1, 1, 0.1) + + +@pytest.mark.parametrize("prop_treated", list_prop_treated) +def test_constant_propensity(prop_treated: float): + """Tests of data generation with constant propensity score.""" + data_gen = CoxData( + n_samples=1, + ndim=100, + prop_treated=prop_treated, + propensity="constant", + seed=42, + ) + + n_samples = 1001 + _, _, _, treated, ps_scores = data_gen.generate_data(n_samples=n_samples) + + np.testing.assert_allclose(ps_scores, data_gen.prop_treated) + + n_treated_expected = int(n_samples * prop_treated) + n_treated = treated.sum() + # Constant propensity will use `random_treatment_allocation` which ensures + # that `prop_treated` is respected, therefore check equality up to rounding. 
+ assert n_treated in (n_treated_expected, n_treated_expected + 1) + + +def test_linear_propensity(): + """Tests of data generation with linear propensity score.""" + prop_treated = 0.5 + data_gen = CoxData( + n_samples=1, + ndim=100, + prop_treated=prop_treated, + propensity="linear", + seed=42, + ) + + n_samples = 1001 + _, _, _, treated, ps_scores = data_gen.generate_data(n_samples=n_samples) + + np.testing.assert_allclose(ps_scores.mean(), prop_treated, atol=0.05) + + conf_int = binomtest(treated.sum(), treated.size, prop_treated).proportion_ci() + assert conf_int.low <= prop_treated <= conf_int.high diff --git a/fedeca/tests/test_robust_iptw.py b/fedeca/tests/test_robust_iptw.py new file mode 100644 index 00000000..15ae717c --- /dev/null +++ b/fedeca/tests/test_robust_iptw.py @@ -0,0 +1,97 @@ +"""Tests for Robust IPTW.""" +import pandas as pd +import pytest + +from fedeca.competitors import PooledIPTW +from fedeca.fedeca_core import FedECA +from fedeca.tests.common import TestTempDir +from fedeca.utils.survival_utils import CoxData + + +class TestFedECAEnd2End(TestTempDir): + """IPTW tests class.""" + + # This is very long and thus should be only executed once that is why we use + # setUpClass unlike setUp wich would otherwise get executed for each method + @classmethod + def setUpClass( + cls, + n_clients=3, + ndim=100, + nsamples=1000, + seed=43, + robust=False, + ): + """Set up the test class for experiment comparison. + + Parameters + ---------- + n_clients : int + The number of clients in the federation + nsamples : int + The number of samles in total. + seed : int + The seed to use for the data generation process. + robust : bool + Whether to use robust variance estimation or not. 
+ """ + super().setUpClass() + cls.seed = seed + cls.nsamples = nsamples + cls.ndim = ndim + cls.robust = robust + data = CoxData(seed=cls.seed, n_samples=cls.nsamples, ndim=cls.ndim) + df = data.generate_dataframe() + cls.df = df.drop(columns=["propensity_scores"], axis=1) + cls._treated_col = "treatment" + cls._event_col = "event" + cls._duration_col = "time" + + cls.pooled_iptw = PooledIPTW( + treated_col=cls._treated_col, + event_col=cls._event_col, + duration_col=cls._duration_col, + cox_fit_kwargs={"robust": cls.robust}, + ) + cls.pooled_iptw.fit(cls.df) + cls.pooled_iptw_results = cls.pooled_iptw.results_ + + cls.fed_iptw = FedECA( + ndim=cls.ndim, + treated_col=cls._treated_col, + duration_col=cls._duration_col, + event_col=cls._event_col, + num_rounds_list=[50, 50], + ) + cls.fed_iptw.fit( + cls.df, + None, + n_clients, + split_method="split_control_over_centers", + split_method_kwargs={"treatment_info": cls._treated_col}, + backend_type="subprocess", + robust=cls.robust, + data_path=cls.test_dir, + ) + cls.fed_iptw_results = cls.fed_iptw.results_ + + @pytest.mark.slow + def test_standard_deviations(self): + """Test equality of end results. + + We allow ourselves rtol=1e-2 as in the paper. 
+ """ + pd.testing.assert_frame_equal( + self.pooled_iptw_results.reset_index()[self.fed_iptw_results.columns], + self.fed_iptw_results, + rtol=1e-2, + ) + + +class TestRobustFedECAEnd2End(TestFedECAEnd2End): + """RobustIPTW tests class.""" + + @classmethod + def setUpClass(cls): + """Use parent class setup with robust=True.""" + super().setUpClass(robust=True) diff --git a/fedeca/utils/__init__.py b/fedeca/utils/__init__.py new file mode 100644 index 00000000..ff9a7a2d --- /dev/null +++ b/fedeca/utils/__init__.py @@ -0,0 +1,9 @@ +"""Init file for utility functions.""" +from .substrafl_utils import ( + Experiment, + SubstraflTorchDataset, + make_substrafl_torch_dataset_class, + make_accuracy_function, + make_c_index_function, +) +from .moments_utils import compute_uncentered_moment, aggregation_mean diff --git a/fedeca/utils/constants.py b/fedeca/utils/constants.py new file mode 100644 index 00000000..aee2513b --- /dev/null +++ b/fedeca/utils/constants.py @@ -0,0 +1,20 @@ +"""File containing constants for the repo.""" +import socket +from os.path import join + +import git + +if socket.gethostname().startswith("abstra"): + EXPE_PATH = "/home/owkin/project/results_experiments" +else: + repo = git.Repo(".", search_parent_directories=True) + EXPE_PATH = join(repo.working_dir, "experiments", "results") + +EXPERIMENTS_PATHS = { + "pooled_equivalent": EXPE_PATH + "/pooled_equivalent/", + "nb_clients": EXPE_PATH + "/nb_clients/", + "power": EXPE_PATH + "/power/", + "dp_results": EXPE_PATH + "/pooled_equivalent_dp/", + "real_world": EXPE_PATH + "/real-world/", + "robust_pooled_equivalence": EXPE_PATH + "/robust_pooled_equivalence/", +} diff --git a/fedeca/utils/data_utils.py b/fedeca/utils/data_utils.py new file mode 100644 index 00000000..a8a78fd0 --- /dev/null +++ b/fedeca/utils/data_utils.py @@ -0,0 +1,355 @@ +"""Utility functions of data generation.""" +import copy +import math +import os +import random +import zlib +from collections.abc import Callable +from itertools 
import chain +from pathlib import Path +from typing import Union + +import numpy as np +import pandas as pd +from pandas.util import hash_pandas_object +from substra.sdk.schemas import DataSampleSpec, DatasetSpec, Permissions +from substrafl.nodes import TestDataNode, TrainDataNode + +import fedeca +from fedeca.utils.substra_utils import Client +from fedeca.utils.survival_utils import generate_survival_data + + +def generate_cox_data_and_substra_clients( + n_clients: int = 2, + ndim: int = 10, + split_method_kwargs: Union[dict, None] = None, + backend_type: str = "subprocess", + data_path: Union[str, None] = None, + urls: Union[list, None] = None, + tokens: Union[list, None] = None, + seed: int = 42, + n_per_client: int = 200, + add_treated: bool = False, + ncategorical: int = 0, +): + """Generate Cox data on disk for several clients. + + Generate Cox data and register them with different + fake clients. + + Parameters + ---------- + n_clients : int, (optional) + Number of clients. Defaults to 2. + ndim : int, (optional) + Number of covariates. Defaults to 10. + split_method_kwargs = Union[dict, None] + The argument to the split_method uniform. + backend_type : str, (optional) + Type of backend. Defaults to "subprocess". + data_path : str, (optional) + Path to save the data. Defaults to None. + seed : int, (optional) + Random seed. Defaults to 42. + n_per_client : int, (optional) + Number of samples per client. Defaults to 200. + add_treated : bool, (optional) + Whether or not to keep treated column. + ncategorical: int, (optional) + Number of features to make categorical a posteriori (moving away from Cox + assumptions). 
+ """ + assert backend_type in ["remote", "docker", "subprocess"] + assert n_clients >= 2 + if split_method_kwargs is not None: + split_method_kwargs = copy.deepcopy(split_method_kwargs) + if "seed" in split_method_kwargs: + raise ValueError("You provided splitting seed twice") + else: + split_method_kwargs = {} + split_method_kwargs["seed"] = seed + df, cox_model_coeffs = generate_survival_data( + na_proportion=0.0, + ncategorical=ncategorical, + ndim=ndim, + seed=seed, + n_samples=n_per_client * n_clients, + use_cate=False, + censoring_factor=0.3, + ) + + if not (add_treated): + df.drop("treated", axis=1, inplace=True) + + return ( + *split_dataframe_across_clients( + df=df, + n_clients=n_clients, + split_method="uniform", + split_method_kwargs=split_method_kwargs, + backend_type=backend_type, + data_path=data_path, + urls=urls, + tokens=tokens, + ), + cox_model_coeffs, + ) + + +def split_dataframe_across_clients( + df, + n_clients, + split_method: Union[Callable, str] = "uniform", + split_method_kwargs: Union[dict, None] = None, + backend_type="subprocess", + data_path: Union[str, None] = None, + urls=[], + tokens=[], +): + """Split patients over the centers. + + Parameters + ---------- + df : pandas.DataFrame, + Dataframe containing features of the patients. + n_clients : int, + Number of clients. + split_method : Union[Callable, str] + How to split the dataset across all clients, if callable should have the + signature: df, n_clients, kwargs -> list[list[int]] + if str should be an existing key, which will invoke the corresponding + callable. Possible values are `uniform` which splits the patients + uniformly across centers or `split_control_over_centers` where one + center has all the treated patients and the control is split over the + remaining ones. + split_method_kwargs: Union[dict, None] + Optional kwargs for the split_method method. + backend_type : str, (optional) + Backend type. Default is "subprocess". 
+ data_path : Union[str, None], + Path on where to save the data on disk. + urls : List, + List of urls. + tokens : List, + List of tokens. + """ + # Deterministic hashing of non human-readable objects: df and + # split_method_kwargs + to_hash = hash_pandas_object(df, index=True).values.tolist() + [split_method_kwargs] + to_hash = str.encode("".join([str(e) for e in to_hash])) + hash_df = zlib.adler32(to_hash) + clients = [] + if backend_type == "remote": + assert ( + len(urls) == n_clients + ), f"You should provide a list of {n_clients} URLs for the different clients" + assert ( + len(tokens) == n_clients + ), "You should provide a token for each client in remote mode" + for i in range(n_clients): + clients.append(Client(url=urls[i], token=tokens[i], backend_type="remote")) + else: + for i in range(n_clients): + clients.append(Client(backend_type=backend_type)) + + clients = {c.organization_info().organization_id: c for c in clients} + # Store organization IDs + ORGS_ID = list(clients.keys()) + + ALGO_ORG_ID = ORGS_ID[0] # Algo provider is defined as the first organization. 
+ DATA_PROVIDER_ORGS_ID = ORGS_ID + + if data_path is None: + (Path.cwd() / "tmp").mkdir(exist_ok=True) + data_path = Path.cwd() / "tmp" / "data_eca" + else: + data_path = Path(data_path) + # All paths need to be absolute paths + data_path = data_path.resolve() + + (data_path).mkdir(exist_ok=True) + if isinstance(split_method, str): + assert split_method in [ + "uniform", + "split_control_over_centers", + ], f"split_method name {split_method} not recognized" + if split_method == "uniform": + split_method = uniform_split + else: + split_method = split_control_over_centers + + if split_method_kwargs is None: + split_method_kwargs = {} + # Now split_method is a Callable + clients_indices_list = split_method(df, n_clients, **split_method_kwargs) + all_indices = set(chain.from_iterable(clients_indices_list)) + # Check that split methods is valid (no drop_last) could be removed for + # more flexibility + assert len(all_indices) == len(df.index) + assert set(all_indices) == set(range(len(df.index))) + dfs = [] + + for i in range(n_clients): + os.makedirs(data_path / f"center{i}", exist_ok=True) + cdf = df.iloc[clients_indices_list[i]] + df_path = data_path / f"center{i}" / "data.csv" + if df_path.exists(): + df_path.unlink() + cdf.to_csv(df_path, index=False) + dfs.append(cdf) + assets_directory = Path(fedeca.__file__).parent / "scripts" / "substra_assets" + + # Sample registration: will fill two dicts with relevant handles to retrieve + # data + dataset_keys = {} + datasample_keys = {} + + dataset_name = ( + f"ECA-{hash_df}-nclients{n_clients}-split-method{split_method.__name__}" + ) + + for i, org_id in enumerate(DATA_PROVIDER_ORGS_ID): + client = clients[org_id] + found_eca_datasets = [ + dataset + for dataset in client.list_dataset(filters={"owner": [org_id]}) + if dataset.name == dataset_name + ] + if len(found_eca_datasets) == 0: + permissions_dataset = Permissions( + public=False, authorized_ids=[ALGO_ORG_ID] + ) + # DatasetSpec is the specification of a dataset. 
It makes sure every field + # is well defined, and that our dataset is ready to be registered. + # The real dataset object is created in the add_dataset method. + dataset = DatasetSpec( + name=dataset_name, + type="csv", + data_opener=assets_directory / "csv_opener.py", + description=assets_directory / "description.md", + permissions=permissions_dataset, + logs_permission=permissions_dataset, + ) + dataset_keys[org_id] = client.add_dataset(dataset) + assert dataset_keys[org_id], "Missing dataset key" + + # Add the training data on each organization. + data_sample = DataSampleSpec( + data_manager_keys=[dataset_keys[org_id]], + path=data_path / f"center{i}", + ) + datasample_keys[org_id] = client.add_data_sample(data_sample) + # Maybe samples already exist in the platform + else: + dataset_keys[org_id] = found_eca_datasets[0].key + datasample_keys[org_id] = found_eca_datasets[0].data_sample_keys[0] + + # Actual creation of objects of interest + train_data_nodes = [] + test_data_nodes = [] + for org_id in DATA_PROVIDER_ORGS_ID: + # Create the Train Data Node (or training task) and save it in a list + train_data_node = TrainDataNode( + organization_id=org_id, + data_manager_key=dataset_keys[org_id], + data_sample_keys=[datasample_keys[org_id]], + ) + + train_data_nodes.append(train_data_node) + + # Create the Train Data Node (or training task) and save it in a list + test_data_node = TestDataNode( + organization_id=org_id, + data_manager_key=dataset_keys[org_id], + test_data_sample_keys=[datasample_keys[org_id]], + metric_functions=[], + ) + + test_data_nodes.append(test_data_node) + + return clients, train_data_nodes, test_data_nodes, dfs, df + + +def uniform_split( + df: pd.DataFrame, n_clients: int, use_random: bool = True, seed: int = 42 +): + """Split patients uniformly over n_clients. + + Parameters + ---------- + df : pandas.DataFrame, + Dataframe containing features of the patients. + n_clients : int, + Number of clients. 
+ use_random : bool + Whether or not to shuffle data before splitting. Defaults to True. + seed : int, (optional) + Seeding for shuffling + """ + # We don't want to alter df + df_func = copy.deepcopy(df) + df_func_size = len(df_func.index) + indices = list(range(df_func_size)) + if use_random: + random.seed(seed) + random.shuffle(indices) + indices_list = [] + n_samples_per_client = math.ceil(df_func_size / n_clients) + + for i in range(n_clients): + start = i * n_samples_per_client + stop = min((i + 1) * n_samples_per_client, len(indices)) + indices_center = indices[start:stop] + indices_list.append(indices_center) + return indices_list + + +def split_control_over_centers( + df, + n_clients, + treatment_info="treatment_allocation", + use_random: bool = True, + seed: int = 42, +): + """Split patients in the control group over the centers. + + Parameters + ---------- + df : pandas.DataFrame, + Dataframe containing features of the patients. + n_clients : int, + Number of clients. + treatment_info : str, (optional) + Column name for the treatment allocation covariate. + Defaults to "treatment_allocation". + use_random : bool + Whether or not to shuffle the control group indices before splitting. + seed: int + The seed of the shuffling. 
+ """ + # We don't want to alter df + df_func = copy.deepcopy(df) + # Making sure there is no funny business with indices + df_func.reset_index(inplace=True) + + df_func_size = len(df_func.index) + + # Computing number of samples in each center with control data + n_control_center = math.ceil((df_func[treatment_info] == 0).sum() / (n_clients - 1)) + + indices_treated_df = np.flatnonzero(df[treatment_info] == 1).tolist() + indices_list = [indices_treated_df] + indices_control_df = [ + idx for idx in range(df_func_size) if idx not in indices_treated_df + ] + if use_random: + random.seed(seed) + random.shuffle(indices_control_df) + + for i in range(1, n_clients): + start = (i - 1) * n_control_center + stop = min(i * n_control_center, len(indices_control_df)) + indices_center = indices_control_df[start:stop] + indices_list.append(indices_center) + return indices_list diff --git a/fedeca/utils/experiment_utils.py b/fedeca/utils/experiment_utils.py new file mode 100644 index 00000000..2b2a4e68 --- /dev/null +++ b/fedeca/utils/experiment_utils.py @@ -0,0 +1,302 @@ +"""Module related to synthetic experiments.""" +from __future__ import annotations + +import copy +import itertools +import pickle +import time +from collections.abc import Mapping +from typing import Any, Optional + +import numpy as np +import pandas as pd +from lifelines.exceptions import ConvergenceError + +from fedeca.fedeca_core import FedECA +from fedeca.utils.survival_utils import BaseSurvivalEstimator, CoxData +from fedeca.utils.typing import _SeedType + + +def load_dataframe_from_pickles(filename: str) -> pd.DataFrame: + """Specialized function to load dataframe from a pickle file. + + Parameters + ---------- + filename: str + Path to the pickle file. Supposedly the output of + `experiments.run_experiment` that contains a set of pickles, where each + pickle is a list of pandas.DataFrame. + + Returns + ------- + pandas.DataFrame + A single dataframe concatenating all results. 
+ """ + + def load_pickles(filename: str): + with open(filename, "rb") as file: + while True: + try: + yield pickle.load(file) + except EOFError: + break + except pickle.UnpicklingError: + continue + + return pd.concat(df for list_df in load_pickles(filename) for df in list_df) + + +def param_grid_from_dict(param_dict: dict[str, Any]) -> pd.DataFrame: + """Generate a grid of parameters as pandas.DataFrame from a dictionary. + + Nested dictionary not supported. + + Returns + ------- + pandas.DataFrame + A dataframe where each column represents a parameter, each row represents + a combination of parameters. + """ + for key, value in param_dict.items(): + if pd.api.types.is_scalar(value): + param_dict[key] = [value] + return pd.DataFrame( + itertools.product(*param_dict.values()), + columns=list(param_dict.keys()), + ) + + +def std_mean_differences(x, y): + """Compute standardized mean differences.""" + std_x = np.std(x) + std_y = np.std(y) + if (std_x == 0) and (std_y == 0): + return np.mean(x) - np.mean(y) + return (np.mean(x) - np.mean(y)) / np.sqrt((std_x**2 + std_y**2) / 2) + + +def ratio_variances(x, y): + """Compute ratio of variances.""" + std_x = np.std(x) + std_y = np.std(y) + if std_y == 0: + return np.infty + return std_x**2 / std_y**2 + + +def effective_sample_size(w): + """Compute effective sample size.""" + denom = np.sum(w**2) + if denom > 0.0: + return (np.sum(w) ** 2) / denom + return 0 + + +def single_experiment( + data_gen: CoxData, + n_samples: int, + models: Mapping[str, BaseSurvivalEstimator], + duration_col: str = "time", + event_col: str = "event", + treated_col: str = "treatment", + ps_col: str = "propensity_scores", + seed: _SeedType = None, + fit_fedeca: Optional[dict[str, Any]] = None, + return_propensities: bool = True, + return_weights: bool = True, +): + """Perform a single experiment comparing survival models. + + Parameters + ---------- + data_gen : CoxData + Data generator instance. + n_samples : int + Number of samples to generate. 
+ models : Mapping[str, BaseSurvivalEstimator] + Dictionary of survival models to evaluate. + duration_col : str, optional + Column name for event duration, by default "time". + event_col : str, optional + Column name for event indicator, by default "event". + treated_col : str, optional + Column name for treatment indicator, by default "treatment". + ps_col : str, optional + Column name for propensity scores, by default "propensity_scores". + seed: {None, int, Sequence[int], SeedSequence, BitGenerator, Generator}, optional + The seed for reproducibility. Defaults to None. + fit_fedeca: dict[str, Any], optional, by default None + Dictionary of kwargs for the fit function of + :class:`fedeca.fedeca_core.FedECA`. + return_propensities: bool + return propensity scores in the results dataframe, by default True + return_weights: bool + return samples weights in the results dataframe, by default True + + Returns + ------- + pd.DataFrame + Results of the experiment of `n` rows where `n` is the number of models. 
+ """ + if seed is None: + seed = data_gen.rng + if fit_fedeca is None: + fit_fedeca = {} + rng = np.random.default_rng(seed) + # prepare dataframe + data = data_gen.generate_dataframe( + n_samples, + prefix="X_", + duration_col=duration_col, + event_col=event_col, + treated_col=treated_col, + ps_col=ps_col, + seed=rng, + ) + + res = [] + + non_cov = [treated_col, event_col, duration_col, ps_col] + covariates = [x for x in data.columns if x not in non_cov] + + mask_treated = data[treated_col].eq(1) + smd_true_ps = std_mean_differences( + data[ps_col][mask_treated], + data[ps_col][~mask_treated], + ) + df_smd_raw = ( + data[covariates] + .apply(lambda s: std_mean_differences(s[mask_treated], s[~mask_treated])) + .to_frame() + .transpose() + .add_prefix("smd_raw_") + ) + ate_true = data_gen.average_treatment_effect_ + percent_ties = data_gen.percent_ties + models_fit_times = {model_name: None for model_name, _ in models.items()} + + for name, model in models.items(): + model.treated_col = treated_col + model.event_col = event_col + model.duration_col = duration_col + targets = None + if name.lower() == "oracleiptw": + targets = data[ps_col] + if isinstance(model, FedECA): + data_fedeca = copy.deepcopy(data).drop(columns=[ps_col]) + # Note that for now FedECA cannot use the targets argument + # if you want to use it we need to do another PR + backend_type = fit_fedeca.get("backend_type", "subprocess") + if backend_type == "remote": + fit_fedeca["urls"] = fit_fedeca["urls"][: fit_fedeca["n_clients"]] + fit_fedeca["tokens"] = [ + open(f"/home/owkin/tokens/api_key{i + 1}", "r").read() + for i in range(1, fit_fedeca["n_clients"] + 1) + ] + + model.fit(data_fedeca, targets, **fit_fedeca) + # For some reasons sometimes parameters are passed directly to the + # instance model wo being included in fit_fedeca hence the slightly + # convoluted syntax + dp_target_epsilon = model.__dict__.pop("dp_target_epsilon", np.nan) + dp_target_delta = model.__dict__.pop("dp_target_delta", 
np.nan) + + models_fit_times[name] = model.total_fit_time + else: + try: + t1 = time.time() + model.fit(data, targets) + t2 = time.time() + models_fit_times[name] = t2 - t1 + except ConvergenceError: + # More likely to happen with small sample size and large covariate shift + pass + + if model.results_ is not None: + df_smd_weighted = None + smd_estim_ps = None + ess = None + if model.propensity_scores_ is not None: + smd_estim_ps = std_mean_differences( + model.propensity_scores_[mask_treated], + model.propensity_scores_[~mask_treated], + ) + + if model.weights_ is not None: + ess = effective_sample_size(model.weights_[mask_treated]) + df_smd_weighted = ( + data[covariates] + .multiply(model.weights_, axis=0) + .apply( + lambda s: std_mean_differences( + s[mask_treated], s[~mask_treated] + ) + ) + .to_frame() + .transpose() + .add_prefix("smd_weighted_") + ) + + # Check special case of FedECA + if ( + log_likelihood := getattr(model, "ll", None) # noqa: E231, E999, E251 + ) is None: + log_likelihood = model.log_likelihood_ + + if name != "FedECA": + backend_type = "N/A" + dp_target_epsilon = np.nan + dp_target_delta = np.nan + + df_res_single = model.results_.assign( + method=name, + variance_method=getattr(model, "variance_method", None), + ess=ess, + smd_estim_ps=smd_estim_ps, + smd_true_ps=smd_true_ps, + ate_true=ate_true, + log_likelihood=log_likelihood, + fit_time=models_fit_times[name], + backend_type=backend_type, + dp_target_epsilon=dp_target_epsilon, + dp_target_delta=dp_target_delta, + percent_ties=percent_ties, + ).reset_index(drop=True) + + if return_propensities: + df_res_single["propensity_scores"] = [model.propensity_scores_] + if return_weights: + df_res_single["weights"] = [model.weights_] + if df_smd_weighted is not None: + df_res_single = df_res_single.join(df_smd_weighted) + + res.append(df_res_single) + + if "n_clients" in fit_fedeca: + n_clients = fit_fedeca["n_clients"] + else: + n_clients = None + + df_res = ( + pd.concat(res) + 
.join(df_smd_raw) + .reset_index(drop=True) + .assign( + n_samples=n_samples, + n_events=int(data["event"].sum()), + ndim=data_gen.ndim, + features_type=data_gen.features_type, + overlap=data_gen.overlap, + cov_corr=data_gen.cov_corr, + prop_treated=data_gen.prop_treated, + scale_t=data_gen.scale_t, + shape_t=data_gen.shape_t, + censoring_factor=data_gen.censoring_factor, + percent_ties=data_gen.percent_ties, + random_censoring=data_gen.random_censoring, + standardize_features=data_gen.standardize_features, + n_clients=n_clients, + ) + ) + + return df_res diff --git a/fedeca/utils/moments_utils.py b/fedeca/utils/moments_utils.py new file mode 100644 index 00000000..138d5c50 --- /dev/null +++ b/fedeca/utils/moments_utils.py @@ -0,0 +1,109 @@ +"""A module containing utils to compute high-order moments using Newton's formeanla.""" +from typing import Any, List + +import numpy as np +import pandas as pd +from scipy.special import binom + + +def compute_uncentered_moment(data, order): + """Compute the uncentered moment. + + Parameters + ---------- + data : pd.DataFrame, np.array + dataframe. + order : int + order of the moment. + + Returns + ------- + pd.DataFrame, np.array + Moment of order k. + + Raises + ------ + NotImplementedError + Raised if the data type is not Dataframe nor np.ndarray. + """ + if isinstance(data, (pd.DataFrame, pd.Series)): + moment = data.select_dtypes(include=np.number).pow(order).mean(skipna=True) + elif isinstance(data, np.ndarray): + moment = np.nanmean(np.power(data, order), axis=0) + else: + raise NotImplementedError( + "Only DataFrame or numpy array are currently handled." + ) + return moment + + +# pylint: disable=deprecated-typing-alias +def compute_centered_moment(uncentered_moments: List[Any]): + r"""Compute the centered moment of order k. + + Given a list of the k first unnormalized moments, + compute the centered moment of order k. + For high values of the moments the results can + differ from scipy.special.moment. 
+ We are interested in computing + .. math:: + \hat{\mu}_k = \frac{1}{\hat{\sigma}^k} + \mathbb E_Z \left[ (Z - \hat{\mu})^k\right] + \hat{\mu}_k = \frac{1}{\hat{\sigma}^k} + \mathbb E_Z \left[ \sum_{l=0}^k\binom{k}{l} Z^{k-l} (-1)^l\hat\mu^l)\right] + \hat{\mu}_k = \frac{1}{\hat{\sigma}^k} + \sum_{l=0}^k(-1)^l\binom{k}{l} \mathbb E_Z \left[ Z^{k-l} + \right]\mathbb E_Z \left[ Z \right]^l + thus we only need the list uncentered moments up to order k. + + Parameters + ---------- + uncentered_moments : List[Any] + List of the k first non-centered moment. + + Returns + ------- + Any + The centered k-th moment. + """ + mean = np.copy(uncentered_moments[0]) + order = len(uncentered_moments) + result = (-mean) ** order # i+1 = 0 + # We will go over the list of moments to add Newton's binomial + # expansion formula terms one by one, where the current + # moment is ahead of i by 1 hence we call it moment_i_plus_1 + for i, moment_i_plus_1 in enumerate(uncentered_moments): + temp = (-mean) ** (order - i - 1) + temp *= moment_i_plus_1 # the power is already computed + temp *= binom(order, i + 1) + result += temp + return result + + +# pylint: disable=deprecated-typing-alias +def aggregation_mean(local_means: List[Any], n_local_samples: List[int]): + """Aggregate local means. + + Aggregate the local means into a global mean by using the local number of samples. + + Parameters + ---------- + local_means : List[Any] + List of local means. Could be array, float, Series. + n_local_samples : List[int] + List of number of samples used for each local mean. + + Returns + ------- + Any + Aggregated mean. 
Same type of the local means + """ + tot_samples = np.copy(n_local_samples[0]) + tot_mean = np.copy(local_means[0]) + for mean, n_sample in zip(local_means[1:], n_local_samples[1:]): + mean = np.nan_to_num(mean, nan=0, copy=False) + tot_mean *= tot_samples / (tot_samples + n_sample) + tot_mean += mean * (n_sample / (tot_samples + n_sample)) + tot_samples += n_sample + + return tot_mean diff --git a/fedeca/utils/substra_utils.py b/fedeca/utils/substra_utils.py new file mode 100644 index 00000000..c267e67a --- /dev/null +++ b/fedeca/utils/substra_utils.py @@ -0,0 +1,35 @@ +"""Utils for substra.""" +from substra import Client as SubstraClient + + +class Client(SubstraClient): + """Wrapper around substra clients to take the simu argument. + + This class mimics the behaviour of substra clients, except that it accepts + an additional backend type, "simu", that behavaves like the subprocess mode + but can be carried to the `Experiment` with only 1 parameter. + + Parameters + ---------- + backend_type : str, optional + Backend type to use. 
+ """ + + def __init__(self, *args, **kwargs): + if "backend_type" in kwargs: + if kwargs["backend_type"] == "simu": + # We remove it not to raise Errors for unrecognized backend + kwargs["backend_type"] = None + # We init it with default backend which is subprocess + super().__init__(*args, **kwargs) + # We tag it with a mark + self.is_simu = True + else: + super().__init__(*args, **kwargs) + # We tag it with a mark + self.is_simu = False + + else: + super().__init__(*args, **kwargs) + # We tag it with a mark + self.is_simu = False diff --git a/fedeca/utils/substrafl_utils.py b/fedeca/utils/substrafl_utils.py new file mode 100644 index 00000000..5637512d --- /dev/null +++ b/fedeca/utils/substrafl_utils.py @@ -0,0 +1,648 @@ +"""Utils functions for Substra.""" +import os +import pickle +import tempfile +from collections.abc import Callable +from pathlib import Path +from typing import Union + +import lifelines +import numpy as np +import pandas as pd +import substra +import torch +from sklearn.metrics import accuracy_score +from substrafl.dependency import Dependency +from substrafl.evaluation_strategy import EvaluationStrategy +from substrafl.experiment import execute_experiment as execute_experiment_substra +from substrafl.nodes import AggregationNode, TestDataNode, TrainDataNode +from substrafl.nodes.node import OutputIdentifiers + +from fedeca.utils.data_utils import split_dataframe_across_clients + +try: + import git +except ImportError: + pass +import json +import subprocess + +from substrafl.model_loading import ( + FUNCTION_DICT_KEY, + METADATA_FILE, + MODEL_DICT_KEY, + REQUIRED_KEYS, + _check_environment_compatibility, +) + + +class Experiment: + """Experiment class.""" + + def __init__( + self, + strategies: list, + num_rounds_list: list[int], + ds_client=None, + train_data_nodes: Union[list[TrainDataNode], None] = None, + metrics_dicts_list: Union[list[dict], None] = None, + test_data_nodes: Union[list[TestDataNode], None] = None, + aggregation_node: 
Union[AggregationNode, None] = None, + evaluation_frequency: Union[int, None] = None, + experiment_folder: str = "./experiments", + clean_models: bool = False, + fedeca_path: Union[str, None] = None, + algo_dependencies: Union[list, None] = None, + ): + """Initialize an experiment. + + Parameters + ---------- + ds_client : fl.client.Client + Federated Learning client object used to register computations. + strategies : list + List of strategies to run. + train_data_nodes : Union[list[TrainDataNode], None] + List of data nodes for training. If None cannot use the run method + directly. + num_rounds_list : list + List of number of rounds for each strategy. + metrics_dicts_list : list[dict], optional + Dict of metric functions, by default None. + test_data_nodes : list, optional + List of data nodes for testing, by default None. + aggregation_node : fl.data.DataNode, optional + Aggregation node, by default None. + evaluation_frequency : int, optional + Frequency of evaluation, by default 1. + experiment_folder : str, optional + Folder path for experiment outputs, by default "./experiments". + clean_models : bool, optional + Whether to clean models after training, by default False. + fedeca_path : str, optional + Path to the FedECA package, by default None. + algo_dependencies : list, optional + List of algorithm dependencies, by default []. 
+ """ + if metrics_dicts_list is not None: + assert len(strategies) == len(metrics_dicts_list) + assert len(num_rounds_list) == len(strategies) + self.strategies = strategies + self.metrics_dicts_list = metrics_dicts_list + self.num_rounds_list = num_rounds_list + self.ds_client = ds_client + self.train_data_nodes = train_data_nodes + self.test_data_nodes = test_data_nodes + self.simu_mode = False + + if self.test_data_nodes is None: + assert metrics_dicts_list is not None + if self.train_data_nodes is not None: + self.test_data_nodes = [ + TestDataNode( + t.organization_id, t.data_manager_key, t.data_sample_keys, [] + ) + for t in self.train_data_nodes + ] + else: + if metrics_dicts_list and not all( + [len(t.metric_functions) == 0 for t in self.test_data_nodes] + ): + print( + """WARNING: you are passing metrics to test data nodes with existing + metric_functions this will overwrite them""" + ) + print( + [ + (f"Client {i}", t.metric_functions) + for i, t in enumerate(self.test_data_nodes) + ] + ) + + self.evaluation_frequency = evaluation_frequency + + self.aggregation_node = aggregation_node + self.experiment_folder = experiment_folder + self.clean_models = clean_models + + # Packaging the right dependencies + if fedeca_path is None: + fedeca_path = os.getcwd() + repo_folder = Path( + git.Repo(fedeca_path, search_parent_directories=True).working_dir + ).resolve() + wheel_folder = repo_folder / "temp" + os.makedirs(wheel_folder, exist_ok=True) + for stale_wheel in wheel_folder.glob("fedeca*.whl"): + stale_wheel.unlink() + process = subprocess.Popen( + f"python -m build --wheel --outdir {wheel_folder} {repo_folder}", + shell=True, + stdout=subprocess.PIPE, + ) + process.wait() + assert process.returncode == 0, "Failed to build the wheel" + wheel_path = next(wheel_folder.glob("fedeca*.whl")) + if algo_dependencies is None: + algo_dependencies = [] + + self.algo_dependencies = Dependency( + pypi_dependencies=["numpy==1.23.1", "torch==1.11.0", "lifelines", "pandas"] 
+ + algo_dependencies, + local_dependencies=[wheel_path], + ) + + self.experiment_path = str(Path(self.experiment_folder)) + os.makedirs(self.experiment_path, exist_ok=True) + self.run_strategies = 0 + self.tasks = {} + self.compute_plan_keys = [] + self.performances_strategies = [] + + def fit( + self, + data: pd.DataFrame, + nb_clients: Union[int, None] = None, + split_method: Union[Callable, str] = "uniform", + split_method_kwargs: Union[Callable, None] = None, + data_path: Union[str, None] = None, + backend_type: str = "subprocess", + urls: Union[list[str], None] = None, + tokens: Union[list[str], None] = None, + ): + """Fit strategies on global data split across clients. + + For test if provided we use test_data_nodes from int or the + train_data_nodes in the latter train=test. + + Parameters + ---------- + data : pd.DataFrame + The global data to be split has to be a dataframe as we only support + one opener type. + nb_clients : Union[int, None], optional + The number of clients used to split data across, by default None + split_method : Union[Callable, None], optional + How to split data across the nb_clients, by default None. + split_method_kwargs : Union[Callable, None], optional + Argument of the function used to split data, by default None. + data_path : Union[str, None] + Where to store the data on disk when backend is not remote. + backend_type: str + The backend to use for substra. Can be either: + ["subprocess", "docker", "remote"]. Defaults to "subprocess". + urls: Union[list[str], None] + Urls corresponding to clients API if using remote backend_type. + Defaults to None. + tokens: Union[list[str], None] + Tokens necessary to authenticate each client API if backend_type + is remote. Defauts to None. 
+ """ + # Reset experiment so that it can fit on a new dataset + self.reset_experiment() + + if data_path is not None: + self.experiment_path = data_path + + # We first have to create the TrainDataNodes objects for this we split + # the data into nb_clients using split_method + ( + self.clients, + self.train_data_nodes, + test_data_nodes, + _, + _, + ) = split_dataframe_across_clients( + df=data, + n_clients=nb_clients, + split_method=split_method, + split_method_kwargs=split_method_kwargs, + backend_type=backend_type, + data_path=data_path, + urls=urls, + tokens=tokens, + ) + if self.test_data_nodes is None: + self.test_data_nodes = test_data_nodes + self.run() + + def run(self, num_strategies_to_run=None): + """Run the experiment. + + Parameters + ---------- + num_strategies_to_run : int, optional + Number of strategies to run, by default None. + """ + assert ( + self.train_data_nodes is not None + ), "you have to define train_data_nodes first before running" + assert ( + self.test_data_nodes is not None + ), "you have to define test_data_nodes first before running" + if num_strategies_to_run is None: + num_strategies_to_run = len(self.strategies) - self.run_strategies + assert (self.run_strategies + num_strategies_to_run) <= len( + self.strategies + ), f"""You cannot run {num_strategies_to_run} strategies more there is only + {len(self.strategies)} strategies and you have already run {self.run_strategies} + of them.""" + # If no client is given we take the first one + if self.ds_client is None: + self.ds_client = self.clients[list(self.clients.keys())[0]] + + # If no AggregationNode is given we take the first one + if self.aggregation_node is None: + print("Using the first client as a server.") + kwargs_agg_node = { + "organization_id": self.train_data_nodes[0].organization_id + } + self.aggregation_node = AggregationNode(**kwargs_agg_node) + + if not hasattr(self, "experiment_kwargs"): + self.experiment_kwargs = { + "experiment_folder": self.experiment_path, + 
"clean_models": self.clean_models, + "dependencies": self.algo_dependencies, + "client": self.ds_client, + } + if hasattr(self.ds_client, "is_simu"): + self.simu_mode = self.ds_client.is_simu + + # inelegant but cannot slice on a zip object + strategies = self.strategies[ + self.run_strategies : (self.run_strategies + num_strategies_to_run) + ] # noqa: E203 + metrics_dicts_list = self.metrics_dicts_list[ + self.run_strategies : ( + self.run_strategies + num_strategies_to_run + ) # noqa: E203 + ] + num_rounds_list = self.num_rounds_list[ + self.run_strategies : ( + self.run_strategies + num_strategies_to_run + ) # noqa: E203 + ] + for i, (strategy, metrics_dict, num_rounds) in enumerate( + zip(strategies, metrics_dicts_list, num_rounds_list) + ): + for t in self.test_data_nodes: + t.metric_functions = metrics_dict + + current_kwargs = self.experiment_kwargs + current_kwargs["strategy"] = strategy + current_kwargs["num_rounds"] = num_rounds + current_kwargs["train_data_nodes"] = self.train_data_nodes + current_kwargs["aggregation_node"] = self.aggregation_node + # Evaluation frequency depend on current strategy + # If None evaluate once at the end of the strategy + if self.evaluation_frequency is None: + evaluation_strategy = EvaluationStrategy( + test_data_nodes=self.test_data_nodes, + eval_rounds=[num_rounds_list[i]], + ) + else: + evaluation_strategy = EvaluationStrategy( + test_data_nodes=self.test_data_nodes, + eval_frequency=self.evaluation_frequency[i], + ) + current_kwargs["evaluation_strategy"] = evaluation_strategy + current_kwargs["simu_mode"] = self.simu_mode + current_kwargs["name"] = f"Fedeca: {strategy.__class__.__name__}" + xp_output = execute_experiment(**current_kwargs) + + if self.simu_mode: + scores = [t.scores for t in self.test_data_nodes] + robust_cox_variance = False + for idx, s in enumerate(scores): + print(f"====Client {idx}====") + try: + print(s[-1]) + except IndexError: + robust_cox_variance = True + print("No metric") + # TODO Check 
that it is well formatted it's probably not + self.performances_strategies.append(pd.DataFrame(xp_output)) + # Hacky hacky hack + if robust_cox_variance: + xp_output = self.train_data_nodes + else: + xp_output = self.train_data_nodes[0] + + self.compute_plan_keys.append(xp_output) + + if not (self.simu_mode): + self.tasks[self.compute_plan_keys[i].key] = {} + tasks = self.ds_client.list_task( + filters={"compute_plan_key": [self.compute_plan_keys[i].key]} + )[::-1] + tasks_names = [t.function.name for t in tasks] + self.tasks[self.compute_plan_keys[i].key]["tasks"] = tasks + self.tasks[self.compute_plan_keys[i].key]["tasks_names"] = tasks_names + self.tasks[self.compute_plan_keys[i].key]["num_tasks"] = len(tasks) + + self.run_strategies += 1 + + def get_outmodel(self, task_name, strategy_idx=0, idx_task=0): + """Get the output model. + + Parameters + ---------- + task_name : str + Name of the task. + strategy_idx : int, optional + Index of the strategy, by default 0. + idx_task : int, optional + Index of the task, by default 0. + """ + assert not (self.simu_mode), "This function cannot be used in simu mode" + + # We get all matches and order them chronologically + tasks_dict_from_strategy = self.tasks[self.compute_plan_keys[strategy_idx].key] + return get_outmodel_function( + task_name, idx_task=idx_task, tasks_dict=tasks_dict_from_strategy + ) + + def reset_experiment(self): + """Reset the state of the object. + + So it can be fit with a new dataset. 
+ """ + self.run_strategies = 0 + self.tasks = {} + self.compute_plan_keys = [] + self.performances_strategies = [] + self.train_data_nodes = None + self.test_data_nodes = None + + +def get_outmodel_function( + task_name, client, compute_plan_key=None, idx_task=0, tasks_dict={} +): + """Retrieve an output model from a task or tasks_dict.""" + assert ( + compute_plan_key is not None or tasks_dict + ), "Please provide a tasks dict or a compute plan key" + if tasks_dict: + assert compute_plan_key is None + assert ( + ("num_tasks" in tasks_dict) + and ("tasks" in tasks_dict) + and ("tasks_names" in tasks_dict) + ) + else: + assert isinstance(compute_plan_key, str) + assert not tasks_dict + assert client is not None + tasks = client.list_task(filters={"compute_plan_key": [compute_plan_key]})[::-1] + tasks_names = [t.function.name for t in tasks] + tasks_dict = {} + tasks_dict["tasks"] = tasks + tasks_dict["tasks_names"] = tasks_names + tasks_dict["num_tasks"] = len(tasks) + + num_tasks = tasks_dict["num_tasks"] + compatible_indices = [ + i for i in range(num_tasks) if tasks_dict["tasks_names"][i] == task_name + ] + idx_outmodel = compatible_indices[idx_task] + outmodel_task = tasks_dict["tasks"][idx_outmodel] + with tempfile.TemporaryDirectory() as temp_dir: + identifier = outmodel_task.function.outputs[0].identifier + model_path = client.download_model_from_task( + outmodel_task.key, identifier, temp_dir + ) + if identifier in ["model", "shared"]: + # Assumes it can be unserialized with pickle + with open(model_path, "rb") as f: + outmodel = pickle.load(f) + elif identifier == "local": + # Assumes it can be unserialized with torch + outmodel = torch.load(model_path) + else: + raise ValueError(f"Identifier {identifier} not recognized") + + return outmodel + + +class SubstraflTorchDataset(torch.utils.data.Dataset): + """Substra toch dataset class.""" + + def __init__( + self, + datasamples, + is_inference: bool, + target_columns=["T", "E"], + columns_to_drop=[], + 
dtype="float64", + return_torch_tensors=False, + ): + """Initialize SubstraflTorchDataset class. + + Parameters + ---------- + datasamples : pandas.DataFrame + Data samples. + is_inference : bool + Flag indicating if the dataset is for inference. + target_columns : list, optional + List of target columns, by default ["T", "E"]. + columns_to_drop : list, optional + List of columns to drop, by default []. + dtype : str, optional + Data type, by default "float64". + return_torch_tensors: bool, optional + Returns torch.Tensor, actually substra generally expects your dataset + to return torch.Tensor and not numpy as the training loop uses pytorch + and doesn't explicitly call torch.from_numpy. This is different from + say NewtonRaphson and WebDisco which are numpy-based. Defaults to False. + """ + self.data = datasamples + self.is_inference = is_inference + self.target_columns = target_columns + self.columns_to_drop = list(set(columns_to_drop + self.target_columns)) + self.x = self.data.drop(columns=self.columns_to_drop).to_numpy().astype(dtype) + self.y = self.data[self.target_columns].to_numpy().astype(dtype) + self.return_torch_tensors = return_torch_tensors + + def __getitem__(self, idx): + """Get item.""" + if self.is_inference: + x = self.x[idx] + if self.return_torch_tensors: + x = torch.from_numpy(x) + return x + + else: + x, y = self.x[idx], self.y[idx] + if self.return_torch_tensors: + x, y = torch.from_numpy(x), torch.from_numpy(y) + return x, y + + def __len__(self): + """Get length.""" + return len(self.data.index) + + +def make_substrafl_torch_dataset_class( + target_cols, + event_col, + duration_col, + dtype="float64", + return_torch_tensors=False, +): + """Create a custom SubstraflTorchDataset class for survival analysis. + + Parameters + ---------- + target_cols : list + List of target columns. + event_col : str + Name of the event column. + duration_col : str + Name of the duration column. + dtype : str, optional + Data type, by default "float64". 
+ return_torch_tensors : bool, optional + Returns torch.Tensor. Defaults to False. + + Returns + ------- + type + Custom SubstraflTorchDataset class. + """ + assert len(target_cols) == 1 or all( + [t in [event_col, duration_col] for t in target_cols] + ) + if len(target_cols) == 1: + print(f"Making a dataset class to fit a model to predict {target_cols[0]}") + columns_to_drop = [event_col, duration_col] + elif len(target_cols) == 2: + assert set(target_cols) == set( + [event_col, duration_col] + ), "Your targets should be event_col and duration_col" + # DO NOT MODIFY THIS LINE !!!!! + target_cols = [duration_col, event_col] + columns_to_drop = [] + + class MySubstraflTorchDataset(SubstraflTorchDataset): + def __init__(self, datasamples, is_inference): + super().__init__( + datasamples=datasamples, + is_inference=is_inference, + target_columns=target_cols, + columns_to_drop=columns_to_drop, + dtype=dtype, + return_torch_tensors=return_torch_tensors, + ) + + return MySubstraflTorchDataset + + +def make_c_index_function(duration_col: str, event_col: str): + """Build C-index function. + + Parameters + ---------- + duration_col : str, + Column name for the duration. + event_col : str, + Column name for the event. + """ + + def c_index(datasamples, predictions_path): + times_true = datasamples[duration_col] + events = datasamples[event_col] + if isinstance(predictions_path, str) or isinstance(predictions_path, Path): + y_pred = np.load(predictions_path) + else: + y_pred = predictions_path + + c_index = lifelines.utils.concordance_index(times_true, -y_pred, events) + return c_index + + return c_index + + +def make_accuracy_function(treatment_col: str): + """Build accuracy function. + + Parameters + ---------- + treatment_col: str, + Column name for the treatment allocation. 
+ """ + + def accuracy(datasamples, predictions_path): + y_true = datasamples[treatment_col] + if isinstance(predictions_path, str) or isinstance(predictions_path, Path): + y_pred = np.load(predictions_path) + else: + y_pred = predictions_path + return accuracy_score(y_true, y_pred > 0.5) + + return accuracy + + +def download_train_task_models_by_round( + client, dest_folder, compute_plan_key, round_idx +): + """Download models associated with a specific round of a train task.""" + compute_plan = client.get_compute_plan(compute_plan_key) + + _check_environment_compatibility(metadata=compute_plan.metadata) + + folder = Path(dest_folder) + folder.mkdir(exist_ok=True, parents=True) + + if round_idx is None: + round_idx = compute_plan.metadata["num_rounds"] + + # Retrieve local train task key + local_train_tasks = client.list_task( + filters={ + "compute_plan_key": [compute_plan.key], + "metadata": [{"key": "round_idx", "type": "is", "value": str(round_idx)}], + "worker": [client.organization_info().organization_id], + } + ) + local_train_tasks = [t for t in local_train_tasks if t.tag == "train"] + train_task = local_train_tasks[-1] + + # Get the associated head model (local state) + model_file = client.download_model_from_task( + train_task.key, folder=folder, identifier=OutputIdentifiers.local + ) + function_file = client.download_function( + train_task.function.key, destination_folder=folder + ) + + # Environment requirements and local state path + metadata = {k: v for k, v in compute_plan.metadata.items() if k in REQUIRED_KEYS} + metadata[MODEL_DICT_KEY] = str(model_file.relative_to(folder)) + metadata[FUNCTION_DICT_KEY] = str(function_file.relative_to(folder)) + metadata_path = folder / METADATA_FILE + metadata_path.write_text(json.dumps(metadata)) + return model_file + + +def execute_experiment( + *args, + **kwargs, +) -> substra.sdk.models.ComputePlan: + """Execute experiment with fedeca client. 
+ + This allows to infer the simu mode using the backend_type of the client. + + Returns + ------- + substra.sdk.models.ComputePlan + A compute plan + """ + if len(args) > 0: + client_arg = args[0] + else: + client_arg = kwargs["client"] + if hasattr(client_arg, "is_simu"): + kwargs["simu_mode"] = client_arg.is_simu + return execute_experiment_substra(*args, **kwargs) diff --git a/fedeca/utils/survival_utils.py b/fedeca/utils/survival_utils.py new file mode 100644 index 00000000..95c843a3 --- /dev/null +++ b/fedeca/utils/survival_utils.py @@ -0,0 +1,1345 @@ +"""Provide utils to simulate survival data.""" +from __future__ import annotations + +import copy +from typing import Final, Literal, Optional, Protocol + +import numpy as np +import numpy.typing as npt +import pandas as pd +import torch +from numpy.typing import NDArray +from scipy import stats +from scipy.linalg import toeplitz +from sklearn.base import BaseEstimator +from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler + +from fedeca.utils.typing import _FuncCateType, _FuncPropensityType, _SeedType + + +class BaseSurvivalEstimator(BaseEstimator): + """Base estimator for time-to-event analysis.""" + + def __init__( + self, + treated_col: str = "treated", + event_col: str = "event", + duration_col: str = "time", + ps_col: Optional[str] = "propensity_scores", + seed: _SeedType = None, + ): + """Initialize the BaseEstimator class. + + Parameters + ---------- + treated_col : str, optional + Column name indicating treatment status, by default "treated". + event_col : str, optional + Column name indicating event occurrence, by default "event". + duration_col : str, optional + Column name indicating time to event or censoring, by default "time". + ps_col : str or None, optional + Column name indicating propensity scores, by default "propensity_scores". + seed: {None, int, Sequence[int], SeedSequence, BitGenerator, Generator} + The seed for reproducibility. Defaults to None. 
+ """ + self.treated_col = treated_col + self.event_col = event_col + self.duration_col = duration_col + self.ps_col = ps_col + self.rng = np.random.default_rng(seed) + self.log_likelihood_: Optional[float] = None + self.results_: Optional[pd.DataFrame] = None + self.weights_: Optional[np.ndarray] = None + self.propensity_scores_: Optional[np.ndarray] = None + + def fit(self, data: pd.DataFrame, targets: Optional[pd.DataFrame] = None) -> None: + """Fit the model to the provided data and optionally the target values. + + This method trains the model using the input data and, if available, + the target values. The model's internal parameters are updated during + training to learn from the provided data. + The updated internal parameters are: + * results_: summary data frame of the fitting results. Expect at least + one row labeled by `self.treated_col`, with columns "coef" for + treatment effect, and column "p" for the p-value of the estimation. + * weights_: weights assigned to each row of `data` + * log_likelihood_: The log-likelihood of the model fitted with `data` + + Parameters + ---------- + data : pd.DataFrame + Input data as a DataFrame containing features used for training the model. + targets : pd.DataFrame, optional + Target values associated with the input data, by default None. + If provided, the model is trained using both the input data + and target values. 
+ + Returns + ------- + None + """ + + def reset_state(self) -> None: + """Reset the estimator's internal parameters related to fitted results.""" + self.results_ = None + self.weights_ = None + self.log_likelihood_ = None + self.propensity_scores_ = None + + def set_random_state(self, seed: _SeedType) -> None: + """Set random state.""" + self.rng = np.random.default_rng(seed) + + +class BootstrapMixinProtocol(Protocol): + """Protocol class for type checking.""" + + def point_estimate(self, data: pd.DataFrame) -> npt.ArrayLike: + """Return a point estimate of treatment effect.""" + return np.array([]) + + +class BootstrapMixin(BootstrapMixinProtocol): + """Mixin class for bootstrapping utilities.""" + + def bootstrap_sample( + self, data: pd.DataFrame, seed: _SeedType = None + ) -> pd.DataFrame: + """Resampling with replacement.""" + rng = np.random.default_rng(seed) + return data.sample(data.shape[0], replace=True, random_state=rng) + + def bootstrap_std( + self, data: pd.DataFrame, n_bootstrap: int, seed: _SeedType = None + ) -> Optional[np.ndarray]: + """Bootstrap the standard deviation of the treatment effect estimation.""" + if n_bootstrap <= 1: + return None + rng = np.random.default_rng(seed) + + def bootstrap_coef() -> np.ndarray: + data_resampled = self.bootstrap_sample(data, rng) + return np.array(self.point_estimate(data_resampled)) + + std = np.std([bootstrap_coef() for _ in range(n_bootstrap)], axis=0) + return std + + +def compute_summary( + coef: np.ndarray, + coef_std: np.ndarray, + coef_null: Optional[np.ndarray] = None, + alpha: float = 0.05, + index: Optional[list[str] | pd.Index] = None, +) -> pd.DataFrame: + """Compute summary for parameter estimation.""" + conf_int = f"{np.round(100 * (1 - alpha)):.0f}" + delta_coef = np.multiply(stats.norm.ppf(1 - alpha / 2), coef_std) + if coef_null is None: + coef_null = np.zeros_like(coef) + z_score = (coef - coef_null) / coef_std + p_value = 2 * stats.norm.sf(np.abs(z_score)) + + res = pd.DataFrame() + 
res["coef"] = coef + res["exp(coef)"] = np.exp(coef) + res["se(coef)"] = coef_std + res[f"coef lower {conf_int}%"] = coef - delta_coef + res[f"coef upper {conf_int}%"] = coef + delta_coef + res[f"exp(coef) lower {conf_int}%"] = np.exp(coef - delta_coef) + res[f"exp(coef) upper {conf_int}%"] = np.exp(coef + delta_coef) + res["cmp to"] = coef_null + res["z"] = z_score + res["p"] = p_value + res["-log2(p)"] = -np.log2(p_value) + + if index is not None: + res.index = index + + return res + + +class CoxData: + """Simulate Cox data. + + This class simulates survival data following Cox model assumptions. + """ + + def __init__( + self, + n_samples: int = 1000, + ndim: int = 10, + features_type: Literal[ + "cov_toeplitz", + "cov_uniform", + "indep_gauss", + ] = "cov_toeplitz", + cate: float | Literal["random", "linear"] = 1.0, + propensity: Literal["constant", "linear"] = "constant", + prop_treated: float = 0.5, + overlap: float = 0.0, + cov_corr: float = 0.5, + scale_t: float = 1.0, + shape_t: float = 1.0, + censoring_factor: float = 0.5, + percent_ties: Optional[float] = None, + random_censoring: bool = False, + seed: _SeedType = None, + standardize_features: bool = True, + dtype: Literal["float32", "float64"] = "float64", + ): + r"""Cox Data generator class. + + This class generates data according to a Cox proportional hazards model + in continuous time as follows: + .. math:: + S(t|x) = P(T > t | X=x) + \\lambda(t|x) = \\frac{d \\log S(t|x)}{dt} + \\lambda(t|x) = \\lambda_0(t)e^{\\beta^T x} + \\Lambda_0(t|x) = \\int_0^t \\lambda_0(u)du = (\\frac{t}{s})^k + X \\sim \\mathcal{N}(0, C) + \\beta \\sim \\mathcal{N}(0, I) + + Parameters + ---------- + n_samples: int, optional + Number of samples to generate. Defaults to 1000 + ndim: int, optional + Number of features, defaults to 10. + features_type: `{"cov_toeplitz", "cov_uniform", "indep_gauss"}`, optional + cate: {float, `{"random", "linear"}`, Callable} + The way to assign treatment effect (hazard ratio) to samples. 
+ * "float": Constant hazard ratio for all samples. + * "random": Hazard ratio follows log-normal distribution. + * "linear": Hazard ratio depends on a linear combination of + features with random coefficients. + Defaults to 1.0 (no treatment effect). + propensity: {`{"constant", "linear"}`, Callable} + The way to assign propensity scores (probabilities of being treated) + to samples. + * "linear": Propensity scores depend on a linear combination of + features with random coefficients. + * "constant": All propensity scores take the value of the constant + defined by the parameter `prop_treated`. + Defaults to "constant". + cov_corr: float, optional + The correlation of the covariance matrix. + scale_t: float, optional + Scale parameter `s` in the equations above. Defaults to `1.0`. + shape_t: float, optional + Shape parameter `k` in the equations above. Defaults to `1.0`. + censoring_factor: float, optional + Parameter used to determine the probability of being censored + (with respect to the median). Defaults to `0.5`. + percent_ties: float, optional + Parameter that control the percentage of samples who have the same outcome. + Defaults to None. + random_censoring: bool, optional + Whether to censor completely independently of the rest or not. + When true, censors samples with probability censoring_factor. + When false, samples are censored if the drawn event times + (drawn from the Cox model) is smaller than an independent + exponential variable with scale factor + `censoring_factor * mean_time`, where `mean_time` + is the empirical mean of drawn event times. + Defaults to False. + seed: {None, int, Sequence[int], SeedSequence, BitGenerator, Generator}, + optional + The seed for reproducibility. Defaults to None. + standardize_features: bool, optional + Whether to standardize features or not. Defaults to True. + dtype : `{"float64", "float32"}`, default="float64" + Type of the arrays used. 
+ """ + self.n_samples = n_samples + self.ndim = ndim + self.features_type: Final = features_type + self.rng = np.random.default_rng(seed) + self.prop_treated = prop_treated + self.overlap = overlap + self.cate = cate + self.propensity = propensity + self.cov_corr = cov_corr + self.scale_t = scale_t + self.shape_t = shape_t + self.censoring_factor = censoring_factor + self.random_censoring = random_censoring + self.standardize_features = standardize_features + self.dtype: Final = dtype + self.coeffs = None + self.percent_ties = percent_ties + self.average_treatment_effect_ = None + self.probability_treated = None + + def standardize_data(self, features: np.ndarray): + """Standardize data. Make data reduced centered. + + Standardize the data by substracting the mean of each columns + and dividing by the standard deviation. + + Parameters + ---------- + features : np.ndarray + Features to standardize. + + Returns + ------- + np.ndarray + Normalized features. + """ + features -= features.mean(axis=0) + features /= features.std(axis=0) + return features + + def generate_data( + self, + n_samples: Optional[int] = None, + seed: _SeedType = None, + use_cate: bool = True, + ): + """Generate final survival data. + + Use the collection of methods of the class to + generate data following Cox assumptions. + + Returns + ------- + tuple + A tuple of np.ndarrays. + + Raises + ------ + ValueError + If `propensity` is neither "constant" nor "linear". + ValueError + If `cate` is neither "linear", "random" nor a constant type int or float. 
+ """ + if n_samples is None: + n_samples = self.n_samples + if seed is None: + seed = self.rng + rng = np.random.default_rng(seed) + + if self.features_type == "cov_uniform": + X = features_normal_cov_uniform( + n_samples, self.ndim, dtype=self.dtype, seed=rng + ) + elif self.features_type == "indep_gauss": + X = rng.standard_normal(size=(n_samples, self.ndim)).astype(self.dtype) + else: + X = features_normal_cov_toeplitz( + n_samples, self.ndim, self.cov_corr, dtype=self.dtype, seed=rng + ) + if self.standardize_features: + X = self.standardize_data(X) + + if self.propensity == "constant": + treat_alloc = random_treatment_allocation( + n_samples, self.prop_treated, seed=rng + ) + propensity_scores = np.repeat(self.prop_treated, n_samples) + + elif self.propensity == "linear": + func_propensity = linear_propensity( + ndim=self.ndim, + overlap=self.overlap, + prop_treated=self.prop_treated, + seed=rng, + ) + propensity_scores = np.apply_along_axis(func_propensity, -1, X) + treat_alloc = rng.binomial(1, propensity_scores) + else: + raise ValueError("propensity must be either `constant` or `linear`") + + self.coeffs = rng.normal(size=(self.ndim,)).astype(self.dtype) + u = X.dot(self.coeffs) + if use_cate: + if self.cate == "linear": + func_cate = linear_cate(ndim=self.ndim, seed=rng) + elif self.cate == "random": + func_cate = random_cate(seed=rng) + elif isinstance(self.cate, (int, float)): + func_cate = constant_cate(self.cate) + else: + raise ValueError( + """cate must be either `linear`, `random` or a constant type + int or float""" + ) + + cate_vector = np.apply_along_axis(func_cate, -1, X) + self.average_treatment_effect_ = np.mean(cate_vector[treat_alloc == 1]) + self.probability_treated = cate_vector + u += treat_alloc * np.log(cate_vector) + # Simulation of true times + time_hazard_baseline = -np.log( + rng.uniform(0, 1.0, size=n_samples).astype(self.dtype) + ) + time_cox_unscaled = time_hazard_baseline * np.exp(-u) + times = self.scale_t * time_cox_unscaled 
** (1.0 / self.shape_t) + + # induce samples with same times + if self.percent_ties is not None: + nb_ties_target = int(self.percent_ties * n_samples) + if nb_ties_target >= 2: + # sklearn not supporting generator yet, pass int to random_state + # ref: https://github.com/scikit-learn/scikit-learn/issues/16988 + seed_seq = rng.bit_generator._seed_seq.spawn(1)[0] # type: ignore + random_state = seed_seq.generate_state(1)[0] + original_times = copy.deepcopy(times) + # We progressively reduce the number of bins until there are + # only 2 bins starting with npoints - 1 bins + reached = False + for nbins in range(n_samples - 1, 1, -1): + discretizer = KBinsDiscretizer( + n_bins=nbins, + encode="ordinal", + strategy="quantile", + random_state=random_state, + ) + times = discretizer.fit_transform(original_times.reshape((-1, 1))) + nb_ties_reached = n_samples - len(np.unique(times)) + if (nb_ties_reached - nb_ties_target) >= 0: + reached = True + break + if not reached: + raise ValueError("This should not happen, lower percent_ties") + times = times.reshape((-1)) + + else: + raise ValueError("Choose a larger number of ties") + + avg_time = times.mean() + + # Simulation of the censoring times. 
times is returned in absolute value + if self.random_censoring: + censoring = rng.uniform(size=n_samples) < self.censoring_factor + times[censoring] = [rng.uniform(0, t) for t in times[censoring].tolist()] + censoring = censoring.astype("uint8") + else: + c_sampled = rng.exponential( + scale=self.censoring_factor * avg_time, size=n_samples + ).astype(self.dtype) + + censoring = (times > c_sampled).astype("uint8") + times[censoring] = np.minimum(times, c_sampled) + + return X, times, censoring, treat_alloc, propensity_scores + + def generate_dataframe( + self, + n_samples: Optional[int] = None, + prefix: str = "X_", + duration_col: str = "time", + event_col: str = "event", + treated_col: str = "treatment", + ps_col: str = "propensity_scores", + seed: _SeedType = None, + ): + """Generate dataframe.""" + ( + covariates, + times, + censoring, + treatments, + propensity_scores, + ) = self.generate_data(n_samples, seed=seed) + data = pd.DataFrame(covariates).add_prefix(prefix) + data[duration_col] = times + data[event_col] = 1 - censoring + data[treated_col] = treatments + data[ps_col] = propensity_scores + return data + + +def features_normal_cov_uniform( + n_samples: int = 200, + n_features: int = 30, + dtype: Literal["float32", "float64"] = "float64", + seed: _SeedType = None, +): + """Generate Normal features with uniform covariance. + + An example of features obtained as samples of a centered Gaussian + vector with a specific covariance matrix given by 0.5 * (U + U.T), + where U is uniform on [0, 1] and diagonal filled by ones. + + Parameters + ---------- + n_samples : int + Number of samples. Default=200. + n_features : int + Number of features. Default=30. + dtype : `{"float64", "float32"}`, optional + Type of the arrays used. Default='float64' + seed: {None, int, Sequence[int], SeedSequence, BitGenerator, Generator}, optional + The seed for reproducibility. Defaults to None. 
+ + Returns + ------- + output : numpy.ndarray, shape=(n_samples, n_features) + n_samples realization of a Gaussian vector with the described + covariance + """ + rng = np.random.default_rng(seed) + pre_cov = rng.uniform(size=(n_features, n_features)).astype(dtype) + np.fill_diagonal(pre_cov, 1.0) + cov = 0.5 * (pre_cov + pre_cov.T) + features = rng.multivariate_normal(np.zeros(n_features), cov, size=n_samples) + if dtype != "float64": + return features.astype(dtype) + return features + + +def features_normal_cov_toeplitz( + n_samples: int = 200, + n_features: int = 30, + cov_corr: float = 0.5, + dtype: Literal["float32", "float64"] = "float64", + seed: _SeedType = None, +): + """Generate normal features with toeplitz covariance. + + An example of features obtained as samples of a centered Gaussian vector with + a toeplitz covariance matrix. + + Parameters + ---------- + n_samples : int + Number of samples. Default=200. + n_features : int + Number of features. Default=30. + cov_corr : float + correlation coefficient of the Toeplitz correlation matrix. Default=0.5. + dtype : `{'float64', 'float32'}`, optional + Type of the arrays used. Default='float64' + seed: {None, int, Sequence[int], SeedSequence, BitGenerator, Generator}, optional + The seed for reproducibility. Defaults to None. + + Returns + ------- + output : numpy.ndarray, shape=(n_samples, n_features) + n_samples realization of a Gaussian vector with the described + covariance + """ + rng = np.random.default_rng(seed) + cov: np.ndarray = toeplitz(cov_corr ** np.arange(0, n_features)) + features = rng.multivariate_normal(np.zeros(n_features), cov, size=n_samples) + if dtype != "float64": + return features.astype(dtype) + return features + + +def make_categorical(X, up_to: int = 25, seed: _SeedType = None): + """Convert continuous features in a dataset to categorical features. 

    This function takes a dataset matrix `X` and converts its first `up_to` columns
    (features) into categorical features using the KBinsDiscretizer method.
    It performs min-max scaling on each feature before discretization.

    Parameters
    ----------
    X : np.ndarray
        Input dataset matrix of shape (n_samples, n_features).
    up_to : int, optional
        Number of columns to convert to categorical features, by default 25.
    seed : int or None, optional
        Seed for the random number generator, by default None.

    Returns
    -------
    np.ndarray, np.ndarray
        Two arrays: `Xleft` containing the modified categorical features
        and `Xright` containing the remaining original features.
    """
    rng = np.random.default_rng(seed)
    # NOTE: these slices are views, so the discretized values below are also
    # written into the caller's `X` in place.
    Xleft = X[:, :up_to]
    Xright = X[:, up_to:]
    mm_normalizer = MinMaxScaler()
    # Each categorical column gets a random number of bins in [2, 10).
    nbins_vector = rng.integers(2, 10, size=up_to)
    for j, nbins in enumerate(nbins_vector):
        # sklearn not supporting generator yet, pass int to random_state
        # ref: https://github.com/scikit-learn/scikit-learn/issues/16988
        seed_seq = rng.bit_generator._seed_seq.spawn(1)[0]  # type: ignore
        random_state = seed_seq.generate_state(1)[0]
        discretizer = KBinsDiscretizer(
            n_bins=nbins, encode="ordinal", random_state=random_state
        )
        # Scale to [0, 1] first, then replace values by ordinal bin codes.
        Xleft[:, j] = mm_normalizer.fit_transform(Xleft[:, j][:, None])[:, 0]
        Xleft[:, j] = discretizer.fit_transform(Xleft[:, j][:, None])[:, 0]
    return Xleft, Xright


def generate_survival_data(
    n_samples: int = 100,
    ndim: int = 50,
    censoring_factor: float = 0.7,
    cate: float = 0.7,
    prop_treated: float = 0.5,
    ncategorical: int = 25,
    na_proportion: float = 0.1,
    dtype: Literal["float32", "float64"] = "float64",
    seed: _SeedType = None,
    use_cate: bool = True,
):
    """Generate simulated survival data.

    Parameters
    ----------
    n_samples : int, optional
        Number of samples in the generated dataset, by default 100.
    ndim : int, optional
        Number of total features, by default 50.
    censoring_factor : float, optional
        Factor influencing the amount of censoring, by default 0.7.
    cate : float, optional
        CATE (Conditional Average Treatment Effect) parameter, by default 0.7.
    prop_treated : float, optional
        Proportion of treated samples, by default 0.5.
    ncategorical : int, optional
        Number of categorical features, by default 25.
    na_proportion : float, optional
        Proportion of missing values, by default 0.1.
    dtype : Literal["float32", "float64"], optional
        Data type for the generated data, by default "float64".
    seed : int or None, optional
        Seed for the random number generator, by default None.
    use_cate : bool, optional
        Whether to use CATE (Conditional Average Treatment Effect), by default True.

    Returns
    -------
    pd.DataFrame, np.ndarray
        A pandas DataFrame containing the generated dataset with categorical and
        continuous features, and an array of coefficients used in the simulation.
    """
    assert ncategorical <= ndim
    rng = np.random.default_rng(seed)
    simu_coxreg = CoxData(
        n_samples,
        ndim=ndim,
        cate=cate,
        prop_treated=prop_treated,
        dtype=dtype,
        seed=rng,
        random_censoring=True,
        censoring_factor=censoring_factor,
        standardize_features=False,
    )
    X, T, C, treated, _ = simu_coxreg.generate_data(use_cate=use_cate)
    # Will make first columns to be categorical
    Xcat, Xcont = make_categorical(X, up_to=ncategorical)
    # Build the final dataframe using appropriate column names and adding missing
    # values
    cols_dict = {}
    X = np.concatenate((Xcat, Xcont), axis=1)
    for i in range(Xcat.shape[1] + Xcont.shape[1]):
        currentX = X[:, i].astype(dtype)
        # Each entry is independently masked with probability na_proportion.
        mask_na = rng.uniform(0, 1, X.shape[0]) > (1.0 - na_proportion)
        currentX[mask_na] = np.nan
        if i < Xcat.shape[1]:
            colname = "cat_col"
        else:
            colname = "col"
            # Re-number continuous columns starting from 0.
            i -= Xcat.shape[1]
        cols_dict[f"{colname}_{i}"] = currentX

    # The absolute value is superfluous but just to be sure
    cols_dict["T"] = np.abs(T)
    cols_dict["E"] = (1.0 -
C).astype("uint8") + cols_dict["treated"] = treated + + df = pd.DataFrame(cols_dict) + # Final cast of categorical columns that was impossible due to nan in numpy + for i in range(Xcat.shape[1]): + df[f"cat_col_{i}"] = df[f"cat_col_{i}"].astype("Int64") + + return df, simu_coxreg.coeffs + + +def constant_cate(cate: float = 1.0) -> _FuncCateType: + """Wrap a constant function indicating the hazard ratio.""" + return lambda _: cate + + +def linear_cate(ndim: int, seed: _SeedType = None) -> _FuncCateType: + """Give the cate as the exponential of the linear combination of features. + + Coefficients of the linear combination is randomly generated. + + Parameters + ---------- + ndim: int + Number of features + seed: {None, int, Sequence[int], SeedSequence, BitGenerator, Generator}, optional + The seed for reproducibility. Defaults to None. + + Returns + ------- + Callable: + A function that takes a row (sample) of features as input and returns + the cate (hazard ratio). + """ + rng = np.random.default_rng(seed) + params = rng.uniform(-1, 1, ndim) / np.sqrt(ndim) + return lambda x: np.exp(np.sum(x * params)) + + +def random_cate(seed: _SeedType = None) -> _FuncCateType: + """Wrap a function giving random values of cate (hazard ratio).""" + rng = np.random.default_rng(seed) + return lambda _: np.exp(rng.normal(0, 0.4)) + + +def constant_propensity(prop_treated: float = 0.5) -> _FuncPropensityType: + """Wrap a constant function indicating the propensity scores.""" + return lambda _: prop_treated + + +def linear_propensity( + ndim: int, + overlap: float = 0, + prop_treated: float = 0.5, + seed: _SeedType = None, +) -> _FuncPropensityType: + """Give the propensity scores as a linear combination of features. + + Coefficients of the linear combination is randomly generated. + + Parameters + ---------- + ndim: int + Number of features + overlap: float, default=0 + Parameter controlling the strength of interaction between features and + treatment allocation. 
        The larger the strength, the weaker the overlap
        between the distributions of propensity scores of the treated group and
        the control group.
    prop_treated: float, default=0.5
        proportion of samples in the treated group if treatments were to be
        assigned according to propensity scores generated by the returned
        function.
    seed: {None, int, Sequence[int], SeedSequence, BitGenerator, Generator}, optional
        The seed for reproducibility. Defaults to None.

    Returns
    -------
    Callable:
        A function that takes a row (sample) of features as input and returns
        the probability of the sample being treated.
    """

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    rng = np.random.default_rng(seed)
    # Random coefficients scaled by sqrt(ndim); `overlap` amplifies them,
    # strengthening the feature/treatment interaction.
    params = (1.0 + overlap) * rng.uniform(-0.5, 0.5, ndim) / np.sqrt(ndim)
    # The intercept log(p / (1 - p)) is the logit of `prop_treated`, so with
    # centered features the scores are centered around prop_treated
    # (presumably the intent — confirm against generate_data usage).
    return lambda x: sigmoid(
        np.log(prop_treated / (1.0 - prop_treated)) + np.sum(x * params)
    )


def jacobian_torch(y, x, create_graph=False):
    """Compute the Jacobian of a vector-valued function with respect to its input.

    This function calculates the Jacobian matrix of a vector-valued function 'y'
    with respect to its input 'x', both represented as PyTorch tensors.
    The function computes the partial derivatives of each component of 'y' with
    respect to each element of 'x'.

    Parameters
    ----------
    y : torch.Tensor
        Output tensor of the vector-valued function, with shape (n_samples, n_outputs).
    x : torch.Tensor
        Input tensor with respect to which the Jacobian is computed, with shape
        (n_samples, n_inputs).
    create_graph : bool, optional
        If True, create a computation graph to allow further differentiation,
        by default False.

    Returns
    -------
    torch.Tensor
        Jacobian matrix of shape (n_samples, n_outputs, n_inputs), representing
        the derivatives of each component of 'y' with respect to each element of 'x'.
    """
    jac = []
    flat_y = y.reshape(-1)
    grad_y = torch.zeros_like(flat_y)
    # Back-propagate one output component at a time using a one-hot
    # vector-Jacobian product; each pass yields one row of the Jacobian.
    for i in range(len(flat_y)):
        grad_y[i] = 1.0
        (grad_x,) = torch.autograd.grad(
            flat_y, x, grad_y, retain_graph=True, create_graph=create_graph
        )
        jac.append(grad_x.reshape(x.shape))
        # Reset the one-hot entry for the next iteration.
        grad_y[i] = 0.0
    # Result has shape y.shape + x.shape.
    return torch.stack(jac).reshape(y.shape + x.shape)


def hessian_torch(y, x):
    """Compute the Hessian matrix of a scalar-valued function with respect to its input.

    This function calculates the Hessian matrix of a scalar-valued function 'y' with
    respect to its input 'x', both represented as PyTorch tensors.
    The Hessian matrix represents the second-order partial derivatives of the function
    'y' with respect to each pair of input elements.

    Parameters
    ----------
    y : torch.Tensor
        Output tensor of the scalar-valued function, with shape (n_samples,).
    x : torch.Tensor
        Input tensor with respect to which the Hessian is computed, with shape
        (n_samples, n_inputs).

    Returns
    -------
    torch.Tensor
        Hessian matrix of shape (n_samples, n_inputs, n_inputs), representing
        the second-order partial derivatives of the scalar-valued function 'y'
        with respect to each pair of input elements.
    """
    # Hessian = Jacobian of the Jacobian; the inner call keeps the graph so
    # the outer differentiation is possible.
    return jacobian_torch(jacobian_torch(y, x, create_graph=True), x)


def cox_partial_loglikelihood_breslow_torch(m, X, y):
    """Calculate the Cox partial log-likelihood using the Breslow estimator.

    This function computes the partial log-likelihood for the Cox Proportional Hazards
    model using the Breslow estimator. The partial log-likelihood quantifies the
    likelihood of observing the event times given the input features and
    model parameters.

    Parameters
    ----------
    m : CoxPHModelTorch
        An instance of the CoxPHModelTorch class representing the Cox Proportional
        Hazards model.
    X : np.ndarray or torch.Tensor
        Input feature matrix of shape (n_samples, n_features).
    y : np.ndarray or torch.Tensor
        Survival or event times, where positive values indicate observed events,
        and non-positive values indicate censored observations. Should have the
        same length as the number of samples.

    Returns
    -------
    float
        The negative of the Cox partial log-likelihood using the Breslow estimator.
    """
    # distinct event times (positive values are observed events)
    distinct_times = np.unique(y[y > 0])
    # Ds[i]: indices with an event exactly at time t_i;
    # Rs[i]: risk set, i.e. samples still at risk at t_i (|y| >= t_i).
    Ds = []
    Rs = []
    for t in distinct_times:
        Ds.append(np.where(y == t)[0])
        Rs.append(np.where(np.abs(y) >= t)[0])
    coxl = 0.0
    beta = m.fc1.weight.permute((1, 0))
    for i, t in enumerate(distinct_times):
        XD = torch.from_numpy(X[Ds[i], :])
        XR = torch.from_numpy(X[Rs[i], :])

        if XD.shape[0] > 0:
            # Sum of linear predictors over the events at t_i.
            coxl += torch.dot(beta[:, 0], XD.sum(axis=0))  # type: ignore
        if XR.shape[0] > 0:
            # Breslow: each of the |Ds[i]| ties subtracts the same log-sum
            # of exp(beta^T x) over the risk set.
            expbetaX = m(XR)
            coxl -= float(len(Ds[i])) * torch.log(expbetaX.sum(axis=(0)))[0]

    return -coxl


def cox_partial_loglikelihood_breslow_torch_from_prediction(y_pred, y):
    """Compute the partial loglikelihood from the prediction y_pred of the model.

    This prediction corresponds to the score beta^T.Z of a model, before taking the
    exponential.

    Parameters
    ----------
    y_pred: torch.Tensor
        Prediction of the model, corresponding to the scores.
        The smaller the score is, the longer the survival.
    y: np.ndarray
        contains the labels of the data.
        Negative value corresponds to censored data

    Returns
    -------
    torch.Tensor: the negative log-likelihood of Breslow-Cox
    """
    distinct_times = np.unique(y[y > 0])
    # Same event-set / risk-set construction as the model-based variant above.
    Ds = []
    Rs = []
    for t in distinct_times:
        Ds.append(np.where(y == t)[0])
        Rs.append(np.where(np.abs(y) >= t)[0])
    coxl = 0.0
    for i, t in enumerate(distinct_times):
        coxl += y_pred[Ds[i]].sum()
        if len(Rs[i]) > 0:
            expbetaX = torch.exp(y_pred[Rs[i]])
            coxl -= float(len(Ds[i])) * torch.log(expbetaX.sum(axis=0))  # type: ignore
    return -coxl


def analytical_gradient_cox_partial_loglikelihood_breslow_torch(m, X, y):
    """Calculate Cox partial log-likelihood's gradient using Breslow estimator.

    This function computes the analytical gradient of the partial log-likelihood
    for the Cox Proportional Hazards model using the Breslow estimator.
    The gradient is computed with respect to the model's weights.

    Parameters
    ----------
    m : CoxPHModelTorch
        An instance of the CoxPHModelTorch class representing the Cox
        Proportional Hazards model.
    X : np.ndarray or torch.Tensor
        Input feature matrix of shape (n_samples, n_features).
    y : np.ndarray or torch.Tensor
        Survival or event times, where positive values indicate observed events,
        and non-positive values indicate censored observations. Should have the same
        length as the number of samples.

    Returns
    -------
    torch.Tensor
        The analytical gradient of the partial log-likelihood with respect to
        the model's weights. The shape of the tensor matches the shape of the model's
        weight tensor.
    """
    # distinct event times; the closed-form gradient needs no autograd graph.
    with torch.no_grad():
        distinct_times = np.unique(y[y > 0])
        Ds = []
        Rs = []
        for t in distinct_times:
            Ds.append(np.where(y == t)[0])
            Rs.append(np.where(np.abs(y) >= t)[0])
        grad = torch.zeros_like(m.fc1.weight)

        for i, t in enumerate(distinct_times):
            XD = torch.from_numpy(X[Ds[i], :])
            XR = torch.from_numpy(X[Rs[i], :])
            expbetaX = m(XR)
            # Weighted mean of the risk set's features, weights exp(beta^T x).
            num = torch.mul(XR, expbetaX).sum(axis=0)  # type: ignore
            den = expbetaX.sum(axis=(0, 1))
            # Clamp the denominator away from 0 for numerical stability.
            grad += XD.sum(axis=0) - float(  # type: ignore
                len(Ds[i])
            ) * num / torch.max(den, torch.ones_like(den) * 1e-16)

    return -grad


def random_treatment_allocation(
    n_samples: int,
    prop_treated: float = 0.5,
    shuffle: bool = False,
    seed: _SeedType = None,
) -> NDArray[np.uint8]:
    """Perform random treatment allocation for a given number of samples.

    This function generates a random allocation of treatments to samples
    based on the specified proportion of treated samples. The allocation can
    be optionally shuffled.

    Parameters
    ----------
    n_samples : int
        Total number of samples.
    prop_treated : float, optional
        Proportion of treated samples, by default 0.5.
    shuffle : bool, optional
        Whether to shuffle the treatment allocation, by default False.
    seed : int or None, optional
        Seed for the random number generator, by default None.

    Returns
    -------
    NDArray[np.uint8]
        An array of treatment allocations, where 1 represents treated and 0
        represents control. The array has a shape of (n_samples,).
def random_treatment_allocation(
    n_samples: int,
    prop_treated: float = 0.5,
    shuffle: bool = False,
    seed: _SeedType = None,
) -> NDArray[np.uint8]:
    """Randomly allocate treatment or control status to samples.

    Generates an allocation with (approximately) the requested proportion of
    treated samples; when rounding leaves one sample unassigned, its arm is
    drawn at random. The allocation can optionally be shuffled.

    Parameters
    ----------
    n_samples : int
        Total number of samples.
    prop_treated : float, optional
        Proportion of treated samples, by default 0.5.
    shuffle : bool, optional
        Whether to shuffle the treatment allocation, by default False.
    seed : int or None, optional
        Seed for the random number generator, by default None.

    Returns
    -------
    NDArray[np.uint8]
        Array of shape (n_samples,) with 1 for treated and 0 for control.

    Examples
    --------
    >>> allocations = random_treatment_allocation(
    ...     n_samples=100, prop_treated=0.3, shuffle=True
    ... )
    >>> int(allocations.sum()) in (30, 31)
    True
    """
    rng = np.random.default_rng(seed)

    prop_control = 1 - prop_treated
    n_treated = int(n_samples * prop_treated)
    n_control = int(n_samples * prop_control)
    # Rounding may leave one sample unassigned (0 or 1 leftover).
    n_leftover = n_samples - n_treated - n_control
    # The leftover arm is always drawn so RNG consumption is deterministic.
    leftover_arm = rng.choice([1, 0], size=1, p=[prop_treated, prop_control])

    allocations = np.concatenate(
        (
            np.repeat(1, n_treated),
            np.repeat(0, n_control),
            np.repeat(leftover_arm, n_leftover),
        )
    ).astype("uint8")
    if shuffle:
        rng.shuffle(allocations)

    return allocations


class CoxPHModelTorch(torch.nn.Module):
    """Cox Proportional Hazards model implemented as a PyTorch module.

    The model holds the coefficient vector beta in a single bias-free linear
    layer and predicts the exponentiated linear term exp(beta^T x), which can
    be interpreted as the hazard ratio.

    Parameters
    ----------
    ndim : int, optional
        Number of input dimensions or features, by default 10.
    torch_dtype : torch.dtype, optional
        Data type for PyTorch tensors, by default torch.float64.
    """

    def __init__(self, ndim=10, torch_dtype=torch.float64):
        """Initialize the CoxPHModelTorch.

        Parameters
        ----------
        ndim : int, optional
            Number of input dimensions or features, by default 10.
        torch_dtype : torch.dtype, optional
            Data type for PyTorch tensors, by default torch.float64.
        """
        super().__init__()
        self.ndim = ndim
        self.torch_dtype = torch_dtype
        # Bias-free linear layer: its weights are the Cox coefficients beta,
        # initialized to zero (hazard ratio 1 for every sample).
        self.fc1 = torch.nn.Linear(self.ndim, 1, bias=False).to(self.torch_dtype)
        self.fc1.weight.data.fill_(0.0)

    def forward(self, x):
        """Perform a forward pass through the CoxPH model.

        Parameters
        ----------
        x : torch.Tensor
            Input tensor of shape (batch_size, ndim).

        Returns
        -------
        torch.Tensor
            Predicted exponentiated linear term (hazard ratio).
        """
        return torch.exp(self.fc1(x))  # pylint: disable=not-callable


class MockStepSizer:
    """A mock step sizer for illustrative purposes.

    Performs no actual step-size calculation: `next()` always returns the
    constant step size 1.0.
    """

    def __init__(self):
        """Init method for the class."""
        pass

    def update(self, *args, **kwargs):
        """Update the state of the mock step sizer.

        Parameters
        ----------
        *args, **kwargs : arguments and keyword arguments
            Ignored in this mock implementation.

        Returns
        -------
        self : MockStepSizer
            The instance itself, to allow chaining.
        """
        return self

    def next(self):
        """Get the next step size.

        Returns
        -------
        float
            The constant step size of 1.0.
        """
        return 1.0
def compute_q_k(
    X_norm,
    y,
    scaled_variance_matrix,
    distinct_event_times,
    weights_counts_on_events,
    risk_phi,
    risk_phi_x,
    score,
    weights,
):
    """Compute the local (per-client) bricks of the sandwich estimator Q.

    Parameters
    ----------
    X_norm : np.ndarray
        Normalized covariates of the client's samples, shape (n_k, n_features).
    y : np.ndarray
        Signed survival times of the client's samples; negative values mark
        censored observations.
    scaled_variance_matrix : np.ndarray
        Classical scaled variance of the Cox estimator,
        shape (n_features, n_features); computed globally and shared.
    distinct_event_times : list[float]
        Globally shared, sorted distinct times.
    weights_counts_on_events : list[float]
        For each distinct time, the global sum of the weights of the samples
        with an event at that time.
    risk_phi : list[float]
        For each distinct time, the global sum of weights * exp(beta^T x) over
        the at-risk set.
    risk_phi_x : list[np.ndarray]
        For each distinct time, the global weighted sum of covariates scaled
        by exp(beta^T x) over the at-risk set, each of shape (n_features,).
    score : np.ndarray
        exp(beta^T x) for the client's samples, shape (n_k,).
    weights : np.ndarray
        Sample weights of the client's samples, shape (n_k,).

    Returns
    -------
    tuple(np.ndarray, np.ndarray, np.ndarray)
        phi_k, the (n_k, n_features) score residuals; delta_betas_k, the
        (n_k, n_features) per-sample influences on beta; and Qk, the client's
        (n_features, n_features) block of Q.
    """
    n, n_features = X_norm.shape
    phi_k = np.zeros((n, n_features))
    current_client_indices = np.arange(n).tolist()
    weights_counts_on_events_cumsum = np.concatenate(
        [wc.reshape((1, 1)) for wc in weights_counts_on_events],
        axis=0,
    )
    s0s_cumsum = np.concatenate(
        [risk_phi_s.reshape((1, 1)) for risk_phi_s in risk_phi],
        axis=0,
    )
    s1s_cumsum = np.concatenate(
        [risk_phi_x_s.reshape((1, n_features)) for risk_phi_x_s in risk_phi_x],
        axis=0,
    )
    # Of size (n_times, n_features); the division is term by term.
    s1_over_s0_cumsum = s1s_cumsum / s0s_cumsum
    # The division is term by term as well, shape (n_times, 1).
    weights_over_s0_cumsum = weights_counts_on_events_cumsum / s0s_cumsum

    for i in current_client_indices:
        # Crux of the implementation: only times with events <= ti contribute,
        # as otherwise delta_j = 0 and the corresponding terms vanish.
        ti = np.abs(y[i])

        compatible_event_times = [
            idx for idx, td in enumerate(distinct_event_times) if td <= ti
        ]
        if len(compatible_event_times) > 0:
            # distinct_event_times is sorted, so the active indices of the sum
            # form a prefix ending at the last compatible time.
            max_distinct_event_times = max(compatible_event_times)
            not_Rs_i = np.arange(max_distinct_event_times + 1)
            # Quantities below are global and use only already-shared data.
            s1_over_s0_in_sum = s1_over_s0_cumsum[not_Rs_i]
            weights_over_s0_in_sum = weights_over_s0_cumsum[not_Rs_i]
        else:
            # Censorship happening before any event time: the sum is empty.
            # BUGFIX: use explicitly 2D zero arrays so the .sum(axis=0)
            # below yields a (n_features,) vector; the previous scalar
            # 0.0 made reshape((n_features,)) fail for n_features > 1.
            max_distinct_event_times = None
            s1_over_s0_in_sum = np.zeros((1, n_features))
            weights_over_s0_in_sum = np.zeros((1, 1))

        # Second and third terms of the score residual.
        phi_i = -score[i] * (
            weights_over_s0_in_sum * (X_norm[i, :] - s1_over_s0_in_sum)
        ).sum(axis=0).reshape((n_features,))

        # First term, only for samples with an observed event. The guard on
        # max_distinct_event_times avoids referencing an undefined index when
        # no compatible time exists.
        if y[i] > 0 and max_distinct_event_times is not None:
            phi_i += (
                X_norm[i, :]
                - risk_phi_x[max_distinct_event_times][None, :]
                / risk_phi[max_distinct_event_times][None, None]
            ).reshape((n_features,))

        # We recalibrate by w_i only at the very end; here we deviate a bit
        # from Binder.
        phi_k[i] = phi_i * weights[i]

    # scaled_variance_matrix was computed globally, so this delta_beta is the
    # (n_k, n_features) matrix of per-sample influences.
    delta_betas_k = phi_k.dot(scaled_variance_matrix)
    # Qk is (n_features, n_features); the server assembles Q block by block.
    Qk = delta_betas_k.T.dot(delta_betas_k)

    return phi_k, delta_betas_k, Qk
+ weights : np.ndarray or torch.Tensor + Weights associated with each sample, with shape (n_samples,) + scaled_variance_matrix : np.ndarray or torch.Tensor + Classical scaled variance of the Cox model estimator. + """ + n_samples = X_norm.shape[0] + + np.random.seed(42) + samples_repartition = np.random.choice(n_clients, size=n_samples) + + # This part is already computed for WebDisco + # in fact we have the D_i and R_i by client and share global distinct_times + + score = np.exp(np.dot(X_norm, scaled_beta)) + + # np.unique already sorts values to need to call sort another time + distinct_event_times = np.unique(np.abs(y)).tolist() + + Ds = [] + Rs = [] + for t in distinct_event_times: + Ds.append(np.where(y == t)[0]) + Rs.append(np.where(np.abs(y) >= t)[0]) + + # This is assumed to be globally available + # for risk_phi each element is a scalar + risk_phi = [] + # for risk_phi_x each element is of the dimension of a feature N, + risk_phi_x = [] + weights_counts_on_events = [] + + for i, _ in enumerate(distinct_event_times): + risk_phi_x.append( + np.sum(X_norm[Rs[i], :] * (weights[Rs[i]] * score[Rs[i]])[:, None], axis=0) + ) + risk_phi.append(np.sum((weights[Rs[i]] * score[Rs[i]])[:, None])) + weights_counts_on_events.append(weights[Ds[i]].sum()) + # Iterate forwards + Q = [] + + for k in range(n_clients): + indices_client_k = np.where(samples_repartition == k) + X_norm_k = X_norm[indices_client_k] + y_k = y[indices_client_k] + weights_k = weights[indices_client_k] + score_k = score[indices_client_k] + _, _, Qk = compute_q_k( + X_norm_k, + y_k, + scaled_variance_matrix, + distinct_event_times, + weights_counts_on_events, + risk_phi, + risk_phi_x, + score_k, + weights_k, + ) + # Communication to the server + Q.append(Qk) + + # We sum each block + Q = sum(Q) + return np.sqrt(np.diag(Q)) + + +def robust_sandwich_variance_pooled( + X_norm, y, scaled_beta, weights, scaled_variance_matrix +): + """Compute the robust sandwich variance estimator. 
def robust_sandwich_variance_pooled(
    X_norm, y, scaled_beta, weights, scaled_variance_matrix
):
    """Compute the robust sandwich variance estimator on pooled data.

    Computes the robust sandwich variance estimator for the Cox model. The
    sandwich estimator is robust to the lack of independence between samples
    introduced, e.g., by weighting.

    Parameters
    ----------
    X_norm : np.ndarray
        Input feature matrix of shape (n_samples, n_features).
    y : np.ndarray
        Survival or event times; positive values indicate observed events,
        non-positive values indicate censored observations. Same length as the
        number of samples.
    scaled_beta : np.ndarray
        The model's coefficients, with shape (n_features,).
    weights : np.ndarray
        Weights associated with each sample, with shape (n_samples,).
    scaled_variance_matrix : np.ndarray
        Classical scaled variance of the Cox model estimator.

    Returns
    -------
    np.ndarray
        Standard errors, the square root of the diagonal of the sandwich
        estimator, shape (n_features,).
    """
    n_samples, n_features = X_norm.shape

    score_residuals = np.zeros((n_samples, n_features))

    phi_s = np.exp(np.dot(X_norm, scaled_beta))

    distinct_times = sorted(np.unique(np.abs(y)))
    Ds = []
    Rs = []
    for t in distinct_times:
        Ds.append(np.where(np.abs(y) == t)[0])
        Rs.append(np.where(np.abs(y) >= t)[0])

    # Per-sample snapshots of the at-risk aggregates at each sample's own time.
    risk_phi_x_history = np.zeros((n_samples, n_features))
    risk_phi_history = np.zeros(n_samples)
    for i, t in enumerate(distinct_times):
        for j in Ds[i]:
            risk_phi_x_history[j, :] = np.sum(
                X_norm[Rs[i], :] * (weights[Rs[i]] * phi_s[Rs[i]])[:, None], axis=0
            )
        risk_phi_history[Ds[i]] = np.sum((weights[Rs[i]] * phi_s[Rs[i]])[:, None])
    # Iterate forwards over times and build the score residuals.
    for i, t in enumerate(distinct_times):
        for j in Ds[i]:
            not_Rs = set(np.arange(n_samples)) - set(Rs[i])
            not_Rs = list((not_Rs.union(np.array([j]))))
            score_residuals[j, :] = -phi_s[j] * (
                ((y > 0)[not_Rs] * weights[not_Rs] / risk_phi_history[not_Rs])[:, None]
                * (
                    X_norm[j, :]
                    - risk_phi_x_history[not_Rs] / risk_phi_history[not_Rs][:, None]
                )
            ).sum(axis=0)

            if y[j] > 0:
                score_residuals[j, :] += (
                    X_norm[j, :] - risk_phi_x_history[j] / risk_phi_history[j]
                )

    score_residuals = score_residuals * weights[:, None]

    delta_betas = score_residuals.dot(scaled_variance_matrix)
    tested_var = delta_betas.T.dot(delta_betas)
    return np.sqrt(np.diag(tested_var))


def compare_tensors_lists(tensor_list_a, tensor_list_b, rtol=1e-5, atol=1e-8):
    """Compare lists of tensors up to a certain precision.

    The criterion checked is the following: |x - y| <= |y| * rtol + atol.
    So there are two terms to consider: the first one is relative (rtol) and
    the second is absolute (atol). The default for atol is a bit low for
    float32 tensors. We keep the defaults everywhere to be safe, except in the
    tests comparing computed gradients with theory, where we raise atol to
    1e-6. It makes sense in this case because it matches the expected
    precision for slightly different float32 ops that should theoretically
    give the exact same result.

    Parameters
    ----------
    tensor_list_a : list
        A list of tensors.
    tensor_list_b : list
        A list of tensors.
    rtol : float, optional
        Relative difference tolerance for tensor-to-tensor comparison.
        Defaults to 1e-5.
    atol : float, optional
        Absolute difference tolerance for tensor-to-tensor comparison.
        Defaults to 1e-8.

    Raises
    ------
    RuntimeError
        If the two lists do not hold tensors of the same supported type.
    AssertionError
        If any pair of tensors differs beyond the given tolerances.
    """
    if isinstance(tensor_list_a[0], torch.Tensor) and isinstance(
        tensor_list_b[0], torch.Tensor
    ):
        backend = "pytorch"

    elif isinstance(tensor_list_a[0], np.ndarray) and isinstance(
        tensor_list_b[0], np.ndarray
    ):
        backend = "numpy"

    else:
        raise RuntimeError(
            """Either the tensors you passed do not have the same type
            or the type is unsupported."""
        )

    if backend == "pytorch":
        assert all(
            [
                torch.allclose(u, d, rtol=rtol, atol=atol)
                for u, d in zip(tensor_list_a, tensor_list_b)
            ]
        )

    elif backend == "numpy":
        assert all(
            [
                np.allclose(u, d, rtol=rtol, atol=atol)
                for u, d in zip(tensor_list_a, tensor_list_b)
            ]
        )


# following the typing of numpy.random
_SeedType = Optional[
    Union[
        int,
        Sequence[int],
        BitGenerator,
        SeedSequence,
        Generator,
    ]
]

# function defining for each patient the probability of being in the
# treatment group
_FuncPropensityType = Callable[[Sequence[Any]], float]
# create custom color palette
owkin_palette = {
    "owkin_pink": "#FFBDE0",
    "owkin_teal": "#32C6C6",
    "owkin_mustard": "#FFC200",
    "owkin_blue": "#1439C1",
    "owkin_magenta": "#F70B9D",
    "owkin_bright_blue": "#009EFF",
    "owkin_green": "#2CB546",
    "owkin_stone": "#D6CDC7",
    "owkin_black": "#000000",
}


def setup_owkin_colors_palette():
    """Add custom Owkin colors to mcolors."""
    owkin_palette_pd = pd.DataFrame(
        {"name": owkin_palette.keys(), "color": owkin_palette.values()}
    )

    c = dict(zip(*owkin_palette_pd.values.T))
    mcolors.get_named_colors_mapping().update(c)


def plot_power(
    df_res: pd.DataFrame,
    fit_curve: bool = False,
    deg: int = 2,
    plot_kwargs: Optional[dict] = None,
) -> Axes:
    """Plot power or type I error figure for given experiment.

    Parameters
    ----------
    df_res : pd.DataFrame
        Results of experiment containing in every row the results
        for an experiment configuration and a column "p" with the p-value.
    fit_curve : bool, optional
        Interpolate the datapoints. Defaults to False.
    deg : int, optional
        Degree of polynomial for interpolation. Defaults to 2.
    plot_kwargs : Optional[dict], optional
        Extra keyword arguments forwarded to ``Axes.scatter``. Three keys
        are treated specially: "xlabel" (name of the column to plot on the
        x-axis, defaults to "n_samples"), "ylabel", and "ax" (the axes to
        draw on, defaults to ``plt.gca()``). Defaults to None.

    Returns
    -------
    matplotlib.axes._axes.Axes
        Power or type I error plot
    """
    setup_owkin_colors_palette()
    if plot_kwargs is None:
        plot_kwargs = {}
    plot_kwargs.setdefault("xlabel", "n_samples")
    param_vary = plot_kwargs["xlabel"]
    axis = plot_kwargs.pop("ax", plt.gca())

    # Fraction of p-values below 5%: power (or type I error under the null).
    df_power = (
        df_res.groupby(["method", param_vary])
        .agg(
            power=pd.NamedAgg(column="p", aggfunc=lambda x: (x < 0.05).sum() / x.size),
        )
        .reset_index()
    )
    owkin_colors = itertools.cycle(owkin_palette.keys())
    markers = ["d", "v", "s", "^", "*"]
    markers = itertools.cycle(markers)
    for name, group in df_power.groupby("method"):
        # Pop the label keys so they are not forwarded to scatter; the walrus
        # guard makes later iterations (where they are gone) a no-op.
        if (xlabel := plot_kwargs.pop("xlabel", None)) is not None:
            axis.set_xlabel(xlabel)
        if (ylabel := plot_kwargs.pop("ylabel", None)) is not None:
            axis.set_ylabel(ylabel)
        owkin_color = next(owkin_colors)
        marker = next(markers)
        if fit_curve:
            fit = np.poly1d(np.polyfit(group[param_vary], group["power"], deg=deg))
            axis.plot(group[param_vary], fit(group[param_vary]), color=owkin_color)
        axis.scatter(
            param_vary,
            "power",
            data=group,
            label=name,
            color=owkin_color,
            marker=marker,
            **plot_kwargs,
        )
    axis.legend()

    return axis


if __name__ == "__main__":
    setup_owkin_colors_palette()
+ + Adapted from: + https://github.com/mwaskom/seaborn/blob/63d91bf0298009effc9de889f7f419f59deb3960/seaborn/utils.py#L830 + Reference for `offsite_points`: + https://stackoverflow.com/questions/24787041/multiple-titles-in-legend-in-matplotlib + https://github.com/matplotlib/matplotlib/blob/3180c94d84f4aeb8494e3fde9e39f5f7e4e244b6/lib/matplotlib/legend.py#L926 + """ + # Legend title not in rcParams until 3.0 + title_font_size = plt.rcParams.get("legend.title_fontsize", None) + offset_points = legend._fontsize * legend.handletextpad + hpackers = legend.findobj(VPacker)[0].get_children() + for hpack in hpackers: + draw_area, text_area = hpack.get_children() + handles = draw_area.get_children() + if not all(artist.get_visible() for artist in handles): + draw_area.set_width(-offset_points) + for text in text_area.get_children(): + if title_font_size is not None: + text.set_size(title_font_size) diff --git a/license.md b/license.md new file mode 100644 index 00000000..d627f5ab --- /dev/null +++ b/license.md @@ -0,0 +1,339 @@ +# License Terms and Conditions + + + +By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this License Terms and Conditions ("License") from Owkin Inc. a Delaware corporation, with an office located at 831 Broadway, Unit 3R, New York, NY 10003 USA (the “Licensor”). You are granted with the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. +If You are an individual and are agreeing to be bound by the terms of this License on behalf of your employer or other entity, You represent and warrant to the Licensor that you have full legal authority to bind your employer or such entity to this License. 
Failing this authority, You may not accept the License or access the Licensed Material on behalf of Your employer or other entity. +For the sake of clarity, this License is limited to grant non-profit entities with the rights defined hereunder. If You are a for-profit entity, You are invited to directly contact the Licensor for negotiating a separate agreement. + +## SECTION 1 – DEFINITIONS +For the purposes of this License: +“Derivative Work” + + +means any work or material that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified or included in a manner requiring permission under the Copyright and Similar Rights or Patent Rights held by the Licensor. + + +“Derived License” + + +means the license You apply to Your Derivative Work and/or Results in accordance with the terms and conditions of this License. + + +“Documentation” + + +means any owner's manuals, user's manuals, installation instructions, operating instructions, concepts of operations, and other similar items, related to the Licensed Material, regardless of storage medium, that explain the capabilities of the computer software or algorithm or provide instructions for using the software or algorithm. + + +“Copyright and Similar Rights” + + +means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. + + +“Effective Technological Measures” + + +means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. 
+ + +“Licensed Material” + + +means any software, algorithm, source code, object code, software, database, or other material that is made available by the Licensor under the terms of this License. Licensed Material excludes any material, algorithm, software, source code, object code, database, document, information and/or any data of or related to the Licensor that is not expressly made available by the Licensor under this License, whether or not related to the Licensed Material + + +“Licensed Rights” + + +means the rights granted to You subject to the terms and conditions of this License, which are limited to all Copyright and Similar Rights and Patent Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. + + +“Licensor” + + +means the individual(s) or entity(ies) granting rights under this License, as stated above. + + +“Non-Commercial” + + +Means for non-commercial (research) purposes by non-profit entity only. Non-Commercial excludes any use, sale, lease, license, or other transfer of the Licensed Material, Derivative Works or Results by or to a for-profit organization. Non-Commercial shall also exclude uses of the Licensed Material, Derivative Works or Results by any organization, including You, to perform contract research, to screen compound libraries, to produce or manufacture product for general sales, or to conduct research activities that result in any sale, lease, license, or transfer of the Licensed Material, Derivative Works or Results to a for-profit organization. + + +“Patent Rights” + +Mean the European patent application No. 
23306845.1, filed on 20 October 2023, owned by the Licensor, titled “Federated External Control Arms Method for Privacy-Enhanced Causal Inference on Distributed Data” and quoting: du TERRAIL, Jean; KLOPFENSTEIN, Quentin; LI, Honghao; MAYER, Imke; HALLAL, Mohammad; LOISEAU, Nicolas; ANDREUX, Mathieu; and BALAZARD, Felix as inventors and any foreign patent application corresponding thereto, and any divisional, additions, continuations, continuations in part, or re-examination application, and each patent that issues or reissues from any of these patent applications. For the avoidance of doubt, the Patent Rights include Supplementary Protection Certificates and other extension of similar nature. + +“Results” + + +means any results, discovery or creation, of any kind and in any form whatsoever, whether patentable or not, which results from the use of the Licensed Material, excluding Derivative Works. + + +“Share” + + +means to provide to a third party or the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make available to a third party or the public including in ways that members of the public may access from a place and at a time individually chosen by them. + + +“Sui Generis Database Rights” + + +means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. + + +“Third Party Material” + + +means any third-party software, algorithm, source code, object code, data, database, material or other components (including free and open source software). + + +“You” + + +means the individual or entity exercising the Licensed Rights under this License. Your has a corresponding meaning. 
+ + +The definitions referred to in this Section apply to both singular and plural terms. + +## SECTION 2 – SCOPE +a. License grant +1. Scope. Subject to the terms and conditions of this License and to Your compliance with the Documentation, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-transferable, non-exclusive, revocable and limited license to exercise the Licensed Rights in the Licensed Material to: +A. reproduce and Share the Licensed Material, in whole or in part, for Non-Commercial purposes only; and +B. produce, reproduce, and Share Derivative Work for Non-Commercial purposes only; and +C. produce, reproduce, and Share the Results for Non-Commercial purposes only. Any use of the Results other than Non-Commercial use shall be prior approved by the Licensor. +In any case, and without limiting the foregoing, You may not use the Licensed Material for any commercial purpose, or in connection with any patent application (or similar attempts to claim an intellectual property right). You may make a reasonable number of copies of the Documentation solely for use in connection with the license to the Licensed Material granted above +The grant of rights expressly set forth in this Section 2 are the complete grant of rights to you in the Licensed Material, and no other licenses are granted, whether by waiver, estoppel, implication, equity or otherwise. Licensor reserve all rights not expressly granted by this License. Should you want to obtain other rights on the License Material, including commercial rights, You are invited to directly contact the Licensor for negotiating a separate agreement, including financial conditions of such rights. +The foregoing License is personal to you, and you may not assign or sublicense this License or any other rights or obligations under this License without Owkin’s prior written consent; any such assignment or sublicense will be void and will automatically and immediately terminate this License. 
+2. Exceptions and Limitations. For the avoidance of doubt, this License is not intended to limit fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material in accordance with applicable laws. +3. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this License, simply making modifications authorized by this Section 2(a)(3) never produces Derivative Work. +4. Downstream recipients. +A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this License. +B. Additional offer from the Licensor – Derivative Works. Every recipient of Derivative Work from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Derivative Work under the conditions of this License. +C. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. +5. No endorsement. 
Nothing in this License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(b)(1)(A)(i). +b. Other rights +1. Moral Rights. Moral rights, such as the right of integrity, are not licensed under this License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. +2. Industrial Property Rights. Subject to the terms and conditions of this License, the Licensor hereby grants to You a worldwide, royalty-free, non-sublicensable, non-transferable, non-exclusive, revocable and limited license on Patent Rights to make, have made, use, offer to sell, sell, import, and otherwise transfer the Licensed Material and Derivative Work for the sole purpose and in accordance with the terms and conditions of the License and, where such license applies only to those Patent Rights claims licensable by Licensor that are necessarily infringed by use of the Licensed Material or Derivative Work. No other rights on any other intellectual property rights of the Licensor, including patents and trademark rights, are licensed under this License. +3. Royalties. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties, including when the Licensed Material is used other than for Non-Commercial purposes. 
+
+## SECTION 3 – LICENSE CONDITIONS
+Your exercise of the Licensed Rights is expressly made subject to the following conditions.
+a. Use
+
+1. Scope limitation. You will not, and will not permit, assist or cause any third party:
+
+to use, modify, copy, reproduce, create derivative works of, or distribute the Licensed Material and Derivative Works (or any works linked thereof or incorporating the Licensed Material), in whole or in part, for any purposes other than Non-Commercial, including for commercial or production purposes,
+to use, produce, reproduce or Share the Results for other purposes than Non-Commercial, without the prior written approval of the Licensor.
+2. Infringement. You will not, and will not permit, assist or cause any third party, to use, modify, copy, reproduce, create derivative works of, or distribute the Licensed Material, and any Third Party Material included therein, and Derivative Works (or any works linked thereof or incorporating the Licensed Material), in whole or in part, in any manner that infringes, misappropriates, or otherwise violates any third-party rights. You will not, and will not permit, assist or cause any third party to alter or remove copyright and other proprietary notices which appear on or in the Licensed Material.
+3. Compliance. You will not, and will not permit, assist or cause any third party to use, modify, copy, reproduce, create derivative works of, or distribute the Licensed Material, and any Third Party Material included therein, and Derivative Works (or any works linked thereof or incorporating the Licensed Material), in whole or in part, in any manner that violates any applicable law, including any privacy or security laws, rules, regulations, directives, or governmental requirements (including the General Data Privacy Regulation (Regulation (EU) 2016/679), as well as all amendments and successor laws to any of the foregoing).
+b. Attribution
+1. 
If You Share the Licensed Material (including in modified form), You must:
+A. retain the following if it is supplied by the Licensor with the Licensed Material:
+i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated);
+ii. a copyright notice;
+iii. a notice that refers to this License;
+iv. a notice that refers to the disclaimer of warranties;
+v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;
+B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and
+C. indicate the Licensed Material is licensed under this License, and include the text of, or the URI or hyperlink to, this License.
+2. You may satisfy the conditions in Section 3(b)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information.
+3. If requested by the Licensor, You must remove any of the information required by Section 3(b)(1)(A) to the extent reasonably practicable.
+4. If You Share Derivative Works You produce, the Derived License You apply must not prevent recipients of the Derivative Work from complying with this License.
+c. ShareAlike
+1. In addition to the conditions in Section 3(a) and 3(b), if You Share Licensed Material and/or Derivative Works or Results You produce, the following conditions also apply.
+2. The Derived License You apply must have the same terms and conditions as the License, this version or later. 
In particular, recipients of the Licensed Material, of Your Derivative Works or of Your Results shall not use them, or any work or material subject to Copyright and Similar Rights that is derived from or based upon them, or results obtained through their use, for any purposes other than Non-Commercial.
+3. You must include the text of, or the URI or hyperlink to, the Derived License You apply. You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Derivative Works or Results.
+4. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Derivative Works or Results that modify the rights granted under this License.
+5. You must grant any recipients any necessary rights of use or license on any intellectual property rights, including patents or trademarks, You may own or co-own on the Derivative Works or Results to permit and allow the use by the recipients of the Derivative Works or Results in accordance with the terms of the License, and procure that the recipients grant the same on any intellectual property rights that such recipients may generate through their use of Licensed Material, Derivative Works or Results.
+
+## SECTION 4 – SUI GENERIS DATABASE RIGHTS
+Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material:
+a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database for Non-Commercial purposes only, subject to the provisions of Section 6;
+b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Derivative Work; and
+c. 
You must comply with the conditions in Section 3(b) and (c) and in Section 6 if You Share all or a substantial portion of the contents of the database.
+For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this License where the Licensed Rights include other Copyright and Similar Rights.
+
+## SECTION 5 – DISCLAIMER OF WARRANTIES - LIMITATION OF LIABILITY - INDEMNIFICATION
+
+a. Disclaimer. You acknowledge that the Licensed Material is offered and made available “as-is”, and that the Licensor makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, ability to produce any particular results, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, or freedom from viruses or other harmful components, whether or not known or discoverable.
+
+b. Liability. You are responsible for your use of the Licensed Material, any Derivative Works or Results and shall bear all risks associated with this use, including but not limited to: instability, malfunctioning, loss of content and in particular data, and other damage or loss. To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this License or use of the Licensed Material, any Derivative Works or Results even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages.
+c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.
+d. Indemnification. 
You agree to indemnify, defend, and hold harmless the Licensor, its affiliates, and their respective officers, directors, employees, and agents from and against any and all claims, liabilities, damages, losses, or expenses, including reasonable attorneys' fees and costs, arising out of or in any way connected with Your access to or use of the Licensed Material, Derivative Works or Results.
+
+## SECTION 6 – THIRD PARTY MATERIALS
+
+a. Third Party Materials. The Licensed Material may contain Third Party Materials, which are subject to the license terms of the respective third-party licensors and to which the provisions of the License do not apply. In particular, and without limitation, You acknowledge that the Licensed Material includes the Third Party Materials listed in Annex 1 of this License, and their respective license terms indicated therein.
+
+b. Disclaimer. You are solely responsible for the use of any Third Party Material, whether or not listed in this License, for compliance with the associated license terms and to obtain the necessary rights to use such Third Party Material if any. Your dealings or correspondence with third parties and Your use of or interaction with any Third Party Materials are solely between You and the third party. Licensor does not control or endorse, and makes no representations or warranties regarding any Third Party Materials, and Your access to and use of such Third Party Materials are at your own risk.
+
+## SECTION 7 – TERM AND TERMINATION
+
+a. This License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this License, then Your rights under this License terminate automatically.
+b. Where Your right to use the Licensed Material has terminated under Section 7(a), it reinstates:
+1. automatically as of the date the violation is cured, provided it is cured within thirty (30) days of Your discovery of the violation; or
+2. upon express reinstatement by the Licensor. 
+For the avoidance of doubt, this Section 7(b) does not affect any right the Licensor may have to seek remedies for Your violations of this License. +c. For the avoidance of doubt, the Licensor may offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time. Licensor may also specifically terminate this License, in whole or in part, at any time upon notice (including electronic) to you. Licensor may also modify the terms contained herein at any time without notice. You are responsible for periodically reviewing this License for any updates or changes, and continued use of the Licensed Materials constitutes acceptance of any such modifications of the License terms. +d. Sections 1, 5, 6, 7, 8 and 9 survive termination of this License. + +## SECTION 8 – OTHER TERMS AND CONDITIONS + +a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. +b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this License. + +## SECTION 9 – INTERPRETATION + +a. For the avoidance of doubt, this License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this License. +b. To the extent possible, if any provision of this License is deemed unlawful, void or unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this License without affecting the enforceability of the remaining terms and conditions. +c. No term or condition of this License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. 
Waiver of the performance of any of the provisions of the License in no way entails or implies waiver of performance of the other obligations.
+d. This License shall be governed by and construed in accordance with the laws of the State of New York, without regard to its conflict of law provisions. Any and all disputes and controversies arising out of or in connection with this License shall be finally submitted to courts located in the State of New York, as the exclusive jurisdiction.
+e. This License constitutes the entire agreement between You and the Licensor with respect to the Licensed Material, and supersedes all prior or contemporaneous communications and proposals, whether oral or written, relating to the subject matter hereof.
+
+
+## Annex 1 – Third Party Materials
+
+
+Third Party Code Libraries.
+
+ ---------------------------------------------------------------------------
+ Name of the code library
+ Version
+ License
+ Licensor
+ Github repository
+ Description of the changes (optional)
+ -------------------------------------------------------------------
+ ---------------------------------------------------------------------------
+
+ -------------------------------------------------------------------
+ ---------------------------------------------------------------------------
+ argparse
+ Not specified
+ Python Software Foundation License 2.0
+ Steven Bethard
+ https://github.com/ThomasWaldmann/argparse/
+ We have not made any modification to Python
+ ---------------------------------------------------------------------------
+ build
+ 1.0.3
+ MIT license
+ Filipe Laíns
+ https://github.com/pypa/build/
+ ---------------------------------------------------------------------------
+ git-python
+ 3.1.37
+ BSD-3 Clause
+ Michael Trier and contributors
+ https://github.com/gitpython-developers/GitPython/
+ ---------------------------------------------------------------------------
+ hydra-core
+ 1.3.2
+ MIT license
+ Facebook, Inc. and its affiliates. 
+ https://github.com/facebookresearch/hydra/
+ ---------------------------------------------------------------------------
+ indcomp
+ 0.2.1
+ MIT license
+ Aidan Cooper
+ https://github.com/AidanCooper/indcomp/
+ ---------------------------------------------------------------------------
+ lifelines
+ 0.27.7
+ MIT License
+ Cameron Davidson-Pilon
+ https://github.com/CamDavidsonPilon/lifelines/
+ ---------------------------------------------------------------------------
+ numpy
+ 1.26.0
+ BSD-3 Clause
+ NumPy developers
+ https://github.com/numpy/numpy/
+ ---------------------------------------------------------------------------
+ opacus
+ 1.4.0
+ Apache license 2.0
+ Not specified
+ https://github.com/pytorch/opacus
+ ---------------------------------------------------------------------------
+ pandas
+ 2.1.1
+ BSD-3 Clause
+ AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
+ https://github.com/pandas-dev/pandas/
+ ---------------------------------------------------------------------------
+ pre-commit
+ 3.4.0
+ MIT license
+ Anthony Sottile, Ken Struys
+ https://github.com/pre-commit/pre-commit
+ ---------------------------------------------------------------------------
+ pydantic
+ <2.0
+ MIT license
+ Pydantic Services Inc. and individual contributors
+ https://github.com/pydantic/pydantic
+ ---------------------------------------------------------------------------
+ scikit-learn
+ 1.2.1
+ BSD-3 Clause
+ Scikit-learn developers
+ https://github.com/scikit-learn/scikit-learn/
+ ---------------------------------------------------------------------------
+ scipy
+ 1.11.2
+ BSD-3 Clause
+ Enthought, Inc. 2003-2023, SciPy Developers
+ https://github.com/scipy/scipy
+ ---------------------------------------------------------------------------
+ seaborn
+ 0.12.2
+ BSD-3 Clause
+ Michael L. Waskom All rights reserved. 
+ https://github.com/mwaskom/seaborn + --------------------------------------------------------------------------- + setuptools + 68.2.2 + MIT license + Not specified + https://github.com/pypa/setuptools + --------------------------------------------------------------------------- + substra + 0.46.0 + Apache license 2.0 + Owkin, Inc. + https://github.com/Substra/substra + --------------------------------------------------------------------------- + substrafl + 0.39.0 + Apache license 2.0 + Owkin, Inc. + https://github.com/Substra/substrafl + --------------------------------------------------------------------------- + tensorboard + 2.12.0 + Apache license 2.0 + The TensorFlow Authors + https://github.com/tensorflow/tensorboard + --------------------------------------------------------------------------- + torch + 1.13.1 + Modified BSD Clause + See LICENSE + https://github.com/pytorch/pytorch/ + --------------------------------------------------------------------------- + torchvision + 1.14.1 + Modified BSD Clause + Soumith Chintala + https://github.com/pytorch/vision + + +Third Party Datasets. + + N/A + diff --git a/quickstart/quickstart.md b/quickstart/quickstart.md new file mode 100644 index 00000000..9f5c4784 --- /dev/null +++ b/quickstart/quickstart.md @@ -0,0 +1,164 @@ +## Quickstart + +FedECA tries to mimic scikit-learn API as much as possible with the constraints +of distributed learning. +The first step in data science is always the data. +We need to first use or generate some survival data in pandas.dataframe format. +Note that fedeca should work on any data format, provided that the +return type of the substra opener is indeed a pandas.dataframe but let's keep +it simple in this quickstart. 
+ +Here we will use fedeca utils which will generate some synthetic survival data +following CoxPH assumptions: + +```python +import pandas as pd +from fedeca.utils.survival_utils import CoxData +# Let's generate 1000 data samples with 10 covariates +data = CoxData(seed=42, n_samples=1000, ndim=10) +df = data.generate_dataframe() + +# We remove the true propensity score +df = df.drop(columns=["propensity_scores"], axis=1) +``` +Let's inspect the data that we have here. +```python +print(df.info()) +# +# RangeIndex: 1000 entries, 0 to 999 +# Data columns (total 13 columns): +# # Column Non-Null Count Dtype +# --- ------ -------------- ----- +# 0 X_0 1000 non-null float64 +# 1 X_1 1000 non-null float64 +# 2 X_2 1000 non-null float64 +# 3 X_3 1000 non-null float64 +# 4 X_4 1000 non-null float64 +# 5 X_5 1000 non-null float64 +# 6 X_6 1000 non-null float64 +# 7 X_7 1000 non-null float64 +# 8 X_8 1000 non-null float64 +# 9 X_9 1000 non-null float64 +# 10 time 1000 non-null float64 +# 11 event 1000 non-null uint8 +# 12 treatment 1000 non-null uint8 +# dtypes: float64(11), uint8(2) +# memory usage: 88.0 KB +print(df.head()) +# X_0 X_1 X_2 X_3 X_4 X_5 X_6 X_7 X_8 X_9 time event treatment +# 0 -0.918373 -0.814340 -0.148994 0.482720 -1.130384 -1.254769 -0.462002 1.451622 1.199705 0.133197 2.573516 1 1 +# 1 0.360051 -0.863619 0.198673 0.330630 -0.189184 -0.802424 -1.694990 -0.989009 -0.421245 -0.112665 0.519108 1 1 +# 2 0.442502 0.024682 0.069500 -0.398015 -0.521236 -0.824907 0.373018 1.016843 0.765661 0.858817 0.652803 1 1 +# 3 -0.783965 -1.116391 -1.482413 -2.039827 -1.639304 -0.500380 -0.298467 -1.801688 -0.743004 -0.724039 0.074925 1 1 +# 4 -0.199620 -0.652347 -0.018776 0.004630 -0.122242 -0.413490 -0.450718 -0.761894 -1.323135 -0.234899 0.006951 1 1 +print(df["treatment"].unique()) +# array([1, 0], dtype=uint8) +df["treatment"].sum() +# 500 +``` +So we have survival data with covariates and a binary treatment variable. 
+
+Let's inspect it using proper survival plots using the great survival analysis
+package [lifelines](https://github.com/CamDavidsonPilon/lifelines) that was a
+source of inspiration for fedeca:
+```python
+from lifelines import KaplanMeierFitter as KMF
+import matplotlib.pyplot as plt
+treatments = [0, 1]
+kms = [KMF().fit(durations=df.loc[df["treatment"] == t]["time"], event_observed=df.loc[df["treatment"] == t]["event"]) for t in treatments]
+
+axs = [km.plot(label="treated" if t == 1 else "untreated") for km, t in zip(kms, treatments)]
+axs[-1].set_ylabel("Survival Probability")
+plt.xlim(0, 1500)
+plt.savefig("treated_vs_untreated.pdf", bbox_inches="tight")
+```
+Open `treated_vs_untreated.pdf` in your favorite pdf viewer and see for yourself.
+
+## Pooled IPTW analysis
+The treatment seems to improve survival but it's hard to say for sure as it might
+simply be due to chance or sampling bias.
+Let's perform an IPTW analysis to be sure:
+
+```python
+from fedeca.competitors import PooledIPTW
+pooled_iptw = PooledIPTW(treated_col="treatment", event_col="event", duration_col="time")
+# targets are the propensity weights
+pooled_iptw.fit(data=df, targets=None)
+print(pooled_iptw.results_)
+# coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95% cmp to z p -log2(p)
+# covariate
+# treatment 0.041727 1.04261 0.070581 -0.096609 0.180064 0.907911 1.197294 0.0 0.591196 0.554389 0.85103
+```
+Since the `p-value=0.554389 > 0.05`, judging by what we observe we
+cannot say for sure that there is a treatment effect. We say the ATE is non significant.
+
+## Distributed Analysis
+
+However in practice data is private and held by different institutions. Therefore
+in practice each client holds a subset of the rows of our dataframe. 
+We will simulate this using a realistic scenario where a "pharma" node is developing +a new drug and thus holds all treated and the rest of the data is split across +3 other institutions where patients were treated with the old drug. +We will use the split utils of FedECA. +```python +from fedeca.utils.data_utils import split_dataframe_across_clients + +clients, train_data_nodes, _, _, _ = split_dataframe_across_clients( + df, + n_clients=4, + split_method= "split_control_over_centers", + split_method_kwargs={"treatment_info": "treatment"}, + data_path="./data", + backend_type="simu", +) +``` +Note that you can replace split_method by any callable with the signature +`pd.DataFrame -> list[int]` where the list of ints is the split of the indices +of the df across the different institutions. +To convince you that the split was effective you can inspect the folder "./data". +You will find different subfolders `center0` to `center3` each with different +parts of the data. +To unpack a bit what is going on in more depth, we have created a dict of client +'clients', +which is a dict with 4 keys containing substra API handles towards the different +institutions and their data. +`train_data_nodes` is a list of handles towards the datasets of the different institutions +that were registered through the substra interface using the data in the different +folders. +You might have noticed that we did not talk about the `backend_type` argument. +This argument is used to choose on which network will experiments be run. +"simu" means in-RAM. If you finish this tutorial do try other values such as: +"docker" or "subprocess" but expect a significant slow-down as experiments +get closer and closer to a real distributed system. 
+
+Now let's try to see if we can reproduce the pooled analysis in this much more
+complicated distributed setting:
+```python
+from fedeca import FedECA
+# We use the first client as the node which launches the computations
+ds_client = clients[list(clients.keys())[0]]
+fed_iptw = FedECA(ndim=10, ds_client=ds_client, train_data_nodes=train_data_nodes, treated_col="treatment", duration_col="time", event_col="event", robust=True)
+fed_iptw.run()
+# Final partial log-likelihood:
+# [-11499.19619422]
+# coef se(coef) coef lower 95% coef upper 95% z p exp(coef) exp(coef) lower 95% exp(coef) upper 95%
+# 0 0.041718 0.070581 -0.096618 0.180054 0.591062 0.554479 1.0426 0.907902 1.197282
+```
+In fact what we did above is quite verbose. For simulation purposes we
+advise to use directly the scikit-learn inspired syntax:
+```python
+from fedeca import FedECA
+
+fed_iptw = FedECA(ndim=10, treated_col="treatment", event_col="event", duration_col="time")
+fed_iptw.fit(df, n_clients=4, split_method="split_control_over_centers", split_method_kwargs={"treatment_info": "treatment"}, data_path="./data", robust=True, backend_type="simu")
+# coef se(coef) coef lower 95% coef upper 95% z p exp(coef) exp(coef) lower 95% exp(coef) upper 95%
+# 0 0.041718 0.070581 -0.096618 0.180054 0.591062 0.554479 1.0426 0.907902 1.197282
+```
+We find a similar p-value ! The distributed analysis is working as expected.
+As a next step, we recommend that users who made it here use their own data,
+write custom split functions, and test this pipeline under various
+heterogeneity settings.
+Another interesting avenue is to try adding differential privacy to the training
+of the propensity model but that is outside the scope of this quickstart. 
+ + + diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..57069da0 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,6 @@ +[flake8] +max-line-length = 89 + +[isort] +profile = black +skip = __init__.py \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..e5a62583 --- /dev/null +++ b/setup.py @@ -0,0 +1,69 @@ +"""Setup script for fedeca.""" +from setuptools import find_packages, setup + +deps = ["docformatter"] +tests = ["pytest", "coverage"] +docs = [ + "jupyter", + "sphinx==4.4.0", + "sphinx_rtd_theme==1.0.0", + "sphinx-autobuild==2020.9.1", + "texttable==1.6.3", + "myst-parser==0.16.1", + # Sphinx 3.3.1 does not require a specific version of docutils + # but docutils 0.17 changed the output html markup, breaking the RTD theme + # original issue: https://github.com/sphinx-doc/sphinx/issues/9051 + "docutils==0.16", + "sphinx_click==3.1.0", + # "sphinx_gallery==0.11.1", + "sphinx_autodoc_typehints", + "sphinxcontrib-bibtex==2.5.0", + "gitpython>=3.1.27", +] +all_extra = deps + tests + docs + +with open("README.md", "r", encoding="utf-8") as fh: + long_description = fh.read() + +setup( + name="fedeca", + version="0.0.2", + python_requires=">=3.9.0,<3.11", + license="MIT", + classifiers=[ + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3", + ], + install_requires=[ + # We cannot use the git+https syntax here because of docker build issues + "substrafl @ https://github.com/Substra/substrafl/archive/refs/heads/feat/substrafl-simu-mode.zip", # noqa: E501 + "argparse", + "numpy", + "pandas", + "pre-commit", + "scipy", + "seaborn", + "opacus", + "lifelines", + "git-python", + "build", + "torch==1.13.1", + "scikit-learn==1.2.1", + "pydantic<2.0", # Need to be updated to > 2.0 to use latest Substra + "indcomp==0.2.1", + "hydra-core", + ], + extras_require={ + "all_extra": all_extra, + }, + description="Federated External 
Control Arm with substra", + long_description=long_description, + author=""" + Jean Ogier du Terrail, Quentin Klopfenstein, + Honghao Li, Nicolas Loiseau, Mathieu Andreux, + Félix Balazard""", + author_email="jean.du-terrail@owkin.com", + packages=find_packages(exclude=["tests*"]), + include_package_data=True, +)