diff --git a/.github/workflows/ingest.yml b/.github/workflows/ingest.yml
new file mode 100644
index 000000000..c20940853
--- /dev/null
+++ b/.github/workflows/ingest.yml
@@ -0,0 +1,83 @@
+name: ingest
+on:
+  push:
+  workflow_dispatch:
+    inputs:
+      build_arm:
+        type: boolean
+        description: "Build for ARM as well"
+        default: false
+        required: false
+
+env:
+  DOCKER_IMAGE_NAME: ghcr.io/loculus-project/ingest
+  # NOTE: env values are strings; any non-empty string (even 'false') is truthy in ${{ }} expressions, so always compare with == 'true' below.
+  BUILD_ARM: ${{ github.ref == 'refs/heads/main' || github.event.inputs.build_arm }}
+
+concurrency:
+  group: ci-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}-ingest
+  cancel-in-progress: true
+
+jobs:
+  dockerImage:
+    name: Build ingest Docker Image # Don't change: Referenced by .github/workflows/update-argocd-metadata.yml
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    permissions:
+      contents: read
+      packages: write
+      checks: read
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Generate files hash
+        id: files-hash
+        run: |
+          DIR_HASH=$(echo -n ${{ hashFiles('ingest/**', '.github/workflows/ingest.yml') }})
+          echo "DIR_HASH=$DIR_HASH${{ env.BUILD_ARM == 'true' && '-arm' || '' }}" >> $GITHUB_ENV
+
+      - name: Setup Docker metadata
+        id: dockerMetadata
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.DOCKER_IMAGE_NAME }}
+          tags: |
+            type=raw,value=${{ env.DIR_HASH }}
+            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
+            type=ref,event=branch
+            type=sha,prefix=commit-
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Check if image exists
+        id: check-image
+        run: |
+          EXISTS=$(docker manifest inspect ${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }} > /dev/null 2>&1 && echo "true" || echo "false")
+          echo "CACHE_HIT=$EXISTS" >> $GITHUB_ENV
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build and push image if input files changed
+        if: env.CACHE_HIT == 'false'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./ingest
+          push: true
+          tags: ${{ steps.dockerMetadata.outputs.tags }}
+          cache-from: type=gha,scope=ingest-${{ github.ref }}
+          cache-to: type=gha,mode=max,scope=ingest-${{ github.ref }}
+          platforms: ${{ env.BUILD_ARM == 'true' && 'linux/amd64,linux/arm64' || 'linux/amd64' }}
+
+      - name: Retag and push existing image if cache hit
+        if: env.CACHE_HIT == 'true'
+        run: |
+          TAGS=(${{ steps.dockerMetadata.outputs.tags }})
+          for TAG in "${TAGS[@]}"; do
+            docker buildx imagetools create --tag $TAG ${{ env.DOCKER_IMAGE_NAME }}:${{ env.DIR_HASH }}
+          done
diff --git a/ingest/.dockerignore b/ingest/.dockerignore
new file mode 100644
index 000000000..7f57bcabc
--- /dev/null
+++ b/ingest/.dockerignore
@@ -0,0 +1,8 @@
+.snakemake/
+.git/
+data/
+results/
+result/
+.DS_Store
+.ruff_cache
+config/config.yaml
\ No newline at end of file
diff --git a/ingest/.gitignore b/ingest/.gitignore
new file mode 100644
index 000000000..55edfdf82
--- /dev/null
+++ b/ingest/.gitignore
@@ -0,0 +1,5 @@
+.snakemake/
+data/
+results/
+.DS_Store
+.ruff_cache
\ No newline at end of file
diff --git a/ingest/.mambarc b/ingest/.mambarc
new file mode 100644
index 000000000..8809fe054
--- /dev/null
+++ b/ingest/.mambarc
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+repodata_use_zst: true
+channel_priority: strict
+download_threads: 20
\ No newline at end of file
diff --git a/ingest/Dockerfile b/ingest/Dockerfile
new file mode 100644
index 000000000..1b61b83e1
--- /dev/null
+++ b/ingest/Dockerfile
@@ -0,0 +1,15 @@
+FROM mambaorg/micromamba:1.5.7
+
+COPY --chown=$MAMBA_USER:$MAMBA_USER environment.yml /tmp/env.yaml
+COPY --chown=$MAMBA_USER:$MAMBA_USER .mambarc /tmp/.mambarc
+
+RUN micromamba config set extract_threads 1 \
+    && micromamba install -y -n base -f /tmp/env.yaml --rc-file /tmp/.mambarc \
+    && micromamba clean --all --yes
+
+# Set the environment variable to activate the conda environment
+ARG MAMBA_DOCKERFILE_ACTIVATE=1
+
+COPY
--chown=$MAMBA_USER:$MAMBA_USER . /package + +WORKDIR /package \ No newline at end of file diff --git a/ingest/README.md b/ingest/README.md new file mode 100644 index 000000000..e78bd8974 --- /dev/null +++ b/ingest/README.md @@ -0,0 +1,12 @@ +# Pipeline to ingest data from INSDC into loculus + +## Overview + +1. Download data from INSDC +2. Filtering +3. Turn into FASTA/Metadata +4. Upload to loculus + +## Deployment + +Pipeline shall be put in a docker container that takes a config file as input diff --git a/ingest/Snakefile b/ingest/Snakefile new file mode 100644 index 000000000..0bec2fd87 --- /dev/null +++ b/ingest/Snakefile @@ -0,0 +1,124 @@ +TAXON_ID = config["taxon_id"] +ALL_FIELDS = ",".join(config["all_fields"]) +COLUMN_MAPPING = config["column_mapping"] +LOG_LEVEL = config.get("log_level", "INFO") + + +def rename_columns(input_file, output_file): + with open(input_file, "r") as f: + header = f.readline().strip().split("\t") + header = [COLUMN_MAPPING.get(h, h) for h in header] + with open(output_file, "w") as g: + g.write("\t".join(header) + "\n") + for line in f: + g.write(line) + + +rule all: + input: + "data/sequences.fasta", + "data/metadata.tsv", + + +rule fetch_ncbi_dataset_package: + output: + dataset_package="results/ncbi_dataset.zip", + retries: 5 + shell: + """ + datasets download virus genome taxon {TAXON_ID} \ + --no-progressbar \ + --filename {output.dataset_package} + """ + + +rule extract_ncbi_dataset_sequences: + input: + dataset_package="results/ncbi_dataset.zip", + output: + ncbi_dataset_sequences="results/sequences.fasta", + shell: + """ + unzip -jp {input.dataset_package} \ + ncbi_dataset/data/genomic.fna \ + | seqkit seq -i -w0 \ + > {output.ncbi_dataset_sequences} + """ + + +rule format_ncbi_dataset_report: + input: + dataset_package="results/ncbi_dataset.zip", + output: + ncbi_dataset_tsv="results/metadata_post_extract.tsv", + params: + fields_to_include=ALL_FIELDS, + shell: + """ + dataformat tsv virus-genome \ + --package 
{input.dataset_package} \ + --fields {params.fields_to_include:q} \ + > {output.ncbi_dataset_tsv} + """ + + +rule rename_columns: + input: + ncbi_dataset_tsv="results/metadata_post_extract.tsv", + output: + ncbi_dataset_tsv="results/metadata_post_rename.tsv", + run: + rename_columns(input.ncbi_dataset_tsv, output.ncbi_dataset_tsv) + + +rule prepare_metadata: + input: + metadata="results/metadata_post_rename.tsv", + config="config/config.yaml", + output: + metadata="results/metadata_post_prepare.tsv", + params: + log_level=LOG_LEVEL, + shell: + """ + python scripts/prepare_metadata.py \ + --config-file {input.config} \ + --input {input.metadata} \ + --output {output.metadata} \ + --log-level {params.log_level} \ + """ + + +rule submit_to_loculus: + input: + metadata="results/metadata_post_prepare.tsv", + sequences="results/sequences.fasta", + config="config/config.yaml", + output: + submitted=touch("results/submitted"), + params: + log_level=LOG_LEVEL, + shell: + """ + python scripts/submit_to_loculus.py \ + --mode submit \ + --metadata {input.metadata} \ + --sequences {input.sequences} \ + --config-file {input.config} \ + --log-level {params.log_level} \ + """ + + +rule approve: + input: + submitted="results/submitted", + config="config/config.yaml", + params: + log_level=LOG_LEVEL, + shell: + """ + python scripts/submit_to_loculus.py \ + --mode approve \ + --config-file {input.config} \ + --log-level {params.log_level} \ + """ diff --git a/ingest/config/config.yaml b/ingest/config/config.yaml new file mode 100644 index 000000000..ae937d5bb --- /dev/null +++ b/ingest/config/config.yaml @@ -0,0 +1,137 @@ +log_level: DEBUG +compound_country_field: ncbi_geo_location +fasta_id_field: genbank_accession +rename: + genbank_accession: insdc_accession_full + ncbi_collection_date: collection_date + ncbi_isolate_name: isolate_name + ncbi_isolate_source: isolate_source + ncbi_sra_accessions: sra_accessions + ncbi_submitter_affiliation: author_affiliation + 
ncbi_submitter_country: submitter_country + ncbi_submitter_names: authors +keep: + - division + - country + - submissionId + - insdc_accession_base + - insdc_version + - bioprojects + - biosample_accession + - ncbi_completeness + - ncbi_host_name + - ncbi_host_tax_id + - ncbi_is_lab_host + - ncbi_length + - ncbi_protein_count + - ncbi_release_date + - ncbi_update_date + - ncbi_sourcedb + - ncbi_virus_name + - ncbi_virus_tax_id +taxon_id: 186538 +all_fields: + - accession + - bioprojects + - biosample-acc + - completeness + - gene-count + - geo-location + - geo-region + - host-common-name + - host-infraspecific-breed + - host-infraspecific-cultivar + - host-infraspecific-ecotype + - host-infraspecific-isolate + - host-infraspecific-sex + - host-infraspecific-strain + - host-name + - host-pangolin + - host-tax-id + - is-annotated + - is-complete + - is-lab-host + - is-vaccine-strain + - isolate-collection-date + - isolate-lineage + - isolate-lineage-source + - lab-host + - length + - matpeptide-count + - mol-type + - nucleotide-completeness + - protein-count + - purpose-of-sampling + - release-date + - sourcedb + - sra-accs + - submitter-affiliation + - submitter-country + - submitter-names + - update-date + - virus-common-name + - virus-infraspecific-breed + - virus-infraspecific-cultivar + - virus-infraspecific-ecotype + - virus-infraspecific-isolate + - virus-infraspecific-sex + - virus-infraspecific-strain + - virus-name + - virus-pangolin + - virus-tax-id +column_mapping: + Accession: genbank_accession + BioProjects: bioprojects + BioSample accession: biosample_accession + Completeness: ncbi_completeness + Gene count: ncbi_gene_count + Geographic Location: ncbi_geo_location + Geographic Region: ncbi_geo_region + Host Common Name: ncbi_host_common_name + Host Infraspecific Names Breed: ncbi_host_breed + Host Infraspecific Names Cultivar: ncbi_host_cultivar + Host Infraspecific Names Ecotype: ncbi_host_ecotype + Host Infraspecific Names Isolate: ncbi_host_isolate 
+ Host Infraspecific Names Sex: ncbi_host_sex + Host Infraspecific Names Strain: ncbi_host_strain + Host Name: ncbi_host_name + Host Pangolin Classification: ncbi_host_pangolin + Host Taxonomic ID: ncbi_host_tax_id + Is Annotated: ncbi_is_annotated + Is Complete: ncbi_is_complete + Is Lab Host: ncbi_is_lab_host + Is Vaccine Strain: ncbi_is_vaccine_strain + Isolate Collection date: ncbi_collection_date + Isolate Lineage: ncbi_isolate_name + Isolate Lineage source: ncbi_isolate_source + Lab Host: ncbi_lab_host + Length: ncbi_length + Mature peptide count: ncbi_mature_peptide_count + Molecule type: ncbi_mol_type + Nucleotide completeness: ncbi_nucleotide_completeness + Protein count: ncbi_protein_count + Purpose of Sampling: ncbi_purpose_of_sampling + Release date: ncbi_release_date + Source database: ncbi_sourcedb + SRA Accessions: ncbi_sra_accessions + Submitter Affiliation: ncbi_submitter_affiliation + Submitter Country: ncbi_submitter_country + Submitter Names: ncbi_submitter_names + Update date: ncbi_update_date + Virus Common Name: ncbi_virus_common_name + Virus Infraspecific Names Breed: ncbi_virus_breed + Virus Infraspecific Names Cultivar: ncbi_virus_cultivar + Virus Infraspecific Names Ecotype: ncbi_virus_ecotype + Virus Infraspecific Names Isolate: ncbi_virus_isolate + Virus Infraspecific Names Sex: ncbi_virus + Virus Infraspecific Names Strain: ncbi_virus_strain + Virus Name: ncbi_virus_name + Virus Pangolin Classification: ncbi_virus_pangolin + Virus Taxonomic ID: ncbi_virus_tax_id +group_name: insdc_ingest_group +username : insdc_ingest_user +password : insdc_ingest_user +keycloak_client_id : test-cli +backend_url : https://backend-ingest.loculus.org/ +keycloak_token_url : https://authentication-ingest.loculus.org/realms/loculus/protocol/openid-connect/token +organism: ebola-zaire diff --git a/ingest/environment.yml b/ingest/environment.yml new file mode 100644 index 000000000..48f6f6b73 --- /dev/null +++ b/ingest/environment.yml @@ -0,0 +1,15 @@ +name: 
loculus-ingest +channels: + - conda-forge + - bioconda +dependencies: + - python=3.12 + - pip=24.0 + - ncbi-datasets-cli + - snakemake + - pandas + - PyYAML + - click + - requests + - seqkit + - unzip diff --git a/ingest/profiles/default/config.yaml b/ingest/profiles/default/config.yaml new file mode 100644 index 000000000..f8e8e4d7e --- /dev/null +++ b/ingest/profiles/default/config.yaml @@ -0,0 +1,4 @@ +rerun-incomplete: true +printshellcmds: true +cores: all +configfile: config/config.yaml \ No newline at end of file diff --git a/ingest/ruff.toml b/ingest/ruff.toml new file mode 100644 index 000000000..56f010e91 --- /dev/null +++ b/ingest/ruff.toml @@ -0,0 +1,5 @@ +target-version = "py311" +line-length = 100 + +[lint] +select = ["E", "F", "B"] diff --git a/ingest/scripts/prepare_metadata.py b/ingest/scripts/prepare_metadata.py new file mode 100644 index 000000000..4241130a9 --- /dev/null +++ b/ingest/scripts/prepare_metadata.py @@ -0,0 +1,64 @@ +"""Script to rename fields and transform values prior to submission to Loculus""" + +# Needs to be configurable via yaml file +# Start off with a simple mapping +# Add transformations that can be applied to certain fields +# Like separation of country into country and division + +import hashlib +import logging +from dataclasses import dataclass + +import click +import pandas as pd +import yaml + + +@dataclass +class Config: + compound_country_field: str + fasta_id_field: str + rename: dict[str, str] + keep: list[str] + + +def hash_row_with_columns(row: pd.Series) -> str: + items = sorted((f"{col}_{val}" for col, val in row.items())) + row_string = "".join(items) + return hashlib.sha256(row_string.encode()).hexdigest() + + +@click.command() +@click.option("--config-file", required=True, type=click.Path(exists=True)) +@click.option("--input", required=True, type=click.Path(exists=True)) +@click.option("--output", required=True, type=click.Path()) +@click.option("--log-level", default="INFO", type=click.Choice(["DEBUG", 
"INFO", "WARNING", "ERROR", "CRITICAL"])) +def main(config_file: str, input: str, output: str, log_level: str) -> None: + logging.basicConfig(level=log_level) + with open(config_file) as file: + full_config = yaml.safe_load(file) + relevant_config = {key: full_config[key] for key in Config.__annotations__} + config = Config(**relevant_config) + logging.debug(config) + df = pd.read_csv(input, sep="\t").sort_values(by=config.compound_country_field) + logging.debug(df.columns) + df["division"] = df[config.compound_country_field].str.split(":", n=1).str[1].str.strip() + logging.debug(df["division"].unique()) + df["country"] = df[config.compound_country_field].str.split(":", n=1).str[0].str.strip() + logging.debug(df["country"].unique()) + df["submissionId"] = df[config.fasta_id_field] + logging.debug(df["submissionId"].unique()) + df["insdc_accession_base"] = df[config.fasta_id_field].str.split(".", n=1).str[0] + logging.debug(df["insdc_accession_base"]) + df["insdc_version"] = df[config.fasta_id_field].str.split(".", n=1).str[1] + logging.debug(df["insdc_version"].unique()) + df = df.rename(columns=config.rename) + # Drop columns that are neither a value of `rename` nor in `keep` + df = df.drop(columns=set(df.columns) - set(config.rename.values()) - set(config.keep)) + # Create a metadata hash that is independent of the order of the columns + df["metadata_hash"] = df.apply(hash_row_with_columns, axis=1) + df.to_csv(output, sep="\t", index=False) + + +if __name__ == "__main__": + main() diff --git a/ingest/scripts/submit_to_loculus.py b/ingest/scripts/submit_to_loculus.py new file mode 100644 index 000000000..381cd2715 --- /dev/null +++ b/ingest/scripts/submit_to_loculus.py @@ -0,0 +1,207 @@ +import logging +from dataclasses import dataclass +from time import sleep + +import click +import requests +import yaml + +logging.basicConfig(level=logging.DEBUG) + +@dataclass +class Config: + organism: str + backend_url: str + keycloak_token_url: str + keycloak_client_id: str + 
username: str + password: str + group_name: str + + +def organism_url(config: Config): + return f"{config.backend_url.rstrip('/')}/{config.organism.strip('/')}" + + +def get_jwt(config: Config): + """ + Get a JWT token for the given username and password + """ + + data = { + "username": config.username, + "password": config.password, + "grant_type": "password", + "client_id": config.keycloak_client_id, + } + headers = {"Content-Type": "application/x-www-form-urlencoded"} + + keycloak_token_url = config.keycloak_token_url + + response = requests.post(keycloak_token_url, data=data, headers=headers) + response.raise_for_status() + + jwt_keycloak = response.json() + jwt = jwt_keycloak["access_token"] + return jwt + + +def create_group(config: Config): + # Create the ingest group + url = f"{config.backend_url.rstrip('/')}/groups" + token = get_jwt(config) + group_name = config.group_name + + headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"} + + data = { + "groupName": group_name, + "institution": "NA", + "address": { + "line1": "1234 Loculus Street", + "line2": "NA", + "city": "Dortmund", + "state": "NRW", + "postalCode": "12345", + "country": "Germany", + }, + "contactEmail": "something@loculus.org", + } + + response = requests.post(url, json=data, headers=headers) + + if response.status_code == 409: + print("Group already exists") + # raise if not 409 and not happy 2xx + elif not response.ok: + print(f"Error creating group: {response.json()}") + response.raise_for_status() + + +def submit(metadata, sequences, config: Config): + """ + Submit data to Loculus. 
+ """ + + jwt = get_jwt(config) + + # Endpoint URL + url = f"{organism_url(config)}/submit" + + # Headers with Bearer Authentication + headers = {"Authorization": f"Bearer {jwt}"} + + # Files to be uploaded + files = { + "metadataFile": open(metadata, "rb"), + "sequenceFile": open(sequences, "rb"), + } + + # Query parameters + params = { + "groupName": config.group_name, + "dataUseTermsType": "OPEN", + } + + # POST request + response = requests.post(url, headers=headers, files=files, params=params) + response.raise_for_status() + + # Closing files + files["metadataFile"].close() + files["sequenceFile"].close() + + return response.json() + + +def approve(config: Config): + """ + Get sequences that were preprocessed successfully and approve them. + 1. Get the ids of the sequences that were preprocessed successfully + /ORGANISM/get-sequences + 2. Approve the sequences + """ + jwt = get_jwt(config) + + url = f"{organism_url(config)}/get-sequences" + + # Headers with Bearer Authentication + headers = {"Authorization": f"Bearer {jwt}"} + + # POST request + response = requests.get(url, headers=headers) + response.raise_for_status() + + payload = {"scope": "ALL"} + + url = f"{organism_url(config)}/approve-processed-data" + + response = requests.post(url, headers=headers, json=payload) + response.raise_for_status() + + return response.json() + + +# %% + + +@click.command() +@click.option( + "--metadata", + required=False, + type=click.Path(exists=True), +) +@click.option( + "--sequences", + required=False, + type=click.Path(exists=True), +) +@click.option( + "--mode", + required=True, + type=click.Choice(["submit", "approve"]), +) +@click.option( + "--log-level", + default="INFO", + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), +) +@click.option( + "--config-file", + required=True, + type=click.Path(exists=True), +) +def submit_to_loculus(metadata, sequences, mode, log_level, config_file): + """ + Submit data to Loculus. 
+ """ + logging.basicConfig(level=log_level) + with open(config_file) as file: + full_config = yaml.safe_load(file) + relevant_config = {key: full_config[key] for key in Config.__annotations__} + config = Config(**relevant_config) + + if mode == "submit": + logging.info("Submitting to Loculus") + logging.debug(f"Config: {config}") + # Create group if it doesn't exist + logging.info(f"Creating group {config.group_name}") + create_group(config) + logging.info(f"Group {config.group_name} created") + + # Submit + logging.info("Starting submission") + response = submit(metadata, sequences, config) + logging.info("Submission complete") + + if mode == "approve": + while True: + logging.info("Approving sequences") + response = approve(config) + logging.debug(f"Approved: {response}") + sleep(10) + + + +if __name__ == "__main__": + submit_to_loculus() diff --git a/kubernetes/loculus/templates/loculus-ingest-config.yaml b/kubernetes/loculus/templates/loculus-ingest-config.yaml new file mode 100644 index 000000000..295ad89bf --- /dev/null +++ b/kubernetes/loculus/templates/loculus-ingest-config.yaml @@ -0,0 +1,23 @@ +{{ $backendHost := .Values.disableBackend | ternary + "http://host.k3d.internal:8079" + "http://loculus-backend-service:8079" +}} +{{- $keycloakHost := .Values.environment | eq "server" | ternary + (printf "https://authentication-%s" $.Values.host) + "http://loculus-keycloak-service:8083" +}} +{{- range $key, $values := (.Values.organisms | default .Values.defaultOrganisms) }} +{{- if $values.ingest }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: loculus-ingest-config-{{ $key }} +data: + config.yaml: | + {{- $values.ingest.configFile | toYaml | nindent 4 }} + organism: {{ $key }} + backend_url: {{ $backendHost }} + keycloak_token_url: {{ $keycloakHost -}}/realms/loculus/protocol/openid-connect/token +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kubernetes/loculus/templates/loculus-ingest-deployment.yaml 
b/kubernetes/loculus/templates/loculus-ingest-deployment.yaml new file mode 100644 index 000000000..a5a7659d0 --- /dev/null +++ b/kubernetes/loculus/templates/loculus-ingest-deployment.yaml @@ -0,0 +1,46 @@ +{{- $dockerTag := include "loculus.dockerTag" .Values }} +{{- if not .Values.disableIngest }} +{{- range $key, $value := (.Values.organisms | default .Values.defaultOrganisms) }} +{{- if $value.ingest }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: loculus-ingest-{{ $key }} + annotations: + argocd.argoproj.io/sync-options: Replace=true + reloader.stakater.com/auto: "true" +spec: + replicas: 1 + selector: + matchLabels: + app: loculus + component: loculus-ingest-{{ $key }} + template: + metadata: + labels: + app: loculus + component: loculus-ingest-{{ $key }} + spec: + containers: + - name: ingest-{{ $key }} + image: {{ $value.ingest.image}}:{{ $dockerTag }} + imagePullPolicy: Always + args: + {{- range $arg := $value.ingest.args }} + - "{{ $arg }}" + {{- end }} + {{- if $value.ingest.configFile }} + volumeMounts: + - name: loculus-ingest-config-volume-{{ $key }} + mountPath: /package/config + volumes: + - name: loculus-ingest-config-volume-{{ $key }} + configMap: + name: loculus-ingest-config-{{ $key }} + {{- end }} + imagePullSecrets: + - name: ghcr-secret +{{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kubernetes/loculus/values.yaml b/kubernetes/loculus/values.yaml index 8a004a19c..0a7fe5669 100644 --- a/kubernetes/loculus/values.yaml +++ b/kubernetes/loculus/values.yaml @@ -12,6 +12,7 @@ keycloakDatabase: disableWebsite: false disableBackend: false disablePreprocessing: false +disableIngest: false siloImportLimitSeconds: 3600 accessionPrefix: "LOC_" name: "Loculus" @@ -223,12 +224,97 @@ defaultOrganisms: autocomplete: true - name: authors type: string + - name: submitter_country + type: string + generateIndex: true + autocomplete: true + - name: division + type: string + generateIndex: true + autocomplete: true 
+ - name: insdc_accession_base + type: string + - name: insdc_version + type: string + - name: insdc_accession_full + type: string + - name: bioprojects + type: string + - name: biosample_accession + type: string + - name: ncbi_completeness + type: string + generateIndex: true + autocomplete: true + - name: ncbi_host_name + type: string + generateIndex: true + autocomplete: true + - name: ncbi_host_tax_id + type: string + generateIndex: true + autocomplete: true + - name: ncbi_is_lab_host + type: string + generateIndex: true + autocomplete: true + - name: ncbi_length + type: string + - name: ncbi_protein_count + type: string + - name: ncbi_update_date + type: date + - name: ncbi_sourcedb + type: string + generateIndex: true + autocomplete: true + - name: ncbi_virus_name + type: string + generateIndex: true + autocomplete: true + - name: ncbi_virus_tax_id + type: string + generateIndex: true + autocomplete: true + - name: isolate_source + type: string + generateIndex: true + autocomplete: true + - name: sra_accessions + type: string + - name: total_snps + type: string + - name: total_inserted_nucs + type: string + - name: total_deleted_nucs + type: string + - name: total_ambiguous_nucs + type: string + - name: total_unknown_nucs + type: string + - name: total_frame_shifts + type: string + - name: frame_shifts + type: string + - name: completeness + type: string + - name: total_stop_codons + type: string + - name: stop_codons + type: string + - name: metadata_hash + type: string website: tableColumns: - collection_date - country + - division + - submitter_country - author_affiliation - ncbi_release_date + - insdc_accession_full + - ncbi_length + - ncbi_host_name defaultOrderBy: collection_date defaultOrder: descending silo: @@ -253,6 +339,46 @@ defaultOrganisms: - L batch_size: 100 processing_spec: + total_snps: + function: identity + inputs: + input: nextclade.totalSubstitutions + total_inserted_nucs: + function: identity + inputs: + input: nextclade.totalInsertions 
+ total_deleted_nucs: + function: identity + inputs: + input: nextclade.totalDeletions + total_ambiguous_nucs: + function: identity + inputs: + input: nextclade.totalNonACGTNs + total_unknown_nucs: + function: identity + inputs: + input: nextclade.totalMissing + total_frame_shifts: + function: identity + inputs: + input: nextclade.totalFrameShifts + frame_shifts: + function: identity + inputs: + input: nextclade.frameShifts + completeness: + function: identity + inputs: + input: nextclade.coverage + total_stop_codons: + function: identity + inputs: + input: nextclade.qc.stopCodons.totalStopCodons + stop_codons: + function: identity + inputs: + input: nextclade.qc.stopCodons.stopCodons collection_date: function: process_date inputs: @@ -280,6 +406,225 @@ defaultOrganisms: function: identity inputs: input: isolate_name + submitter_country: + function: identity + inputs: + input: submitter_country + division: + function: identity + inputs: + input: division + insdc_accession_base: + function: identity + inputs: + input: insdc_accession_base + insdc_version: + function: identity + inputs: + input: insdc_version + insdc_accession_full: + function: identity + inputs: + input: insdc_accession_full + bioprojects: + function: identity + inputs: + input: bioprojects + biosample_accession: + function: identity + inputs: + input: biosample_accession + ncbi_completeness: + function: identity + inputs: + input: ncbi_completeness + ncbi_host_name: + function: identity + inputs: + input: ncbi_host_name + ncbi_host_tax_id: + function: identity + inputs: + input: ncbi_host_tax_id + ncbi_is_lab_host: + function: identity + inputs: + input: ncbi_is_lab_host + ncbi_length: + function: identity + inputs: + input: ncbi_length + ncbi_protein_count: + function: identity + inputs: + input: ncbi_protein_count + ncbi_update_date: + function: parse_timestamp + inputs: + timestamp: ncbi_update_date + ncbi_sourcedb: + function: identity + inputs: + input: ncbi_sourcedb + ncbi_virus_name: + 
function: identity + inputs: + input: ncbi_virus_name + ncbi_virus_tax_id: + function: identity + inputs: + input: ncbi_virus_tax_id + isolate_source: + function: identity + inputs: + input: isolate_source + sra_accessions: + function: identity + inputs: + input: sra_accessions + metadata_hash: + function: identity + inputs: + input: metadata_hash + ingest: + args: + - snakemake + - approve + image: ghcr.io/loculus-project/ingest + configFile: + compound_country_field: ncbi_geo_location + fasta_id_field: genbank_accession + rename: + genbank_accession: insdc_accession_full + ncbi_collection_date: collection_date + ncbi_isolate_name: isolate_name + ncbi_isolate_source: isolate_source + ncbi_sra_accessions: sra_accessions + ncbi_submitter_affiliation: author_affiliation + ncbi_submitter_country: submitter_country + ncbi_submitter_names: authors + keep: + - division + - country + - submissionId + - insdc_accession_base + - insdc_version + - bioprojects + - biosample_accession + - ncbi_completeness + - ncbi_host_name + - ncbi_host_tax_id + - ncbi_is_lab_host + - ncbi_length + - ncbi_protein_count + - ncbi_release_date + - ncbi_update_date + - ncbi_sourcedb + - ncbi_virus_name + - ncbi_virus_tax_id + taxon_id: 186538 + all_fields: + - accession + - bioprojects + - biosample-acc + - completeness + - gene-count + - geo-location + - geo-region + - host-common-name + - host-infraspecific-breed + - host-infraspecific-cultivar + - host-infraspecific-ecotype + - host-infraspecific-isolate + - host-infraspecific-sex + - host-infraspecific-strain + - host-name + - host-pangolin + - host-tax-id + - is-annotated + - is-complete + - is-lab-host + - is-vaccine-strain + - isolate-collection-date + - isolate-lineage + - isolate-lineage-source + - lab-host + - length + - matpeptide-count + - mol-type + - nucleotide-completeness + - protein-count + - purpose-of-sampling + - release-date + - sourcedb + - sra-accs + - submitter-affiliation + - submitter-country + - submitter-names + - 
update-date + - virus-common-name + - virus-infraspecific-breed + - virus-infraspecific-cultivar + - virus-infraspecific-ecotype + - virus-infraspecific-isolate + - virus-infraspecific-sex + - virus-infraspecific-strain + - virus-name + - virus-pangolin + - virus-tax-id + column_mapping: + Accession: genbank_accession + BioProjects: bioprojects + BioSample accession: biosample_accession + Completeness: ncbi_completeness + Gene count: ncbi_gene_count + Geographic Location: ncbi_geo_location + Geographic Region: ncbi_geo_region + Host Common Name: ncbi_host_common_name + Host Infraspecific Names Breed: ncbi_host_breed + Host Infraspecific Names Cultivar: ncbi_host_cultivar + Host Infraspecific Names Ecotype: ncbi_host_ecotype + Host Infraspecific Names Isolate: ncbi_host_isolate + Host Infraspecific Names Sex: ncbi_host_sex + Host Infraspecific Names Strain: ncbi_host_strain + Host Name: ncbi_host_name + Host Pangolin Classification: ncbi_host_pangolin + Host Taxonomic ID: ncbi_host_tax_id + Is Annotated: ncbi_is_annotated + Is Complete: ncbi_is_complete + Is Lab Host: ncbi_is_lab_host + Is Vaccine Strain: ncbi_is_vaccine_strain + Isolate Collection date: ncbi_collection_date + Isolate Lineage: ncbi_isolate_name + Isolate Lineage source: ncbi_isolate_source + Lab Host: ncbi_lab_host + Length: ncbi_length + Mature peptide count: ncbi_mature_peptide_count + Molecule type: ncbi_mol_type + Nucleotide completeness: ncbi_nucleotide_completeness + Protein count: ncbi_protein_count + Purpose of Sampling: ncbi_purpose_of_sampling + Release date: ncbi_release_date + Source database: ncbi_sourcedb + SRA Accessions: ncbi_sra_accessions + Submitter Affiliation: ncbi_submitter_affiliation + Submitter Country: ncbi_submitter_country + Submitter Names: ncbi_submitter_names + Update date: ncbi_update_date + Virus Common Name: ncbi_virus_common_name + Virus Infraspecific Names Breed: ncbi_virus_breed + Virus Infraspecific Names Cultivar: ncbi_virus_cultivar + Virus Infraspecific Names 
Ecotype: ncbi_virus_ecotype + Virus Infraspecific Names Isolate: ncbi_virus_isolate + Virus Infraspecific Names Sex: ncbi_virus + Virus Infraspecific Names Strain: ncbi_virus_strain + Virus Name: ncbi_virus_name + Virus Pangolin Classification: ncbi_virus_pangolin + Virus Taxonomic ID: ncbi_virus_tax_id + group_name: insdc_ingest_group + username : insdc_ingest_user + password : insdc_ingest_user + keycloak_client_id : test-cli referenceGenomes: nucleotideSequences: - name: "main"