From 861e0a6d20b01d65c90435872ac784e6eb1b9781 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 19 Jun 2024 17:19:42 +0100 Subject: [PATCH 01/15] Initial project setup, ORM. --- .github/workflows/run-tests.yml | 55 +++++ README.md | 20 +- pyproject.toml | 30 +++ src/npg_notify/__init__.py | 0 src/npg_notify/config.py | 72 +++++++ src/npg_notify/db/__init__.py | 0 src/npg_notify/db/mlwh.py | 112 +++++++++++ src/npg_notify/db/utils.py | 212 ++++++++++++++++++++ tests/conftest.py | 39 ++++ tests/data/mlwh_fixtures/100-Study.yml | 18 ++ tests/data/mlwh_fixtures/200-StudyUsers.yml | 36 ++++ tests/data/qc_state_app_config.ini | 9 + tests/test_retrieve_contacts.py | 19 ++ 13 files changed, 621 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/run-tests.yml create mode 100644 pyproject.toml create mode 100644 src/npg_notify/__init__.py create mode 100644 src/npg_notify/config.py create mode 100644 src/npg_notify/db/__init__.py create mode 100644 src/npg_notify/db/mlwh.py create mode 100644 src/npg_notify/db/utils.py create mode 100644 tests/conftest.py create mode 100644 tests/data/mlwh_fixtures/100-Study.yml create mode 100644 tests/data/mlwh_fixtures/200-StudyUsers.yml create mode 100644 tests/data/qc_state_app_config.ini create mode 100644 tests/test_retrieve_contacts.py diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml new file mode 100644 index 0000000..1d7274a --- /dev/null +++ b/.github/workflows/run-tests.yml @@ -0,0 +1,55 @@ +name: Test + +on: + push: + branches: [master, devel] + pull_request: + branches: [master, devel] + +jobs: + + test: + runs-on: ubuntu-latest + + services: + mysql: + image: "mysql:8.0" + ports: + - "3306:3306" + options: >- + --health-cmd "mysqladmin ping" + --health-interval 10s + --health-timeout 5s + --health-retries 10 + env: + MYSQL_RANDOM_ROOT_PASSWORD: yes + MYSQL_TCP_PORT: 3306 + MYSQL_USER: "test" + MYSQL_PASSWORD: "test" + MYSQL_DATABASE: "study_notify" + + steps: + - uses: 
actions/checkout@v4 + + - name: Install Poetry + run: | + pipx install poetry + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + architecture: 'x64' + + - name: Run poetry install + run: | + poetry env use '3.11' + poetry install + + - name: Run pytest + run: | + poetry run pytest + + - name: Run linter (ruff) + run: | + poetry run ruff check --output-format=github . + diff --git a/README.md b/README.md index c9c0c6b..89e4ad6 100644 --- a/README.md +++ b/README.md @@ -1 +1,19 @@ -# npg_notifications \ No newline at end of file +# npg_notifications + +A utility for notifying customers about lifecycle events in the analisys +and QC of sequencing data. + +[Porch](https://github.com/wtsi-npg/npg_porch) service is used to queue and +process (send) notifications. The notification producer can repeatedly send +to `porch` the same notification. `Porch` guarantees that the repeated +notifications are not kept and, therefor, not processed. + +The consumer of the notifications is responsible for sending the message +to the customer. For a fully automated system the consumer should implement +the correct protocol for dealing with failed attempts to notify the customer. + +If different types of notification (for example, an e-mail and a MQ message) +have to be sent for the same event, it is advised either to use a separate +`porch` pipeline for each type of notification or to include additional +information about the notification protocol and format into the payload that +is sent to `porch`. 
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..10d2334 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +[tool.poetry] +name = "npg_notify" +version = "0.0.1" +description = "Utility for client notifications" +authors = ["Marina Gourtovaia "] +license = "GPL-3.0-or-later" +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.11" +SQLAlchemy = { version = "^2.0.1", extras = ["pymysql"] } +SQLAlchemy-Utils = "^0.41.2" +cryptography = { version = "^41.0.3" } + +[tool.poetry.dev-dependencies] +pytest = "^8.2.2" +PyYAML = "^6.0.0" +ruff = "^0.4.9" + +[tool.ruff] +# Set the maximum line length to 79. +line-length = 79 + +[tool.pytest.ini_options] +addopts = [ + "--import-mode=importlib", +] +pythonpath = [ + "src" +] diff --git a/src/npg_notify/__init__.py b/src/npg_notify/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/npg_notify/config.py b/src/npg_notify/config.py new file mode 100644 index 0000000..fe01cb8 --- /dev/null +++ b/src/npg_notify/config.py @@ -0,0 +1,72 @@ +import configparser +import pathlib +import json + +"""Common utility functions for the package.""" + +DEFAULT_CONF_FILE_TYPE = "ini" + + +def get_config_data(conf_file_path: str, conf_file_section: str = None): + """ + Parses a configuration file and returns its content. + + Allows for two types of configuration files, 'ini' and 'json'. The type of + the file is determined from the extension of the file name. In case of no + extension an 'ini' type is assumed. + + Args: + + conf_file_path: + A configuration file with database connection details. + conf_file_section: + The section of the configuration file. Optional. Should be defined + for 'ini' files. + + Returns: + For an 'ini' file returns the content of the given section of the file as + a dictionary. + For a 'json' file, if the conf_file_section argument is not defined, the + content of a file as a Python object is returned. 
If the conf_file_section + argument is defined, the object returned by the parser is assumed to be + a dictionary that has the value of the 'conf_file_section' argument as a key. + The value corresponding to this key is returned. + """ + + conf_file_extention = pathlib.Path(conf_file_path).suffix + if conf_file_extention: + conf_file_extention = conf_file_extention[1:] + else: + conf_file_extention = DEFAULT_CONF_FILE_TYPE + + if conf_file_extention == DEFAULT_CONF_FILE_TYPE: + if not conf_file_section: + raise Exception( + "'conf_file_section' argument is not given, " + "it should be defined for '{DEFAULT_CONF_FILE_TYPE}' " + "configuration file." + ) + + config = configparser.ConfigParser() + config.read(conf_file_path) + + return {i[0]: i[1] for i in config[conf_file_section].items()} + + elif conf_file_extention == "json": + conf: dict = json.load(conf_file_path) + if conf_file_section: + if isinstance(conf, dict) is False: + raise Exception(f"{conf_file_path} does not have sections.") + if conf_file_section in conf.keys: + conf = conf[conf_file_section] + else: + raise Exception( + f"{conf_file_path} does not contain {conf_file_section} key" + ) + + return conf + + else: + raise Exception( + f"Parsing for '{conf_file_extention}' files is not implemented" + ) diff --git a/src/npg_notify/db/__init__.py b/src/npg_notify/db/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/npg_notify/db/mlwh.py b/src/npg_notify/db/mlwh.py new file mode 100644 index 0000000..33ff36c --- /dev/null +++ b/src/npg_notify/db/mlwh.py @@ -0,0 +1,112 @@ +# Copyright (C) 2024 Genome Research Ltd. +# +# Authors: +# Marina Gourtovaia +# Kieron Taylor +# +# This file is part of npg_notifications software package. +# +# npg_notifications is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3 of the License, or any later version. 
+# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see . + +from sqlalchemy import ForeignKey, UniqueConstraint, Integer, String, select +from sqlalchemy.orm import ( + DeclarativeBase, + Mapped, + mapped_column, + relationship, + Session, +) +from sqlalchemy.exc import NoResultFound + +""" +Declarative ORM for some tables of multi-lims warehouse. +For simplicity, only columns used by this package are represented. +""" + + +class Base(DeclarativeBase): + pass + + +class Study(Base): + __tablename__ = "study" + + id_study_tmp = mapped_column(Integer, primary_key=True, autoincrement=True) + id_lims = mapped_column(String(10), nullable=False) + id_study_lims = mapped_column(String(20), nullable=False) + name = mapped_column(String(255)) + + ( + UniqueConstraint( + "id_lims", + "id_study_lims", + name="study_id_lims_id_study_lims_index", + ), + ) + + study_users: Mapped[set["StudyUsers"]] = relationship() + + def __repr__(self): + return f"Study {self.id_study_lims}, {self.name}" + + def contacts(self) -> list[str]: + roles = ["manager", "follower", "owner"] + return sorted( + list( + { + u.email + for u in self.study_users + if (u.email is not None and u.role is not None) + and (u.role in roles) + } + ) + ) + + +class StudyUsers(Base): + __tablename__ = "study_users" + + id_study_users_tmp = mapped_column( + Integer, primary_key=True, autoincrement=True + ) + id_study_tmp = mapped_column( + Integer, ForeignKey("study.id_study_tmp"), nullable=False, index=True + ) + role = mapped_column(String(255), nullable=True) + email = mapped_column(String(255), nullable=True) + + study: Mapped["Study"] = relationship(back_populates="study_users") + + def __repr__(self): + role = 
self.role if self.role else "None" + email = self.email if self.email else "None" + return f"StudyUser role={role}, email={email}" + + +class StudyNotFoundError(Exception): + pass + + +def get_study_contacts(session: Session, id: str): + try: + contacts = ( + session.execute(select(Study).where(Study.id_study_lims == id)) + .scalar_one() + .contacts() + ) + except NoResultFound: + raise StudyNotFoundError( + f"Study with ID {id} is not found in mlwarehouse" + ) + + return contacts diff --git a/src/npg_notify/db/utils.py b/src/npg_notify/db/utils.py new file mode 100644 index 0000000..95d2363 --- /dev/null +++ b/src/npg_notify/db/utils.py @@ -0,0 +1,212 @@ +import os +import importlib +import pathlib +import re +import yaml + +from contextlib import contextmanager + +from sqlalchemy import create_engine, text, insert +from sqlalchemy.engine import Engine +from sqlalchemy.orm import Session, DeclarativeBase +from sqlalchemy_utils import create_database, database_exists, drop_database + +from npg_notify.config import get_config_data + + +def db_credentials_from_config_file( + conf_file_path: str, conf_file_section: str = None +): + """Parses a configuration file, generates a database connection string. + + Args: + conf_file_path: + A configuration file with database connection details. + conf_file_section: + The section of the configuration file. Optional. + + Returns: + MySQL connection string suitable for SQLAchemy + """ + + config = get_config_data( + conf_file_path=conf_file_path, conf_file_section=conf_file_section + ) + if "dbschema" not in config: + raise Exception("Database schema name should be defined as dbschema") + + user_creds = config["dbuser"] + # Not having a password is not that unusual for read-only access. 
+ if "dbpassword" in config: + user_creds += ":" + config["dbpassword"] + + return ( + f"mysql+pymysql://{user_creds}@" + f"{config['dbhost']}:{config['dbport']}/{config['dbschema']}?charset=utf8mb4" + ) + + +def get_db_connection_string( + conf_file_path: str, conf_file_section: str = None +): + """Generates a database connection string from supplied database credentials. + + Args: + conf_file_path: + A configuration file with database connection details. If the + configuration file does not exist, assumes that the value is the name + of the environment variable that holds the database connection string. + conf_file_section: + The section of the configuration file. Optional. + + Returns: + MySQL connection string suitable for SQLAchemy + """ + try: + if os.path.exists(conf_file_path): + url = db_credentials_from_config_file( + conf_file_path, conf_file_section + ) + else: + url = os.environ.get("conf_file_path") + if url is None or url == "": + raise Exception( + f"{conf_file_path} is not a file path, neither it is a defined env. variable" + ) + except Exception as err: + raise Exception( + "Failed to get db credentials: " + str(err.with_traceback(None)) + ) + + return url + + +@contextmanager +def get_connection( + conf_file_path: str, conf_file_section: str = None +) -> Session: + """Connects to MySQL database and returns a database session. + + Using database credentials specified in the configuration file, establishes + a connection to MySQL database and returns sqlalchemy.orm.Session object. + + Args: + conf_file_path: + A configuration file with database connection details. + conf_file_section: + The section of the configuration file. Optional. 
+ + Returns: + sqlalchemy.orm.Session object + + Example: + with get_connection( + conf_file_path=test_config, conf_file_section="MySQL MLWH" + ) as session: + pass + """ + + url = get_db_connection_string( + conf_file_path=conf_file_path, conf_file_section=conf_file_section + ) + engine: Engine = create_engine(url, echo=False) + session = Session(engine) + try: + yield session + finally: + session.close() + + +def create_schema( + base: DeclarativeBase, + conf_file_path: str, + conf_file_section: str = None, + drop: bool = False, +): + """Connects to MySQL database, creates a new schema. + + This method is good to use in unit tests. While it can be used for + creating production instances from scratch, the correctness of the created + schema cannot be guaranteed. + + Args: + base: + sqlalchemy.orm.DeclarativeBase object for the schema to be loaded. + conf_file_path: + A configuration file with database connection details. + conf_file_section: + The section of the configuration file. Optional. + drop: + A boolean option, defaults to False. If True, the existing tables of + the database schema are dropped. + """ + + url = get_db_connection_string( + conf_file_path=conf_file_path, conf_file_section=conf_file_section + ) + engine: Engine = create_engine(url, echo=False) + + if database_exists(engine.url): + if drop is True: + drop_database(engine.url) + else: + raise Exception( + "Cannot create a new database: " + "Database exists, drop_database option is False" + ) + + create_database(engine.url) + base.metadata.create_all(engine) + with engine.connect() as conn: + # Workaround for invalid default values for dates. + # Needed only in CI. + conn.execute(text("SET sql_mode = '';")) + conn.commit() + + +def batch_load_from_yaml( + session: Session, module: str, fixtures_dir_path: str +): + """Loads data to the database. + + This method is good for use in unit tests, it is not intended + for production use. 
+ + Args: + session: + sqlalchemy.orm.Session object + module: + A string representing the name of the module where ORM classes + for the database tables are defined. + fixtures_dir_path: + A path to the directory with YAML files containing data to load, one + file per table. The names of the files should follow the pattern + 200-.yml The integer prefix can be any number, + data from files with lower value of the prefix are loaded first. + + Example: + batch_load_from_yaml( + session=session, + fixtures_dir_path="tests/data/mlwh_fixtures", + module="npg_notify.db.mlwh", + ) + """ + # Load the schema module where the table ORM classes are defined. + module = importlib.import_module(module) + # Find all files in a given directory. + dir_obj = pathlib.Path(fixtures_dir_path) + file_paths = list(str(f) for f in dir_obj.iterdir()) + file_paths.sort() + + for file_path in file_paths: + with open(file_path, "r") as f: + (head, file_name) = os.path.split(file_path) + # File name example: 200-PacBioRun.yml + m = re.match(r"\A\d+-([a-zA-Z]+)\.yml\Z", file_name) + if m is not None: + class_name = m.group(1) + table_class = getattr(module, class_name) + data = yaml.safe_load(f) + session.execute(insert(table_class), data) + + session.commit() diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..89c0fe1 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,39 @@ +import pytest +import os + +from npg_notify.db.utils import ( + create_schema, + get_connection, + batch_load_from_yaml, +) +from npg_notify.db.mlwh import Base + +TEST_CONFIG_FILE= "qc_state_app_config.ini" +test_config = os.path.join(os.path.dirname(__file__), "data", TEST_CONFIG_FILE) + + +@pytest.fixture(scope="module", name="mlwh_test_session") +def get_test_db_session(): + """ + Establishes a connection to the database, creates a schema, loads + data and returns a new database session. 
+ """ + fixtures_dir = os.path.join( + os.path.dirname(__file__), "data/mlwh_fixtures" + ) + create_schema( + base=Base, + drop=True, + conf_file_path=test_config, + conf_file_section="MySQL MLWH", + ) + + with get_connection( + conf_file_path=test_config, conf_file_section="MySQL MLWH" + ) as session: + batch_load_from_yaml( + session=session, + fixtures_dir_path=fixtures_dir, + module="npg_notify.db.mlwh", + ) + return session diff --git a/tests/data/mlwh_fixtures/100-Study.yml b/tests/data/mlwh_fixtures/100-Study.yml new file mode 100644 index 0000000..b30857f --- /dev/null +++ b/tests/data/mlwh_fixtures/100-Study.yml @@ -0,0 +1,18 @@ +- name: Study two name + id_lims: SQSCP + id_study_lims: 2 + id_study_tmp: 1 +- name: Study three name + id_lims: SQSCP + id_study_lims: 3 + id_study_tmp: 2 +- name: Study four name + id_lims: SQSCP + id_study_lims: 4 + id_study_tmp: 3 +- name: Study five name + id_lims: SQSCP + id_study_lims: 5 + id_study_tmp: 4 + + diff --git a/tests/data/mlwh_fixtures/200-StudyUsers.yml b/tests/data/mlwh_fixtures/200-StudyUsers.yml new file mode 100644 index 0000000..9bc1014 --- /dev/null +++ b/tests/data/mlwh_fixtures/200-StudyUsers.yml @@ -0,0 +1,36 @@ +- email: user2@sanger.ac.uk + id_study_tmp: 2 + id_study_users_tmp: 1 + role: manager +- email: user3@sanger.ac.uk + id_study_tmp: 2 + id_study_users_tmp: 2 + role: follower +- email: user3@sanger.ac.uk + id_study_tmp: 2 + id_study_users_tmp: 8 + role: owner +- email: user1@sanger.ac.uk + id_study_tmp: 2 + id_study_users_tmp: 3 + role: owner +- email: owner@sanger.ac.uk + id_study_tmp: 2 + id_study_users_tmp: 7 + role: owner +- email: ~ + id_study_tmp: 2 + id_study_users_tmp: 4 + role: ~ +- email: loader@sanger.ac.uk + id_study_tmp: 3 + id_study_users_tmp: 5 + role: loader +- email: user1@sanger.ac.uk + id_study_tmp: 4 + id_study_users_tmp: 6 + role: ~ +- email: ~ + id_study_tmp: 4 + id_study_users_tmp: 9 + role: owner \ No newline at end of file diff --git a/tests/data/qc_state_app_config.ini 
b/tests/data/qc_state_app_config.ini new file mode 100644 index 0000000..0d98135 --- /dev/null +++ b/tests/data/qc_state_app_config.ini @@ -0,0 +1,9 @@ +# Configuration for the npg_notify package. + +[MySQL MLWH] + +dbuser = test +dbpassword = test +dbhost = 127.0.0.1 +dbport = 3306 +dbschema = study_notify diff --git a/tests/test_retrieve_contacts.py b/tests/test_retrieve_contacts.py new file mode 100644 index 0000000..6e1c353 --- /dev/null +++ b/tests/test_retrieve_contacts.py @@ -0,0 +1,19 @@ +import pytest + +from npg_notify.db.mlwh import get_study_contacts, StudyNotFoundError + + +def test_retrieving_study_contacts(mlwh_test_session): + with pytest.raises( + StudyNotFoundError, + match=r"Study with ID 666 is not found in mlwarehouse", + ): + get_study_contacts(mlwh_test_session, "666") + + for id in ["2", "5", "4"]: + assert get_study_contacts(mlwh_test_session, id) == [] + + users = ["owner", "user1", "user2", "user3"] + assert get_study_contacts(mlwh_test_session, "3") == [ + f"{u}@sanger.ac.uk" for u in users + ] From 4d011ff50fc214d31a2e5ffb9f8b236a1bf22693 Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 21 Jun 2024 14:51:20 +0100 Subject: [PATCH 02/15] Added a script to register QC states with Porch. 
--- pyproject.toml | 11 +- src/npg_notify/porch_wrapper/qc_state.py | 151 +++++++++++++++++++++++ tests/data/qc_state_app_config.ini | 14 +++ 3 files changed, 173 insertions(+), 3 deletions(-) create mode 100644 src/npg_notify/porch_wrapper/qc_state.py diff --git a/pyproject.toml b/pyproject.toml index 10d2334..c74d20c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,15 +6,20 @@ authors = ["Marina Gourtovaia "] license = "GPL-3.0-or-later" readme = "README.md" +[tool.poetry.scripts] +qc_state_notification = "npg_notify.porch_wrapper.qc_state:run" + [tool.poetry.dependencies] python = "^3.11" -SQLAlchemy = { version = "^2.0.1", extras = ["pymysql"] } +SQLAlchemy = { version="^2.0.1", extras=["pymysql"] } SQLAlchemy-Utils = "^0.41.2" -cryptography = { version = "^41.0.3" } +cryptography = { version="^41.0.3" } +npg_porch_cli = { git="https://github.com/wtsi-npg/npg_porch_cli.git", branch="devel" } [tool.poetry.dev-dependencies] pytest = "^8.2.2" -PyYAML = "^6.0.0" +PyYAML = "^6.0.0" +requests-mock = "^1.12.1" ruff = "^0.4.9" [tool.ruff] diff --git a/src/npg_notify/porch_wrapper/qc_state.py b/src/npg_notify/porch_wrapper/qc_state.py new file mode 100644 index 0000000..9a7f1d8 --- /dev/null +++ b/src/npg_notify/porch_wrapper/qc_state.py @@ -0,0 +1,151 @@ +import argparse +import os +from urllib.parse import urljoin + +import requests +from npg_notify.config import get_config_data +from npg_porch_cli.api import Pipeline, PorchAction +from npg_porch_cli.api import send as send_porch_request + +LANGQC_CONFIG_FILE_SECTION = "LANGQC" +PORCH_CONFIG_FILE_SECTION = "PORCH" + +VERIFY_CERTIFICATE = False # TODO: change to True +CLIENT_TIMEOUT = (10, 60) + + +def run(): + parser = argparse.ArgumentParser( + prog="qc_state_notification", + description="Creates or processes notifications about QC states.", + ) + parser.add_argument( + "action", + type=str, + help="Action to send to npg_porch server API", + choices=["register"], + ) + parser.add_argument( + "--conf_file_path", 
+ type=str, + required=True, + help="Configuration file path.", + ) + args = parser.parse_args() + + create_tasks(conf_file_path=args.conf_file_path) + + +def create_tasks(conf_file_path: str): + """Retrieves and registers with Porch recently assigned QC states. + + Retrieves from LangQC API all recently (within the last four weeks) assigned + final QC states for PacBio wells. Registers these states as pending with + one of the pipelines registered with Porch API. URLs to use with these APIs, + details of the Porch pipeline and Porch authorization token should be given + in the configuration file. + + All errors in task registration are captured and logged. + + Args: + conf_file_path: + Configuration file path for this application. + """ + + porch_conf = get_config_data( + conf_file_path=conf_file_path, + conf_file_section=PORCH_CONFIG_FILE_SECTION, + ) + # query LangQC for recently QC-ed wells + pipeline = Pipeline( + name=porch_conf["pipeline_name"], + uri=porch_conf["pipeline_uri"], + version=porch_conf["pipeline_version"], + ) + + langqc_conf = get_config_data( + conf_file_path=conf_file_path, + conf_file_section=LANGQC_CONFIG_FILE_SECTION, + ) + qc_states = _send_request( + url=urljoin(langqc_conf["api_url"], langqc_conf["recently_qced_url"]) + ) + + num_products = len(qc_states) + print( + f"Retrieved QC states for {num_products} products." + ) # TODO: log properly + + os.environ["NPG_PORCH_TOKEN"] = porch_conf["npg_porch_token"] + del porch_conf["npg_porch_token"] + + # qc_states is a dictionary where keys are product IDs and values are + # lists of QC states. A list of one QC state is expected. + num_errors = 0 + for product_id, qc_state_data in qc_states.items(): + try: + create_task( + porch_config=porch_conf, + pipeline=pipeline, + qc_state=qc_state_data[0], + ) + # TODO: in DEBUG mode log the new task returned by this call. 
+ except Exception as err: + # TODO: log the error message as an error + print( + f"Error registering QC state for product {product_id}: {str(err)}" + ) + num_errors += 1 + + del os.environ["NPG_PORCH_TOKEN"] + + print( + f"{num_errors} errors when registering products." + f"Registered QC states for {num_products-num_errors} products." + ) # TODO: log properly + + +def create_task( + porch_config: dict[str], pipeline: Pipeline, qc_state: dict[str] +): + """Creates and queues a single porch task for the notification pipeline. + + Args: + porch_config: + A dictionary of porch-related configuration parameters. + pipeline: + npg_porch_cli.api.Pipeline object + qc_state: + A Python object that can be encoded into a JSON string. This object + uniquely defines a porch task for a given pipeline. + """ + + action = PorchAction( + validate_ca_cert=VERIFY_CERTIFICATE, + porch_url=porch_config["api_url"], + action="add_task", + task_input=qc_state, + ) + send_porch_request(action=action, pipeline=pipeline) + + +def _send_request(url: str): + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + } + request_args = { + "headers": headers, + "timeout": CLIENT_TIMEOUT, + "verify": VERIFY_CERTIFICATE, + } + + response = requests.request("GET", url, **request_args) + if not response.ok: + raise Exception( + f"Sending GET request to {url} failed. 
" + f'Status code {response.status_code} "{response.reason}" ' + f"received from {response.url}" + ) + + return response.json() diff --git a/tests/data/qc_state_app_config.ini b/tests/data/qc_state_app_config.ini index 0d98135..66e686c 100644 --- a/tests/data/qc_state_app_config.ini +++ b/tests/data/qc_state_app_config.ini @@ -7,3 +7,17 @@ dbpassword = test dbhost = 127.0.0.1 dbport = 3306 dbschema = study_notify + +[LANGQC] + +api_url = https://langqc.com +recently_qced_url = api/products/qc + +[PORCH] + +api_url = https://porch.com +pipeline_name = qc_notifications +pipeline_uri = https://github.com/mgcam/npg_notifications.git +pipeline_version = 0.1 +npg_porch_token = aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + From 510d17e312aac04ca2a829e0657e68620b028ffc Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 28 Jun 2024 13:56:32 +0100 Subject: [PATCH 03/15] Addded processing for one registered task --- src/npg_notify/mail.py | 149 +++++++++++ src/npg_notify/porch_wrapper/qc_state.py | 309 +++++++++++++++++++---- tests/conftest.py | 8 +- tests/data/qc_state_app_config.ini | 18 +- tests/test_generate_and_send_email.py | 229 +++++++++++++++++ 5 files changed, 664 insertions(+), 49 deletions(-) create mode 100644 src/npg_notify/mail.py create mode 100644 tests/test_generate_and_send_email.py diff --git a/src/npg_notify/mail.py b/src/npg_notify/mail.py new file mode 100644 index 0000000..bf0503a --- /dev/null +++ b/src/npg_notify/mail.py @@ -0,0 +1,149 @@ +import logging +import os +import smtplib +from email.message import EmailMessage +from email.utils import parseaddr + +logger = logging.getLogger("npg_notify") + + +def send_notification( + domain: str, contacts: list[str], subject: str, content: str +): + """Sends an email. + + Sending an email succeeds if at least one recipient is likely to receive it. + + Args: + domain: + The domain of the mail server which is used for sending the email. + contacts: + A non-empty list of valid email addresses. 
+ subject: + The subject line of the email, a non-empty string. + content: + The content of the email, a non-empty string. + """ + user_name = os.environ.get("USER") + if not user_name: + raise ValueError("USER not set in the environment.") + + if subject == "": + raise ValueError("Email subject cannot be empty.") + if content == "": + raise ValueError("Email content cannot be empty.") + + if len(contacts) == 0: + raise ValueError("List of contacts cannot not be empty.") + validated_contacts = [ + valid_address + for valid_address in [parseaddr(addr)[1] for addr in contacts] + if valid_address != "" + ] + if len(validated_contacts) != len(contacts): + logger.warning( + "Some contact emails are invalid in " + ", ".join(contacts) + ) + if len(validated_contacts) == 0: + raise ValueError( + "All contact emails are invalid in " + ", ".join(contacts) + ) + + recipients = ", ".join(validated_contacts) + logger.debug(f"Sending an email to {recipients}") + + msg = EmailMessage() + msg.set_content(content) + msg["Subject"] = subject + msg["To"] = recipients + msg["From"] = f"{user_name}@{domain}" + s = smtplib.SMTP(f"mail.{domain}") + # Sending the message might fail, then there will be an error we + # do not have to catch here. Sending succeeds if at least one + # recipient is likely to receive the message. The return value + # contains information about failed attempts. If this happens it is + # safer to consider the message as being sent. + reply = s.send_message(msg) + s.quit() + if len(reply) != 0: + logger.warning(reply) + + +def generate_email( + domain: str, + langqc_run_url: str, + irods_docs_url: str, + qc_outcome: dict, + well_data: dict, + libraries: list, +) -> tuple[str, str]: + """Generates the subject line and the content for a notification. + + Args: + domain: + The domain for any email addresses in the content of the notification. + langqc_run_url: + LangQC run URL for the recipient to refer to. 
+ irods_docs_url: + iRODS documentation page URL for the recipient to refer to. + qc_outcome: + A dictionary representing information about the QC outcome for + for the PacBio well. + well_data: + A dictionary representing information about a well. + libraries: + A list of dictionaries, which represent individual samples. + + Returns: + A tuple of two strings, the subject line and the content for the + email notification. + """ + run_name = well_data["run_name"] + plate_number = ( + well_data["plate_number"] if well_data["plate_number"] else "n/a" + ) + outcome = "Undefined" + if qc_outcome["outcome"] is not None: + outcome = "Pass" if qc_outcome["outcome"] is True else "Fail" + study_id = libraries[0]["study_id"] + study_name = libraries[0]["study_name"] + + subject = f"Study {study_id}: PacBio data is available" + + lines = [ + subject, + "", + f"Study name: {study_name}", + f"Run: {run_name}", + f"Well label: {well_data['label']}", + f"Plate number: {plate_number}", + f"QC outcome: {qc_outcome['qc_state']} ({outcome})", + "Samples:", + ] + num_samples = len(libraries) + num_samples_listed = num_samples if num_samples <= 5 else 5 + for name in [ + lib["sample_name"] for lib in libraries[0:num_samples_listed] + ]: + lines.append(f"\t{name},") + + if num_samples > num_samples_listed: + lines.append("\t.....") + + lines = lines + [ + f"\t{num_samples} sample" + + ("s" if num_samples > 1 else "") + + " in total", + "", + "The QC review is complete and your data should now be available from" + + f" iRODS (see {irods_docs_url}).", + f"QC information for this run: {'/'.join([langqc_run_url, run_name])}.", + "", + "If you have any questions or need further assistance, please feel " + + "free to reach out to a Scientific Service Representative at " + + f"dnap-ssr@{domain}.", + "", + "NPG on behalf of DNA Pipelines\n", + ] + + return (subject, "\n".join(lines)) diff --git a/src/npg_notify/porch_wrapper/qc_state.py b/src/npg_notify/porch_wrapper/qc_state.py index 
9a7f1d8..95dd562 100644 --- a/src/npg_notify/porch_wrapper/qc_state.py +++ b/src/npg_notify/porch_wrapper/qc_state.py @@ -1,20 +1,32 @@ import argparse +import logging import os +import re +import sys +import time from urllib.parse import urljoin -import requests from npg_notify.config import get_config_data +from npg_notify.db.mlwh import get_study_contacts +from npg_notify.db.utils import db_credentials_from_config_file, get_connection +from npg_notify.mail import generate_email, send_notification +from npg_porch_cli import send_request from npg_porch_cli.api import Pipeline, PorchAction from npg_porch_cli.api import send as send_porch_request +SSL_CONFIG_FILE_SECTION = "SSL" LANGQC_CONFIG_FILE_SECTION = "LANGQC" PORCH_CONFIG_FILE_SECTION = "PORCH" +IRODS_CONFIG_FILE_SECTION = "IRODS" +MAIL_CONFIG_FILE_SECTION = "MAIL" -VERIFY_CERTIFICATE = False # TODO: change to True -CLIENT_TIMEOUT = (10, 60) +logger = logging.getLogger("npg_notify") def run(): + _configure_logger() + logger.info("Started") + parser = argparse.ArgumentParser( prog="qc_state_notification", description="Creates or processes notifications about QC states.", @@ -22,8 +34,8 @@ def run(): parser.add_argument( "action", type=str, - help="Action to send to npg_porch server API", - choices=["register"], + help="A task to perform.", + choices=["register", "process"], ) parser.add_argument( "--conf_file_path", @@ -33,7 +45,21 @@ def run(): ) args = parser.parse_args() - create_tasks(conf_file_path=args.conf_file_path) + conf_file_path = args.conf_file_path + ssl_cert_file = _get_ssl_cert_file_path(conf_file_path=conf_file_path) + if ssl_cert_file: + os.environ["SSL_CERT_FILE"] = ssl_cert_file + + action = args.action + if action == "register": + success = create_tasks(conf_file_path=conf_file_path) + elif action == "process": + success = process_task(conf_file_path=conf_file_path) + else: + raise Exception(f"Action '{action}' is not implemented") + + logger.info("Finished") + exit(0 if success else 1) 
def create_tasks(conf_file_path: str): @@ -56,25 +82,22 @@ def create_tasks(conf_file_path: str): conf_file_path=conf_file_path, conf_file_section=PORCH_CONFIG_FILE_SECTION, ) - # query LangQC for recently QC-ed wells - pipeline = Pipeline( - name=porch_conf["pipeline_name"], - uri=porch_conf["pipeline_uri"], - version=porch_conf["pipeline_version"], - ) + pipeline = _pipeline_object(porch_conf) + # query LangQC for recently QC-ed wells langqc_conf = get_config_data( conf_file_path=conf_file_path, conf_file_section=LANGQC_CONFIG_FILE_SECTION, ) - qc_states = _send_request( - url=urljoin(langqc_conf["api_url"], langqc_conf["recently_qced_url"]) + qc_states = send_request( + validate_ca_cert=_validate_ca_cert(), + url=urljoin(langqc_conf["api_url"], langqc_conf["recently_qced_url"]), + method="GET", + auth_type=None, ) num_products = len(qc_states) - print( - f"Retrieved QC states for {num_products} products." - ) # TODO: log properly + logger.info(f"Retrieved QC states for {num_products} products.") os.environ["NPG_PORCH_TOKEN"] = porch_conf["npg_porch_token"] del porch_conf["npg_porch_token"] @@ -84,25 +107,26 @@ def create_tasks(conf_file_path: str): num_errors = 0 for product_id, qc_state_data in qc_states.items(): try: - create_task( + task = create_task( porch_config=porch_conf, pipeline=pipeline, qc_state=qc_state_data[0], ) - # TODO: in DEBUG mode log the new task returned by this call. + logger.debug(f"Created a new task {task}") except Exception as err: - # TODO: log the error message as an error - print( + logger.error( f"Error registering QC state for product {product_id}: {str(err)}" ) num_errors += 1 del os.environ["NPG_PORCH_TOKEN"] - print( - f"{num_errors} errors when registering products." + logger.info( + f"{num_errors} errors when registering products. " f"Registered QC states for {num_products-num_errors} products." 
- ) # TODO: log properly + ) + + return True if not num_errors else False def create_task( @@ -121,7 +145,7 @@ def create_task( """ action = PorchAction( - validate_ca_cert=VERIFY_CERTIFICATE, + validate_ca_cert=_validate_ca_cert(), porch_url=porch_config["api_url"], action="add_task", task_input=qc_state, @@ -129,23 +153,222 @@ def create_task( send_porch_request(action=action, pipeline=pipeline) -def _send_request(url: str): - headers = { - "Content-Type": "application/json", - "Accept": "application/json", - } - request_args = { - "headers": headers, - "timeout": CLIENT_TIMEOUT, - "verify": VERIFY_CERTIFICATE, - } - - response = requests.request("GET", url, **request_args) - if not response.ok: - raise Exception( - f"Sending GET request to {url} failed. " - f'Status code {response.status_code} "{response.reason}" ' - f"received from {response.url}" +def process_task(conf_file_path): + """Processes one task for the email notification pipeline. + + Performs the following steps: + 1. claims one porch task, + 2. gets necessary details about the product (PacBio well) from the + LangQC API, + 3. for each study linked to the product gets contact details from + ml warehouse, + 4. for each study sends an email notification, + 5. using porch API, updates the claimed task status to `DONE`, `FAIL`, + or resets it back to `PENDING`. + + If an error occurs at steps 2 or 3, the task will be re-assigned the + `PENDING` status. + + On step 4 an attempt to send an email will be made separately for each + study. If this fails for one of the studies, the task will be assigned the + `FAIL` status. + + Step 5 can fail, then the task will remain as claimed. Tasks like this + should be mopped up manually. The log will contain information about the + intended state of the task. + + The event log (table) of the porch database contains information about all + task status updates. This information can be used to identify tasks, which + have their status updated repeatedly. 
+ """ + + porch_config = get_config_data( + conf_file_path=conf_file_path, + conf_file_section=PORCH_CONFIG_FILE_SECTION, + ) + langqc_conf = get_config_data( + conf_file_path=conf_file_path, + conf_file_section=LANGQC_CONFIG_FILE_SECTION, + ) + irods_config = get_config_data( + conf_file_path=conf_file_path, + conf_file_section=IRODS_CONFIG_FILE_SECTION, + ) + mail_config = get_config_data( + conf_file_path=conf_file_path, + conf_file_section=MAIL_CONFIG_FILE_SECTION, + ) + + # Get all config data or error before claiming the task. + porch_api_url = porch_config["api_url"] + pac_bio_well_libraries_url = langqc_conf["pac_bio_well_libraries_url"] + langqc_base_url = langqc_conf["api_url"] + pac_bio_run_iu_url = langqc_conf["pac_bio_run_iu_url"] + irods_docs_url = irods_config["user_manual_url"] + domain = mail_config["domain"] + # Check that database credentials are in place + db_credentials_from_config_file( + conf_file_path=conf_file_path, conf_file_section="MySQL MLWH" + ) + + os.environ["NPG_PORCH_TOKEN"] = porch_config["npg_porch_token"] + pipeline = _pipeline_object(porch_config) + action = PorchAction( + validate_ca_cert=_validate_ca_cert(), + porch_url=porch_api_url, + action="claim_task", + ) + claimed_tasks = send_porch_request(action=action, pipeline=pipeline) + if len(claimed_tasks) == 0: + logger.info("No pending tasks returned from porch") + return True + + claimed_task = claimed_tasks[0] + logger.debug(f"Claimed task {claimed_task}") + + new_task_status = "DONE" + + # Get product lims data from LangQC - a platform-specific step. + # For PacBio the product is a well. 
+ product_id = claimed_task["task_input"]["id_product"] + try: + url = re.sub( + "\[\w+\]", + product_id, + pac_bio_well_libraries_url, + ) + url = urljoin(langqc_base_url, url) + response = send_request( + validate_ca_cert=_validate_ca_cert(), + url=url, + method="GET", + auth_type=None, ) + libraries_per_study = {} + for library in response["libraries"]: + study_id = library["study_id"] + if study_id in libraries_per_study: + libraries_per_study[study_id].append(library) + else: + libraries_per_study[study_id] = [library] + + study_ids = libraries_per_study.keys() + contacts_per_study = {} + + with get_connection( + conf_file_path=conf_file_path, conf_file_section="MySQL MLWH" + ) as session: + for study_id in study_ids: + contacts_per_study[study_id] = get_study_contacts( + session=session, id=study_id + ) + + except Exception as err: + logger.error(str(err)) + new_task_status = "PENDING" + + task_input = claimed_task["task_input"] + + if new_task_status == "DONE": + for study_id, contacts in contacts_per_study.items(): + if len(contacts) == 0: + print(f"No contacts are registered for study {study_id}") + continue + + try: + (subject, text) = generate_email( + domain=domain, + langqc_run_url=urljoin( + langqc_base_url, pac_bio_run_iu_url + ), + irods_docs_url=irods_docs_url, + qc_outcome=task_input, + well_data=response, + libraries=libraries_per_study[study_id], + ) # platform-specific step + send_notification( + domain=domain, + contacts=contacts, + subject=subject, + content=text, + ) + except Exception as err: + logger.error( + "Error generating or sending a notification: " + str(err) + ) + new_task_status = "FAILED" + + return _update_task_status( + porch_api_url, pipeline, task_input, new_task_status + ) + - return response.json() +def _pipeline_object(porch_conf: dict): + return Pipeline( + name=porch_conf["pipeline_name"], + uri=porch_conf["pipeline_uri"], + version=porch_conf["pipeline_version"], + ) + + +def _update_task_status(porch_api_url, pipeline, 
task_input, task_status): + action = PorchAction( + validate_ca_cert=_validate_ca_cert(), + porch_url=porch_api_url, + action="update_task", + task_input=task_input, + task_status=task_status, + ) + + num_attempts = 3 + i = 0 + success = False + + message = f"porch task {task_input} to status {task_status}" + + while i < num_attempts: + try: + send_porch_request(action=action, pipeline=pipeline) + logger.info(f"Updated {message}") + success = True + i = num_attempts + except Exception as err: + logger.warning( + "Error: " + str(err) + f"\nwhile trying to update {message}" + ) + if i != (num_attempts - 1): + time.sleep(60 + (i * 300)) # sleep 60 or 360 sec + i = +1 + if not success: + logger.error(f"Failed to update {message}.") + + return success + + +def _get_ssl_cert_file_path(conf_file_path: str): + ssl_cert_file = None + try: + ssl_conf = get_config_data( + conf_file_path=conf_file_path, + conf_file_section=SSL_CONFIG_FILE_SECTION, + ) + ssl_cert_file = ssl_conf["ssl_cert_file"] + if not os.path.isfile(ssl_cert_file): + ssl_cert_file = None + except Exception as err: + logger.warning(str(err)) + + return ssl_cert_file + + +def _validate_ca_cert(): + return True if "SSL_CERT_FILE" in os.environ else False + + +def _configure_logger(): + logging.basicConfig( + stream=sys.stderr, + level=logging.INFO, + datefmt="%Y:%m:%d %H:%M:%S", + format="%(asctime)s %(levelname)s %(message)s", + ) diff --git a/tests/conftest.py b/tests/conftest.py index 89c0fe1..0450e15 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,14 +1,14 @@ -import pytest import os +import pytest +from npg_notify.db.mlwh import Base from npg_notify.db.utils import ( + batch_load_from_yaml, create_schema, get_connection, - batch_load_from_yaml, ) -from npg_notify.db.mlwh import Base -TEST_CONFIG_FILE= "qc_state_app_config.ini" +TEST_CONFIG_FILE = "qc_state_app_config.ini" test_config = os.path.join(os.path.dirname(__file__), "data", TEST_CONFIG_FILE) diff --git 
a/tests/data/qc_state_app_config.ini b/tests/data/qc_state_app_config.ini index 66e686c..e58ddc9 100644 --- a/tests/data/qc_state_app_config.ini +++ b/tests/data/qc_state_app_config.ini @@ -8,16 +8,30 @@ dbhost = 127.0.0.1 dbport = 3306 dbschema = study_notify +[SSL] + +ssl_cert_file = some_path + [LANGQC] api_url = https://langqc.com -recently_qced_url = api/products/qc +recently_qced_url = api/products/qc?weeks=4&final=yes&seq_level=yes +pac_bio_well_libraries_url = api/pacbio/wells/[product_id]/libraries +pac_bio_run_iu_url = ui/run [PORCH] api_url = https://porch.com pipeline_name = qc_notifications -pipeline_uri = https://github.com/mgcam/npg_notifications.git +pipeline_uri = https://github.com/wtsi/npg_notifications.git pipeline_version = 0.1 npg_porch_token = aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +[IRODS] + +user_manual_url = https://confluence_irods.com/display/FARM/iRODS + +[MAIL] + +domain = some.com + diff --git a/tests/test_generate_and_send_email.py b/tests/test_generate_and_send_email.py new file mode 100644 index 0000000..f9cdd33 --- /dev/null +++ b/tests/test_generate_and_send_email.py @@ -0,0 +1,229 @@ +import random + +from npg_notify.mail import generate_email + +domain = "langqc.com" +langqc_run_url = "https://langqc.com/ui/run" +irods_docs_url = "https://confluence_irods.com/iRODS" +footer = ( + "The QC review is complete and your data should now be available " + + "from iRODS (see https://confluence_irods.com/iRODS).\n" + + "QC information for this run: https://langqc.com/ui/run/TRACTION-RUN.\n\n" + + "If you have any questions or need further assistance, please feel free " + + "to reach out to a Scientific Service Representative at dnap-ssr@langqc.com.\n\n" + + "NPG on behalf of DNA Pipelines\n" +) + +id_product = "f910e2fc6bd10913fb7df100e788192962d71d57c85e3d300c9fa3e24e6691db" + + +def get_well_data(libraries): + return { + "id_product": id_product, + "label": "D1", + "plate_number": 1, + "run_name": "TRACTION-RUN-1333", + "run_start_time": 
"2024-06-18T10:01:42", + "run_complete_time": "2024-06-19T16:26:46", + "well_start_time": "2024-06-18T11:44:43", + "well_complete_time": "2024-06-20T14:36:27", + "run_status": "Complete", + "well_status": "Complete", + "instrument_name": "84098", + "instrument_type": "Revio", + "qc_state": None, + "libraries": libraries, + } + + +def generate_tag(): + return "".join([random.choice(["C", "T", "G", "A"]) for i in range(1, 16)]) + + +def test_generate_email_one_library(): + qc_outcome = { + "qc_state": "Passed With Distinction", + "is_preliminary": False, + "qc_type": "sequencing", + "outcome": True, + "id_product": id_product, + "date_created": "2024-06-28T14:22:18", + "date_updated": "2024-06-28T14:24:47", + "user": "user1@langqc.com", + "created_by": "LangQC", + } + libraries = [ + { + "study_id": "1234", + "study_name": "Reference Genomes_ DNA", + "sample_id": "778655549", + "sample_name": "1234STDY13618009", + "tag_sequence": ["CTGCGATCACGAGTAT"], + "library_type": "Pacbio_HiFi", + "pool_name": "TRAC-2-3818", + } + ] + + (subject, generated_content) = generate_email( + domain=domain, + langqc_run_url=langqc_run_url, + irods_docs_url=irods_docs_url, + qc_outcome=qc_outcome, + well_data=get_well_data(libraries), + libraries=libraries, + ) + + content = ( + "Study 1234: PacBio data is available\n\n" + + "Study name: Reference Genomes_ DNA\n" + + "Run: TRACTION-RUN-1333\n" + + "Well label: D1\n" + + "Plate number: 1\n" + + "QC outcome: Passed With Distinction (Pass)\n" + + "Samples:\n" + + "\t1234STDY13618009,\n" + + "\t1 sample in total\n\n" + ) + + assert subject == "Study 1234: PacBio data is available" + assert generated_content == ( + content + footer.replace("TRACTION-RUN", "TRACTION-RUN-1333") + ) + + qc_outcome["outcome"] = None + qc_outcome["qc_state"] = "Nobody can tell" + (subject, generated_content) = generate_email( + domain=domain, + langqc_run_url=langqc_run_url, + irods_docs_url=irods_docs_url, + qc_outcome=qc_outcome, + 
well_data=get_well_data(libraries), + libraries=libraries, + ) + + content = ( + "Study 1234: PacBio data is available\n\n" + + "Study name: Reference Genomes_ DNA\n" + + "Run: TRACTION-RUN-1333\n" + + "Well label: D1\n" + + "Plate number: 1\n" + + "QC outcome: Nobody can tell (Undefined)\n" + + "Samples:\n" + + "\t1234STDY13618009,\n" + + "\t1 sample in total\n\n" + ) + + assert generated_content == ( + content + footer.replace("TRACTION-RUN", "TRACTION-RUN-1333") + ) + + +def test_generate_email_two_libraries(): + qc_outcome = { + "qc_state": "Failed (Instrument)", + "is_preliminary": False, + "qc_type": "sequencing", + "outcome": False, + "id_product": id_product, + "date_created": "2024-06-28T14:22:18", + "date_updated": "2024-06-28T14:24:47", + "user": "user1@langqc.com", + "created_by": "LangQC", + } + + libraries = [ + { + "study_id": "1234", + "study_name": "Reference Genomes_ DNA", + "sample_id": "778655549", + "sample_name": f"1234STDY13618009{i}", + "tag_sequence": [generate_tag()], + "library_type": "Pacbio_HiFi", + "pool_name": "TRAC-2-3818", + } + for i in range(0, 5) + ] + + (subject, generated_content) = generate_email( + domain=domain, + langqc_run_url=langqc_run_url, + irods_docs_url=irods_docs_url, + qc_outcome=qc_outcome, + well_data=get_well_data(libraries), + libraries=libraries[0:2], + ) + + content = ( + "Study 1234: PacBio data is available\n\n" + + "Study name: Reference Genomes_ DNA\n" + + "Run: TRACTION-RUN-1333\n" + + "Well label: D1\n" + + "Plate number: 1\n" + + "QC outcome: Failed (Instrument) (Fail)\n" + + "Samples:\n" + + "\t1234STDY136180090,\n" + + "\t1234STDY136180091,\n" + + "\t2 samples in total\n\n" + ) + + assert subject == "Study 1234: PacBio data is available" + assert generated_content == ( + content + footer.replace("TRACTION-RUN", "TRACTION-RUN-1333") + ) + + +def test_generate_email_seven_libraries(): + qc_outcome = { + "qc_state": "Failed", + "is_preliminary": False, + "qc_type": "sequencing", + "outcome": False, + 
"id_product": id_product, + "date_created": "2024-06-28T14:22:18", + "date_updated": "2024-06-28T14:24:47", + "user": "user1@langqc.com", + "created_by": "LangQC", + } + libraries = [ + { + "study_id": "1234", + "study_name": "Reference Genomes_ DNA", + "sample_id": "778655549", + "sample_name": f"1234STDY13618009{i}", + "tag_sequence": [generate_tag()], + "library_type": "Pacbio_HiFi", + "pool_name": "TRAC-2-3818", + } + for i in range(0, 7) + ] + + (subject, generated_content) = generate_email( + domain=domain, + langqc_run_url=langqc_run_url, + irods_docs_url=irods_docs_url, + qc_outcome=qc_outcome, + well_data=get_well_data(libraries), + libraries=libraries, + ) + + content = ( + "Study 1234: PacBio data is available\n\n" + + "Study name: Reference Genomes_ DNA\n" + + "Run: TRACTION-RUN-1333\n" + + "Well label: D1\n" + + "Plate number: 1\n" + + "QC outcome: Failed (Fail)\n" + + "Samples:\n" + + "\t1234STDY136180090,\n" + + "\t1234STDY136180091,\n" + + "\t1234STDY136180092,\n" + + "\t1234STDY136180093,\n" + + "\t1234STDY136180094,\n" + + "\t.....\n" + + "\t7 samples in total\n\n" + ) + + assert subject == "Study 1234: PacBio data is available" + assert generated_content == ( + content + footer.replace("TRACTION-RUN", "TRACTION-RUN-1333") + ) From 637adf5baf245e3178c52181f041408d2d587824 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 17 Jul 2024 18:21:51 +0100 Subject: [PATCH 04/15] Additional SSL setting --- src/npg_notify/porch_wrapper/qc_state.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/npg_notify/porch_wrapper/qc_state.py b/src/npg_notify/porch_wrapper/qc_state.py index 95dd562..fa0519d 100644 --- a/src/npg_notify/porch_wrapper/qc_state.py +++ b/src/npg_notify/porch_wrapper/qc_state.py @@ -4,6 +4,7 @@ import re import sys import time +from pathlib import PurePath from urllib.parse import urljoin from npg_notify.config import get_config_data @@ -49,6 +50,11 @@ def run(): ssl_cert_file = _get_ssl_cert_file_path(conf_file_path=conf_file_path) 
if ssl_cert_file: os.environ["SSL_CERT_FILE"] = ssl_cert_file + # For a custom CA SSL certificate, the directory assigned to the + # REQUESTS_CA_BUNDLE env. variable should have been 'treated' with the + # c_rehash tool, which is supplied with the Python interpreter, see + # https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification + os.environ["REQUESTS_CA_BUNDLE"] = str(PurePath(ssl_cert_file).parent) action = args.action if action == "register": From ea12e29e93e602713f1ddb9272561f0a3fefd448 Mon Sep 17 00:00:00 2001 From: mgcam Date: Thu, 18 Jul 2024 17:00:57 +0100 Subject: [PATCH 05/15] Improved a comment and error message as suggested --- src/npg_notify/porch_wrapper/qc_state.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/npg_notify/porch_wrapper/qc_state.py b/src/npg_notify/porch_wrapper/qc_state.py index fa0519d..1668c91 100644 --- a/src/npg_notify/porch_wrapper/qc_state.py +++ b/src/npg_notify/porch_wrapper/qc_state.py @@ -109,7 +109,8 @@ def create_tasks(conf_file_path: str): del porch_conf["npg_porch_token"] # qc_states is a dictionary where keys are product IDs and values are - # lists of QC states. A list of one QC state is expected. + # lists of QC states. A list of one QC state is expected because we are + # limiting to sequencing products only. 
num_errors = 0 for product_id, qc_state_data in qc_states.items(): try: @@ -121,7 +122,8 @@ def create_tasks(conf_file_path: str): logger.debug(f"Created a new task {task}") except Exception as err: logger.error( - f"Error registering QC state for product {product_id}: {str(err)}" + f"Error registering task for pipeline {pipeline.name} with " + f"QC state change of {product_id}: {str(err)}" ) num_errors += 1 From 9d55e15308837529ff3e7836772fe6dc6919d5eb Mon Sep 17 00:00:00 2001 From: mgcam Date: Thu, 18 Jul 2024 17:35:52 +0100 Subject: [PATCH 06/15] Fixed typos, extended docs --- README.md | 8 ++++---- src/npg_notify/config.py | 19 +++++++++++-------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 89e4ad6..4d6f315 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # npg_notifications -A utility for notifying customers about lifecycle events in the analisys +A utility for notifying customers about lifecycle events in the analysis and QC of sequencing data. -[Porch](https://github.com/wtsi-npg/npg_porch) service is used to queue and +[porch](https://github.com/wtsi-npg/npg_porch) service is used to queue and process (send) notifications. The notification producer can repeatedly send -to `porch` the same notification. `Porch` guarantees that the repeated -notifications are not kept and, therefor, not processed. +to `porch` the same notification. `porch` guarantees that the repeated +notifications are not kept and therefore not processed. The consumer of the notifications is responsible for sending the message to the customer. 
For a fully automated system the consumer should implement diff --git a/src/npg_notify/config.py b/src/npg_notify/config.py index fe01cb8..b079e30 100644 --- a/src/npg_notify/config.py +++ b/src/npg_notify/config.py @@ -1,6 +1,6 @@ import configparser -import pathlib import json +import pathlib """Common utility functions for the package.""" @@ -15,6 +15,9 @@ def get_config_data(conf_file_path: str, conf_file_section: str = None): the file is determined from the extension of the file name. In case of no extension an 'ini' type is assumed. + The content of the file is not cached, so subsequent calls to get data from + the same configuration file result in re-reading and re-parsing of the file. + Args: conf_file_path: @@ -33,13 +36,13 @@ def get_config_data(conf_file_path: str, conf_file_section: str = None): The value corresponding to this key is returned. """ - conf_file_extention = pathlib.Path(conf_file_path).suffix - if conf_file_extention: - conf_file_extention = conf_file_extention[1:] + conf_file_extension = pathlib.Path(conf_file_path).suffix + if conf_file_extension: + conf_file_extension = conf_file_extension[1:] else: - conf_file_extention = DEFAULT_CONF_FILE_TYPE + conf_file_extension = DEFAULT_CONF_FILE_TYPE - if conf_file_extention == DEFAULT_CONF_FILE_TYPE: + if conf_file_extension == DEFAULT_CONF_FILE_TYPE: if not conf_file_section: raise Exception( "'conf_file_section' argument is not given, " @@ -52,7 +55,7 @@ def get_config_data(conf_file_path: str, conf_file_section: str = None): return {i[0]: i[1] for i in config[conf_file_section].items()} - elif conf_file_extention == "json": + elif conf_file_extension == "json": conf: dict = json.load(conf_file_path) if conf_file_section: if isinstance(conf, dict) is False: @@ -68,5 +71,5 @@ def get_config_data(conf_file_path: str, conf_file_section: str = None): else: raise Exception( - f"Parsing for '{conf_file_extention}' files is not implemented" + f"Parsing for '{conf_file_extension}' files is not 
implemented" ) From 6ec152c9ef2af879a34790ed51edd44d3047262e Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 19 Jul 2024 11:40:38 +0100 Subject: [PATCH 07/15] Simplified the code and documented ORM classes --- src/npg_notify/db/mlwh.py | 70 ++++++++++++++----- .../{200-StudyUsers.yml => 200-StudyUser.yml} | 0 tests/test_retrieve_contacts.py | 5 +- 3 files changed, 55 insertions(+), 20 deletions(-) rename tests/data/mlwh_fixtures/{200-StudyUsers.yml => 200-StudyUser.yml} (100%) diff --git a/src/npg_notify/db/mlwh.py b/src/npg_notify/db/mlwh.py index 33ff36c..603c800 100644 --- a/src/npg_notify/db/mlwh.py +++ b/src/npg_notify/db/mlwh.py @@ -18,27 +18,36 @@ # You should have received a copy of the GNU General Public License along with # this program. If not, see . -from sqlalchemy import ForeignKey, UniqueConstraint, Integer, String, select +from sqlalchemy import ForeignKey, Integer, String, UniqueConstraint, select +from sqlalchemy.exc import NoResultFound from sqlalchemy.orm import ( DeclarativeBase, Mapped, + Session, mapped_column, relationship, - Session, ) -from sqlalchemy.exc import NoResultFound """ -Declarative ORM for some tables of multi-lims warehouse. +Declarative ORM for some tables of multi-lims warehouse (mlwh) database. For simplicity, only columns used by this package are represented. + +Available ORM classes: Study, StudyUser. + +Utility methods: get_study_contacts. """ +"Study contacts with these roles will receive notifications." +ROLES = ["manager", "follower", "owner"] + class Base(DeclarativeBase): pass class Study(Base): + "A representation for the 'study' table." 
+ __tablename__ = "study" id_study_tmp = mapped_column(Integer, primary_key=True, autoincrement=True) @@ -54,26 +63,34 @@ class Study(Base): ), ) - study_users: Mapped[set["StudyUsers"]] = relationship() + study_users: Mapped[set["StudyUser"]] = relationship() def __repr__(self): return f"Study {self.id_study_lims}, {self.name}" def contacts(self) -> list[str]: - roles = ["manager", "follower", "owner"] + """Retrieves emails of contacts for this study object. + + Returns: + A sorted list of unique emails for managers, followers or owners of + the study. + """ + + # In order to eliminate repetition, the comprehension expression below + # returns a set, which is then sorted to return a sorted list. return sorted( - list( - { - u.email - for u in self.study_users - if (u.email is not None and u.role is not None) - and (u.role in roles) - } - ) + { + u.email + for u in self.study_users + if (u.email is not None and u.role is not None) + and (u.role in ROLES) + } ) -class StudyUsers(Base): +class StudyUser(Base): + "A representation for the 'study_users' table." + __tablename__ = "study_users" id_study_users_tmp = mapped_column( @@ -94,10 +111,29 @@ def __repr__(self): class StudyNotFoundError(Exception): + "An error to use when a study does not exist in mlwh." + pass -def get_study_contacts(session: Session, id: str): +def get_study_contacts(session: Session, id: str) -> list[str]: + """Retrieves emails of study contacts from the mlwh database. + + Args: + session: + sqlalchemy.orm.Session object + id: + String study ID. + + Returns: + A sorted list of unique emails for managers, followers or owners of + the study. 
+ + Example: + + from npg_notify.db.mlwh import get_study_contacts + contact_emails = get_study_contacts(session=session, id="5901") + """ try: contacts = ( session.execute(select(Study).where(Study.id_study_lims == id)) @@ -106,7 +142,7 @@ def get_study_contacts(session: Session, id: str): ) except NoResultFound: raise StudyNotFoundError( - f"Study with ID {id} is not found in mlwarehouse" + f"Study with ID {id} is not found in ml warehouse" ) return contacts diff --git a/tests/data/mlwh_fixtures/200-StudyUsers.yml b/tests/data/mlwh_fixtures/200-StudyUser.yml similarity index 100% rename from tests/data/mlwh_fixtures/200-StudyUsers.yml rename to tests/data/mlwh_fixtures/200-StudyUser.yml diff --git a/tests/test_retrieve_contacts.py b/tests/test_retrieve_contacts.py index 6e1c353..03067d7 100644 --- a/tests/test_retrieve_contacts.py +++ b/tests/test_retrieve_contacts.py @@ -1,12 +1,11 @@ import pytest - -from npg_notify.db.mlwh import get_study_contacts, StudyNotFoundError +from npg_notify.db.mlwh import StudyNotFoundError, get_study_contacts def test_retrieving_study_contacts(mlwh_test_session): with pytest.raises( StudyNotFoundError, - match=r"Study with ID 666 is not found in mlwarehouse", + match=r"Study with ID 666 is not found in ml warehouse", ): get_study_contacts(mlwh_test_session, "666") From a4ae0733ccb0dd04b4d50c6f7efa36cca33f31f5 Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 19 Jul 2024 15:09:50 +0100 Subject: [PATCH 08/15] Do not interpret conf path as an env var name --- src/npg_notify/db/utils.py | 55 +++++++------------------------------- 1 file changed, 10 insertions(+), 45 deletions(-) diff --git a/src/npg_notify/db/utils.py b/src/npg_notify/db/utils.py index 95d2363..a87d8d8 100644 --- a/src/npg_notify/db/utils.py +++ b/src/npg_notify/db/utils.py @@ -1,21 +1,20 @@ -import os import importlib +import os import pathlib import re -import yaml - from contextlib import contextmanager -from sqlalchemy import create_engine, text, insert +import yaml +from 
sqlalchemy import create_engine, insert, text from sqlalchemy.engine import Engine -from sqlalchemy.orm import Session, DeclarativeBase +from sqlalchemy.orm import DeclarativeBase, Session from sqlalchemy_utils import create_database, database_exists, drop_database from npg_notify.config import get_config_data -def db_credentials_from_config_file( - conf_file_path: str, conf_file_section: str = None +def get_db_connection_string( + conf_file_path: str, conf_file_section: str | None = None ): """Parses a configuration file, generates a database connection string. @@ -23,7 +22,8 @@ def db_credentials_from_config_file( conf_file_path: A configuration file with database connection details. conf_file_section: - The section of the configuration file. Optional. + The section of the configuration file. Is needed for the INI + configuration file format, otherwise optional. Returns: MySQL connection string suitable for SQLAchemy @@ -46,41 +46,6 @@ def db_credentials_from_config_file( ) -def get_db_connection_string( - conf_file_path: str, conf_file_section: str = None -): - """Generates a database connection string from supplied database credentials. - - Args: - conf_file_path: - A configuration file with database connection details. If the - configuration file does not exist, assumes that the value is the name - of the environment variable that holds the database connection string. - conf_file_section: - The section of the configuration file. Optional. - - Returns: - MySQL connection string suitable for SQLAchemy - """ - try: - if os.path.exists(conf_file_path): - url = db_credentials_from_config_file( - conf_file_path, conf_file_section - ) - else: - url = os.environ.get("conf_file_path") - if url is None or url == "": - raise Exception( - f"{conf_file_path} is not a file path, neither it is a defined env. 
variable" - ) - except Exception as err: - raise Exception( - "Failed to get db credentials: " + str(err.with_traceback(None)) - ) - - return url - - @contextmanager def get_connection( conf_file_path: str, conf_file_section: str = None @@ -201,9 +166,9 @@ def batch_load_from_yaml( for file_path in file_paths: with open(file_path, "r") as f: (head, file_name) = os.path.split(file_path) - # File name example: 200-PacBioRun.yml + # File name example: 200-StudyUser.yml m = re.match(r"\A\d+-([a-zA-Z]+)\.yml\Z", file_name) - if m is not None: + if m: class_name = m.group(1) table_class = getattr(module, class_name) data = yaml.safe_load(f) From 2b46bf23fc7fb0518c657ab17e6b74ed6def88a9 Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 19 Jul 2024 15:36:32 +0100 Subject: [PATCH 09/15] Implemented mailer changes suggested in the review --- src/npg_notify/mail.py | 16 ++++++++++++--- src/npg_notify/porch_wrapper/qc_state.py | 4 ++-- tests/test_generate_and_send_email.py | 25 +++++++++++++++++++----- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/npg_notify/mail.py b/src/npg_notify/mail.py index bf0503a..6c0a918 100644 --- a/src/npg_notify/mail.py +++ b/src/npg_notify/mail.py @@ -69,7 +69,7 @@ def send_notification( logger.warning(reply) -def generate_email( +def generate_email_pac_bio( domain: str, langqc_run_url: str, irods_docs_url: str, @@ -79,6 +79,8 @@ def generate_email( ) -> tuple[str, str]: """Generates the subject line and the content for a notification. + This code is specific for the PacBio platform. + Args: domain: The domain for any email addresses in the content of the notification. @@ -92,12 +94,19 @@ def generate_email( well_data: A dictionary representing information about a well. libraries: - A list of dictionaries, which represent individual samples. + A list of dictionaries, which represent individual libraries. + All libraries in this list should belong to the same study. 
Returns: A tuple of two strings, the subject line and the content for the email notification. """ + study_ids = {lib["study_id"] for lib in libraries} + if len(study_ids) != 1: + raise ValueError( + "Libraries from different studies in 'libraries' attribute" + ) + run_name = well_data["run_name"] plate_number = ( well_data["plate_number"] if well_data["plate_number"] else "n/a" @@ -105,7 +114,8 @@ def generate_email( outcome = "Undefined" if qc_outcome["outcome"] is not None: outcome = "Pass" if qc_outcome["outcome"] is True else "Fail" - study_id = libraries[0]["study_id"] + + study_id = study_ids.pop() study_name = libraries[0]["study_name"] subject = f"Study {study_id}: PacBio data is available" diff --git a/src/npg_notify/porch_wrapper/qc_state.py b/src/npg_notify/porch_wrapper/qc_state.py index 1668c91..0a46a1c 100644 --- a/src/npg_notify/porch_wrapper/qc_state.py +++ b/src/npg_notify/porch_wrapper/qc_state.py @@ -10,7 +10,7 @@ from npg_notify.config import get_config_data from npg_notify.db.mlwh import get_study_contacts from npg_notify.db.utils import db_credentials_from_config_file, get_connection -from npg_notify.mail import generate_email, send_notification +from npg_notify.mail import generate_email_pac_bio, send_notification from npg_porch_cli import send_request from npg_porch_cli.api import Pipeline, PorchAction from npg_porch_cli.api import send as send_porch_request @@ -284,7 +284,7 @@ def process_task(conf_file_path): continue try: - (subject, text) = generate_email( + (subject, text) = generate_email_pac_bio( domain=domain, langqc_run_url=urljoin( langqc_base_url, pac_bio_run_iu_url diff --git a/tests/test_generate_and_send_email.py b/tests/test_generate_and_send_email.py index f9cdd33..bedc5a9 100644 --- a/tests/test_generate_and_send_email.py +++ b/tests/test_generate_and_send_email.py @@ -1,6 +1,7 @@ import random -from npg_notify.mail import generate_email +import pytest +from npg_notify.mail import generate_email_pac_bio domain = "langqc.com" 
langqc_run_url = "https://langqc.com/ui/run" @@ -64,7 +65,7 @@ def test_generate_email_one_library(): } ] - (subject, generated_content) = generate_email( + (subject, generated_content) = generate_email_pac_bio( domain=domain, langqc_run_url=langqc_run_url, irods_docs_url=irods_docs_url, @@ -92,7 +93,7 @@ def test_generate_email_one_library(): qc_outcome["outcome"] = None qc_outcome["qc_state"] = "Nobody can tell" - (subject, generated_content) = generate_email( + (subject, generated_content) = generate_email_pac_bio( domain=domain, langqc_run_url=langqc_run_url, irods_docs_url=irods_docs_url, @@ -144,7 +145,7 @@ def test_generate_email_two_libraries(): for i in range(0, 5) ] - (subject, generated_content) = generate_email( + (subject, generated_content) = generate_email_pac_bio( domain=domain, langqc_run_url=langqc_run_url, irods_docs_url=irods_docs_url, @@ -197,7 +198,7 @@ def test_generate_email_seven_libraries(): for i in range(0, 7) ] - (subject, generated_content) = generate_email( + (subject, generated_content) = generate_email_pac_bio( domain=domain, langqc_run_url=langqc_run_url, irods_docs_url=irods_docs_url, @@ -227,3 +228,17 @@ def test_generate_email_seven_libraries(): assert generated_content == ( content + footer.replace("TRACTION-RUN", "TRACTION-RUN-1333") ) + + libraries[3]["study_id"] = "12345" + with pytest.raises( + ValueError, + match=r"Libraries from different studies in 'libraries' attribute", + ): + generate_email_pac_bio( + domain=domain, + langqc_run_url=langqc_run_url, + irods_docs_url=irods_docs_url, + qc_outcome=qc_outcome, + well_data=get_well_data(libraries), + libraries=libraries, + ) From 71999501486139c5f293e4bd5b35dc2de526bf27 Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 19 Jul 2024 15:59:11 +0100 Subject: [PATCH 10/15] Code review suggestions implemented on top level --- src/npg_notify/porch_wrapper/qc_state.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git 
a/src/npg_notify/porch_wrapper/qc_state.py b/src/npg_notify/porch_wrapper/qc_state.py index 0a46a1c..b71f569 100644 --- a/src/npg_notify/porch_wrapper/qc_state.py +++ b/src/npg_notify/porch_wrapper/qc_state.py @@ -9,7 +9,7 @@ from npg_notify.config import get_config_data from npg_notify.db.mlwh import get_study_contacts -from npg_notify.db.utils import db_credentials_from_config_file, get_connection +from npg_notify.db.utils import get_connection, get_db_connection_string from npg_notify.mail import generate_email_pac_bio, send_notification from npg_porch_cli import send_request from npg_porch_cli.api import Pipeline, PorchAction @@ -68,7 +68,7 @@ def run(): exit(0 if success else 1) -def create_tasks(conf_file_path: str): +def create_tasks(conf_file_path: str) -> bool: """Retrieves and registers with Porch recently assigned QC states. Retrieves from LangQC API all recently (within the last four weeks) assigned @@ -97,7 +97,7 @@ def create_tasks(conf_file_path: str): ) qc_states = send_request( validate_ca_cert=_validate_ca_cert(), - url=urljoin(langqc_conf["api_url"], langqc_conf["recently_qced_url"]), + url=urljoin(langqc_conf["api_url"], langqc_conf["recently_qced_path"]), method="GET", auth_type=None, ) @@ -105,8 +105,7 @@ def create_tasks(conf_file_path: str): num_products = len(qc_states) logger.info(f"Retrieved QC states for {num_products} products.") - os.environ["NPG_PORCH_TOKEN"] = porch_conf["npg_porch_token"] - del porch_conf["npg_porch_token"] + os.environ["NPG_PORCH_TOKEN"] = porch_conf.pop("npg_porch_token") # qc_states is a dictionary where keys are product IDs and values are # lists of QC states. A list of one QC state is expected because we are @@ -161,7 +160,7 @@ def create_task( send_porch_request(action=action, pipeline=pipeline) -def process_task(conf_file_path): +def process_task(conf_file_path) -> bool: """Processes one task for the email notification pipeline. 
Performs the following steps: @@ -209,13 +208,13 @@ def process_task(conf_file_path): # Get all config data or error before claiming the task. porch_api_url = porch_config["api_url"] - pac_bio_well_libraries_url = langqc_conf["pac_bio_well_libraries_url"] + pac_bio_well_libraries_path = langqc_conf["pac_bio_well_libraries_path"] langqc_base_url = langqc_conf["api_url"] - pac_bio_run_iu_url = langqc_conf["pac_bio_run_iu_url"] + pac_bio_run_iu_path = langqc_conf["pac_bio_run_iu_path"] irods_docs_url = irods_config["user_manual_url"] domain = mail_config["domain"] # Check that database credentials are in place - db_credentials_from_config_file( + get_db_connection_string( conf_file_path=conf_file_path, conf_file_section="MySQL MLWH" ) @@ -243,7 +242,7 @@ def process_task(conf_file_path): url = re.sub( "\[\w+\]", product_id, - pac_bio_well_libraries_url, + pac_bio_well_libraries_path, ) url = urljoin(langqc_base_url, url) response = send_request( @@ -287,7 +286,7 @@ def process_task(conf_file_path): (subject, text) = generate_email_pac_bio( domain=domain, langqc_run_url=urljoin( - langqc_base_url, pac_bio_run_iu_url + langqc_base_url, pac_bio_run_iu_path ), irods_docs_url=irods_docs_url, qc_outcome=task_input, From dee0716d9ba46bc2fb6d00840ffcd2b083033a5e Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 19 Jul 2024 16:31:28 +0100 Subject: [PATCH 11/15] Extended README --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index 4d6f315..1ebe7ff 100644 --- a/README.md +++ b/README.md @@ -17,3 +17,27 @@ have to be sent for the same event, it is advised either to use a separate `porch` pipeline for each type of notification or to include additional information about the notification protocol and format into the payload that is sent to `porch`. + +## Scope + +The current version implements notifications for PacBio sequencing platform +customers. 
+ +## Running the scripts + +To register recently QC-ed entities as tasks with `porch` + +```bash +qc_state_notification register --conf_file_path path/to/qc_state_app_config.ini +``` + +To process one `porch` task + +```bash +qc_state_notification process --conf_file_path path/to/qc_state_app_config.ini +``` + +Processing includes claiming one task, sending per-study emails and updating the +status of the `porch` task to `DONE`. + +The test data directory has an example of a [configuration file](tests/data/qc_state_app_config.ini). \ No newline at end of file From c78bc0f89a700e1d92e4580c81b83c432a2082d9 Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 19 Jul 2024 16:39:49 +0100 Subject: [PATCH 12/15] Enabled flake8 warnings --- pyproject.toml | 6 ++++++ src/npg_notify/porch_wrapper/qc_state.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c74d20c..9541b21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,12 @@ ruff = "^0.4.9" # Set the maximum line length to 79. 
line-length = 79 +[tool.ruff.lint] +select = [ + # flake8 + "W", +] + [tool.pytest.ini_options] addopts = [ "--import-mode=importlib", diff --git a/src/npg_notify/porch_wrapper/qc_state.py b/src/npg_notify/porch_wrapper/qc_state.py index b71f569..f5937c0 100644 --- a/src/npg_notify/porch_wrapper/qc_state.py +++ b/src/npg_notify/porch_wrapper/qc_state.py @@ -240,7 +240,7 @@ def process_task(conf_file_path) -> bool: product_id = claimed_task["task_input"]["id_product"] try: url = re.sub( - "\[\w+\]", + r"\[\w+\]", product_id, pac_bio_well_libraries_path, ) From 5de150585cbb2e7d4c97ffb313980632b06f8726 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Tue, 23 Jul 2024 14:40:08 +0100 Subject: [PATCH 13/15] Made the project deployable with pip --- README.md | 6 +++--- pyproject.toml | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1ebe7ff..fafd1b7 100644 --- a/README.md +++ b/README.md @@ -28,16 +28,16 @@ customers. To register recently QC-ed entities as tasks with `porch` ```bash -qc_state_notification register --conf_file_path path/to/qc_state_app_config.ini +npg_qc_state_notification register --conf_file_path path/to/qc_state_app_config.ini ``` To process one `porch` task ```bash -qc_state_notification process --conf_file_path path/to/qc_state_app_config.ini +npg_qc_state_notification process --conf_file_path path/to/qc_state_app_config.ini ``` Processing includes claiming one task, sending per-study emails and updating the status of the `porch` task to `DONE`. -The test data directory has an example of a [configuration file](tests/data/qc_state_app_config.ini). \ No newline at end of file +The test data directory has an example of a [configuration file](tests/data/qc_state_app_config.ini). 
diff --git a/pyproject.toml b/pyproject.toml index 9541b21..0c34a74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,26 +2,30 @@ name = "npg_notify" version = "0.0.1" description = "Utility for client notifications" -authors = ["Marina Gourtovaia "] +authors = ["Marina Gourtovaia"] license = "GPL-3.0-or-later" readme = "README.md" [tool.poetry.scripts] -qc_state_notification = "npg_notify.porch_wrapper.qc_state:run" +npg_qc_state_notification = "npg_notify.porch_wrapper.qc_state:run" [tool.poetry.dependencies] python = "^3.11" SQLAlchemy = { version="^2.0.1", extras=["pymysql"] } SQLAlchemy-Utils = "^0.41.2" -cryptography = { version="^41.0.3" } +cryptography = "^41.0.3" +PyYAML = "^6.0.0" npg_porch_cli = { git="https://github.com/wtsi-npg/npg_porch_cli.git", branch="devel" } [tool.poetry.dev-dependencies] pytest = "^8.2.2" -PyYAML = "^6.0.0" requests-mock = "^1.12.1" ruff = "^0.4.9" +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + [tool.ruff] # Set the maximum line length to 79. 
line-length = 79 From fc63bb64839c9c55c4a773bf1ebde14a945e38b3 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 24 Jul 2024 10:05:08 +0100 Subject: [PATCH 14/15] Dependencies version update --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0c34a74..9a48d84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ SQLAlchemy = { version="^2.0.1", extras=["pymysql"] } SQLAlchemy-Utils = "^0.41.2" cryptography = "^41.0.3" PyYAML = "^6.0.0" -npg_porch_cli = { git="https://github.com/wtsi-npg/npg_porch_cli.git", branch="devel" } +npg_porch_cli = { git="https://github.com/wtsi-npg/npg_porch_cli.git", tag="0.1.0" } [tool.poetry.dev-dependencies] pytest = "^8.2.2" From ff95095cb19f7cec7f7e0895aab09f86833d0b46 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 24 Jul 2024 10:23:35 +0100 Subject: [PATCH 15/15] Added a change log file --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..18966b1 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,12 @@ +# Change Log for npg_notifications Project + +The format is based on [Keep a Changelog](http://keepachangelog.com/). +This project adheres to [Semantic Versioning](http://semver.org/). + +## [Unreleased] + +## [0.1.0] - 2024-07-24 + +### Added + +- Initial project scaffold, code and tests