Skip to content

Commit

Permalink
feat(sscheck): Add New SSCheck Stack (#808)
Browse files Browse the repository at this point in the history
* Migrate SSChecker to the Orca world

* instruction && override_cycles

* fix body parse

* tests and fixes

* updates

* Update statelessStackCollectionClass.ts

* Update handler.py
  • Loading branch information
williamputraintan authored Jan 15, 2025
1 parent 54045a4 commit f38855e
Show file tree
Hide file tree
Showing 27 changed files with 2,882 additions and 0 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ test-stateless-app-suite:
@(cd lib/workload/stateless/stacks/fmannotator && $(MAKE) test)
@(cd lib/workload/stateless/stacks/bclconvert-manager && $(MAKE) test)
@(cd lib/workload/stateless/stacks/workflow-manager && $(MAKE) test)
@(cd lib/workload/stateless/stacks/sample-sheet-check && $(MAKE) test)

# The default outer `test` target run all test in this repo
test: test-stateful-iac test-stateless-iac test-stateful-app-suite test-stateless-app-suite
Expand Down
2 changes: 2 additions & 0 deletions config/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ import { getOraDecompressionManagerStackProps } from './stacks/oraDecompressionP
import { getPgDDProps } from './stacks/pgDD';
import { getDataMigrateStackProps } from './stacks/dataMigrate';
import { getHtsgetProps } from './stacks/htsget';
import { getSampleSheetCheckerProps } from './stacks/sampleSheetChecker';

interface EnvironmentConfig {
name: string;
Expand Down Expand Up @@ -135,6 +136,7 @@ export const getEnvironmentConfig = (stage: AppStage): EnvironmentConfig | null
fmAnnotatorProps: getFmAnnotatorProps(),
dataMigrateProps: getDataMigrateStackProps(stage),
htsgetProps: getHtsgetProps(stage),
sampleSheetCheckerProps: getSampleSheetCheckerProps(stage),
pgDDProps: getPgDDProps(stage),
},
};
Expand Down
19 changes: 19 additions & 0 deletions config/stacks/sampleSheetChecker.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { SampleSheetCheckerStackProps } from '../../lib/workload/stateless/stacks/sample-sheet-check/stack';
import {
AppStage,
cognitoApiGatewayConfig,
corsAllowOrigins,
logsApiGatewayConfig,
} from '../constants';

/**
 * Build the props for the Sample Sheet Checker (SSCheck) stack.
 *
 * The API Gateway settings start from the shared Cognito configuration and
 * add the CORS origins and access-log configuration of the given stage.
 *
 * @param stage - deployment stage used to look up the per-stage settings
 * @returns the props object consumed by the SSCheck stack
 */
export const getSampleSheetCheckerProps = (stage: AppStage): SampleSheetCheckerStackProps => ({
  apiGatewayConstructProps: {
    ...cognitoApiGatewayConfig,
    corsAllowOrigins: corsAllowOrigins[stage],
    apiGwLogsConfig: logsApiGatewayConfig[stage],
    apiName: 'SSCheck',
    customDomainNamePrefix: 'sscheck-orcabus',
  },
});
11 changes: 11 additions & 0 deletions lib/workload/stateless/stacks/sample-sheet-check/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Variables
LAMBDA_DIR := ./sample-sheet-check-lambda

# Default target
all: test

# Run tests
test:
$(MAKE) -C $(LAMBDA_DIR) test

.PHONY: all test
9 changes: 9 additions & 0 deletions lib/workload/stateless/stacks/sample-sheet-check/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Sample Sheet Checker

Deploys the sample sheet checker as an AWS Lambda function.

## Checker Implementation

```sh
cd ./sample-sheet-check-lambda
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.venv
venv

/tests/sample/
!tests/sample/mock-*

/log/
SampleSheet_v2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Makefile for Python project

# Variables
VENV_DIR = .venv
PYTHON = $(VENV_DIR)/bin/python
PIP = $(VENV_DIR)/bin/pip
TEST_DIR = tests

# Create virtual environment
$(VENV_DIR)/bin/activate: requirements.txt
python3 -m venv $(VENV_DIR)
$(PIP) install -r requirements.txt

# Install dependencies
install: $(VENV_DIR)/bin/activate

# Run tests
test: install
$(PYTHON) -m unittest discover $(TEST_DIR)

# Clean up
clean:
rm -rf $(VENV_DIR)
find . -type f -name '*.pyc' -delete
find . -type d -name '__pycache__' -delete

.PHONY: install test clean
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Sample Sheet Checker

## Setup

### Using Python Environment

```shell
conda create -n orcabus_sscheck python=3.12
conda activate orcabus_sscheck
```

### Running Locally

To run the script and see the available options, use the following command:

```shell
python main.py -h

usage: main.py [-h] --path PATH [--log-path LOG_PATH] [--skip-metadata-check] [--skip-v2] [--v2-filename V2_FILENAME]

Run sample sheet check locally.

options:
-h, --help show this help message and exit
--path PATH The path to the sample sheet file.
--log-path LOG_PATH Name of the output file for the sscheck log file. Default: log/ss-checker.log
--skip-metadata-check
Skip sample sheet check against metadata API (API token required).
--skip-v2, --skip-v2-sample sheet-output
Skip generating the sample sheet v2. ('--skip-metadata-check' must be set to False).
--v2-filename V2_FILENAME
Name of the output file for the generated sample sheet v2. Default: SampleSheet_v2.csv

```

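Example run (the metadata check reads its API token from the `JWT_AUTH` environment variable; pass `--skip-metadata-check` to run without one):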

```shell
python main.py --path ./tests/sample/sample-1.csv
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import base64
import tempfile
import logging
from email.parser import BytesParser

from src.checker import construct_sample_sheet, run_sample_sheet_content_check, run_sample_sheet_check_with_metadata, \
construct_logger
from src.http import construct_body, construct_response
from src.v2_samplesheet_builder import v1_to_v2_samplesheet

# Logging
# Path of the log file that lambda_handler passes to construct_logger and construct_body.
LOG_PATH = "/tmp/samplesheet_check.log"
# Root logger (no name given), emitting records at INFO level and above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)


def _bad_request(error_message, origin):
    """Build the HTTP 400 response returned when the request body cannot be processed."""
    body = construct_body(check_status="FAIL", error_message=error_message,
                          v2_sample_sheet='')
    return construct_response(status_code=400, body=body, origin=origin)


def lambda_handler(event, context):
    """
    Check an uploaded sample sheet and return the result as an HTTP response.

    The request body must be a multipart payload with two named parts:
    ``file`` (the sample sheet content) and ``logLevel`` (the level passed to
    ``construct_logger``).

    Parameters
    ----------
    event : dict
        The payload passed to the lambda. ``headers``, ``body`` and
        ``isBase64Encoded`` are read from it.
    context : Object
        The AWS resource information (only logged).

    Returns
    -------
    The value built by ``construct_response``: status 400 when the body is
    missing, not multipart, lacks a required part or is not UTF-8; otherwise
    status 200 with ``check_status`` "PASS" or "FAIL".
    """
    # Headers may be absent or explicitly null in the event.
    headers = event.get("headers") or {}

    # Log a copy of the event with the bearer token removed, whatever its casing.
    event_copy = event.copy()
    event_copy["headers"] = {
        key: value for key, value in headers.items()
        if str(key).lower() != "authorization"
    }
    logger.info("Processing (event, context): %s, %s", event_copy, context)

    # Parse header (header name casing depends on the caller, so try both)
    origin = headers.get("origin", headers.get("Origin", ""))
    authorization = headers.get("Authorization", headers.get("authorization", ""))
    content_type = headers.get("Content-Type", headers.get("content-type", ""))

    # Parse body payload
    raw_body = event.get("body")
    if not raw_body:
        return _bad_request("Invalid body", origin)

    if event.get("isBase64Encoded", False):
        try:
            body = base64.b64decode(raw_body)
        except ValueError:  # binascii.Error is a ValueError subclass
            return _bad_request("Invalid body", origin)
    else:
        body = raw_body.encode()

    # The email parser needs the Content-Type header (it carries the multipart
    # boundary), so prepend it to the raw body before parsing.
    ct = f"Content-Type: {content_type}\n\n".encode()
    msg = BytesParser().parsebytes(ct + body)
    if not msg.is_multipart():
        return _bad_request("Invalid body", origin)

    multipart_content = {
        part.get_param('name', header='content-disposition'): part.get_payload(decode=True)
        for part in msg.get_payload()
    }

    file_data = multipart_content.get("file")
    log_level_data = multipart_content.get("logLevel")
    if file_data is None or log_level_data is None:
        return _bad_request(
            "Invalid body: 'file' and 'logLevel' form fields are required", origin)

    try:
        file_text = file_data.decode("utf-8")
        log_level = log_level_data.decode("utf-8")
    except UnicodeDecodeError:
        return _bad_request(
            "Invalid body: 'file' and 'logLevel' must be UTF-8 encoded", origin)

    # The checker reads the sample sheet from a path, so save the upload to a
    # temp file. The context manager closes and deletes it on every exit path,
    # so repeated invocations do not accumulate files. Re-opening the file by
    # name while it is still open relies on POSIX semantics.
    with tempfile.NamedTemporaryFile(mode='w+') as temporary_data:
        temporary_data.write(file_text)
        temporary_data.flush()  # make the content visible to readers of the path

        try:
            construct_logger(log_path=LOG_PATH, log_level=log_level)

            # Construct and run sample sheet checker
            sample_sheet = construct_sample_sheet(temporary_data.name)
            run_sample_sheet_content_check(sample_sheet)
            run_sample_sheet_check_with_metadata(sample_sheet, authorization)

            # run sample sheet v2 conversion
            v2_sample_sheet_str = v1_to_v2_samplesheet(sample_sheet)

        except Exception as e:
            # A failed check is reported in the body (check_status="FAIL"),
            # not as an HTTP error, so the status code stays 200.
            body = construct_body(check_status="FAIL", error_message=str(e), log_path=LOG_PATH,
                                  v2_sample_sheet='')
            return construct_response(status_code=200, body=body, origin=origin)

    body = construct_body(check_status='PASS', log_path=LOG_PATH,
                          v2_sample_sheet=v2_sample_sheet_str)
    return construct_response(status_code=200, body=body, origin=origin)
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
FROM public.ecr.aws/lambda/python:3.12

WORKDIR ${LAMBDA_TASK_ROOT}

# Install the specified packages first, from requirements.txt alone, so this
# layer is rebuilt only when the dependencies change and not on every source edit
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# COPY all files
COPY . .

# Specify handler
CMD [ "handler.lambda_handler" ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import json
import logging
import os
import argparse

from src.checker import construct_sample_sheet, run_sample_sheet_content_check, run_sample_sheet_check_with_metadata
from src.logger import set_logger
from src.v2_samplesheet_builder import v1_to_v2_samplesheet

def get_argument():
    """
    Parse the command-line options for a local sample sheet check.

    The parsed values are also printed as a short summary before returning.

    Returns
    -------
    argparse.Namespace
        Holds ``path``, ``log_path``, ``skip_metadata_check``, ``skip_v2``
        and ``v2_filename``.
    """
    parser = argparse.ArgumentParser(
        description="Run sample sheet check locally."
    )
    parser.add_argument(
        "--path",
        required=True,
        help="The path to the sample sheet file.",
    )

    parser.add_argument(
        "--log-path",
        default="log/ss-checker.log",
        help="Name of the output file for the sscheck log file. Default: log/ss-checker.log",
    )

    parser.add_argument(
        "--skip-metadata-check", action="store_true", default=False,
        help="Skip sample sheet check against metadata API (API token required)."
    )

    parser.add_argument(
        "--skip-v2", "--skip-v2-samplesheet-output", action="store_true", default=False,
        help="Skip generating the sample sheet v2. ('--skip-metadata-check' must be set to False)."
    )
    # The long alias used to be spelled with an embedded space, which cannot be
    # typed without quoting. Keep that spelling as a hidden alias so existing
    # invocations still parse; it sets the same `skip_v2` attribute.
    parser.add_argument(
        "--skip-v2-sample sheet-output", dest="skip_v2", action="store_true",
        default=argparse.SUPPRESS, help=argparse.SUPPRESS,
    )

    parser.add_argument(
        "--v2-filename",
        default="SampleSheet_v2.csv",
        help="Name of the output file for the generated sample sheet v2. Default: SampleSheet_v2.csv",
    )

    args_input = parser.parse_args()

    # The v2 sheet is generated only when the metadata check runs, so skipping
    # the metadata check also skips v2 generation.
    skip_v2_effective = args_input.skip_metadata_check or args_input.skip_v2

    print("#" * 30)
    print(f"Sample sheet (SS) Path : {args_input.path}")
    print(f"Log path : {args_input.log_path}")
    print(f"Skip SS Check w/ metadata : {args_input.skip_metadata_check}")
    print(f"Skip generating v2 : {skip_v2_effective}")
    print(f"SS V2 output (if enabled) : {args_input.v2_filename}")
    print("#" * 30)

    return args_input


def main():
    """
    Run the sample sheet check from the command line and print a JSON summary.

    Steps: parse the arguments, set up the log file, run the content check,
    optionally run the metadata check (token taken from the ``JWT_AUTH``
    environment variable) and optionally write the v2 sample sheet.

    Raises
    ------
    ValueError
        If the metadata check is enabled and ``JWT_AUTH`` is not set.
    """
    args = get_argument()
    log_path = args.log_path
    v2_filename = args.v2_filename

    # Setup logger logistic: the log directory must exist before logging to it
    directory = os.path.dirname(log_path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    set_logger(log_path=log_path, log_level=logging.INFO)

    # Construct and run sample sheet checker
    sample_sheet = construct_sample_sheet(args.path)
    run_sample_sheet_content_check(sample_sheet)

    if not args.skip_metadata_check:
        token = os.environ.get("JWT_AUTH")
        if token is None:
            raise ValueError("JWT_AUTH environment variable is not set.")

        run_sample_sheet_check_with_metadata(sample_sheet, token)

    # Reaching this point means no check raised.
    result = {"Check status": "PASS", "Log path": log_path}

    # The v2 sheet is generated only when the metadata check was run too.
    if not args.skip_v2 and not args.skip_metadata_check:
        try:
            v2_sample_sheet_str = v1_to_v2_samplesheet(sample_sheet)

            with open(v2_filename, 'w') as file:
                file.write(v2_sample_sheet_str)
        except Exception as e:
            logging.exception("Error generating v2 sample sheet: %s", e)
            raise  # bare raise keeps the original traceback

        # Report the v2 path only when the file was actually written.
        result["V2 SampleSheet (if enabled)"] = v2_filename

    print("\n")
    print(json.dumps(result, indent=4))


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
v2-samplesheet-maker==4.2.4.post20241110133537
scipy==1.15.0
pandas==2.2.3
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import re


def camel_to_snake(name):
    """
    Return ``name`` converted from camel case to snake case.

    Underscores are inserted in two passes and the result is lowercased.
    """
    # Pass 1: split in front of a capital that starts a lowercase run,
    # e.g. "HTTPResponse" -> "HTTP_Response".
    word_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    # Pass 2: split a lowercase letter or digit from the capital after it,
    # e.g. "getHTTP" -> "get_HTTP".
    boundary_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', word_split)
    return boundary_split.lower()
Loading

0 comments on commit f38855e

Please sign in to comment.