-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #709 from umccr/feat/pg-dd
feat: dump postgres records
- Loading branch information
Showing
19 changed files
with
1,140 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import { | ||
accountIdAlias, | ||
AppStage, | ||
computeSecurityGroupName, | ||
rdsMasterSecretName, | ||
region, | ||
vpcProps, | ||
} from '../constants'; | ||
import { PgDDStackProps } from '../../lib/workload/stateless/stacks/pg-dd/deploy/stack'; | ||
import { getDataBucketStackProps } from './dataBucket'; | ||
|
||
/**
 * Build the props for the PgDD stack for the given stage.
 *
 * @param stage the application stage used to look up the data bucket props.
 * @returns the PgDD stack props, or `undefined` when the data bucket props for
 * the stage do not define a bucket name.
 */
export const getPgDDProps = (stage: AppStage): PgDDStackProps | undefined => {
  const { bucketName } = getDataBucketStackProps(stage);

  // Without a bucket there is nowhere to dump to, so no props are produced.
  if (bucketName === undefined) {
    return undefined;
  }

  // NOTE(review): the ARN always uses the beta account id, independent of `stage` — confirm this is intended.
  const secretArn = `arn:aws:secretsmanager:${region}:${accountIdAlias.beta}:secret:${rdsMasterSecretName}`; // pragma: allowlist secret

  return {
    bucket: bucketName,
    prefix: 'pg-dd',
    secretArn,
    lambdaSecurityGroupName: computeSecurityGroupName,
    vpcProps,
  };
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
deploy | ||
.env | ||
.env.example | ||
.gitignore | ||
README.md | ||
data | ||
.ruff_cache |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
PG_DD_URL=postgresql://orcabus:[email protected]:5432 # pragma: allowlist secret | ||
PG_DD_DIR=data | ||
PG_DD_BUCKET=orcabus-test-data-843407916570-ap-southeast-2 | ||
PG_DD_PREFIX=pg-dd | ||
|
||
PG_DD_DATABASE_METADATA_MANAGER=metadata_manager | ||
PG_DD_DATABASE_SEQUENCE_RUN_MANAGER=sequence_run_manager | ||
PG_DD_DATABASE_WORKFLOW_MANAGER=workflow_manager | ||
PG_DD_DATABASE_FILEMANAGER=filemanager | ||
PG_DD_DATABASE_FILEMANAGER_SQL_DUMP='select * from s3_object order by sequencer limit 10000' | ||
PG_DD_DATABASE_FILEMANAGER_SQL_LOAD=s3_object |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.env | ||
data | ||
.ruff_cache | ||
response.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# This Dockerfile is intended to be used as part of a Docker Compose setup. | ||
# When running this microservice from the Docker Compose root, this Dockerfile | ||
# will build the image, install dependencies, and run the CLI. | ||
|
||
FROM public.ecr.aws/docker/library/python:3.13 | ||
|
||
RUN pip3 install poetry | ||
|
||
WORKDIR /app | ||
|
||
COPY . . | ||
RUN poetry install --no-interaction --no-root | ||
|
||
ENTRYPOINT ["poetry", "run", "cli"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
.PHONY: *

# Default arguments passed to the CLI by the `cli` target. These are left
# unquoted on purpose: make keeps quotes literally, so a quoted default would
# reach the CLI as one single argument instead of `load` and `--exists-ok`.
COMMAND ?= load --exists-ok

# Update and install the project dependencies using poetry.
install:
	@poetry update

# Format the code with ruff.
lint: install
	@poetry run ruff format .

# Format, then run the ruff linter checks.
check: lint
	@poetry run ruff check .

# Run the CLI with the arguments in COMMAND, e.g. `make cli COMMAND="--help"`.
cli: install
	@poetry run cli $(COMMAND)

# Remove local dump data and the ruff cache.
clean:
	rm -rf data && rm -rf .ruff_cache
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# Postgres data dump | ||
|
||
Postgres data dump - a service that dumps (like `dd`) orcabus postgres databases to S3. | ||
|
||
## Usage | ||
|
||
Call the deployed function to update the current dump: | ||
|
||
```sh | ||
aws lambda invoke --function-name orcabus-pg-dd response.json | ||
``` | ||
|
||
This is set up to dump the metadata_manager, workflow_manager, sequence_run_manager, and 10000 of the most recent | ||
rows of the filemanager database. | ||
|
||
This command can be run locally using make, or by running `poetry` directly: | ||
|
||
```sh | ||
make cli COMMAND="--help" | ||
``` | ||
|
||
For example, to dump and upload a specific database to s3: | ||
|
||
```sh | ||
poetry run cli dump --database metadata_manager && poetry run cli upload | ||
``` | ||
|
||
The `Dockerfile` is set up to launch with the top-level `Makefile`, which also contains commands for running the CLI. | ||
By default `start-all-service` will run a `load` on the local database, downloading any dumps from S3 along the way. | ||
AWS credentials must be present in the shell for this to work. | ||
|
||
## Configuration | ||
|
||
This function can be configured by setting the following environment variables, see [.env.example][env-example] for an example: | ||
|
||
| Name | Description | Type | | ||
|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------| | ||
| `PG_DD_URL` | The database URL to dump databases from. | Postgres connection string | | ||
| `PG_DD_SECRET` | The secret name or ARN to fetch the database URL from. This is only used in the Lambda function, and overrides `PG_DD_URL`. | `string` | | ||
| `PG_DD_DATABASE_<DATABASE_NAME>` | The name of a database to dump records from, where `<DATABASE_NAME>` represents the target database. Specify this multiple times to dump from multiple databases. | `string` | ||
| `PG_DD_DATABASE_<DATABASE_NAME>_SQL_DUMP` | Custom SQL code to execute when dumping database records for `<DATABASE_NAME>`. This is optional, and by default all records from all tables are dumped. Specify this as a list of SQL statements to generate a corresponding CSV file. | `string[]` or undefined | ||
| `PG_DD_DATABASE_<DATABASE_NAME>_SQL_LOAD` | The name of the table to load into for `<DATABASE_NAME>`. This is required if loading data after dumping with `PG_DD_DATABASE_<DATABASE_NAME>_SQL_DUMP` to specify the table to load data into. | `string[]` or undefined | ||
| `PG_DD_BUCKET` | The bucket to dump data to. This is required when deploying the Lambda function. | `string` or undefined | | ||
| `PG_DD_PREFIX` | The bucket prefix to use when writing to a bucket. This is optional. | `string` or undefined | | ||
| `PG_DD_DIR` | The local filesystem directory to dump data to when running this command locally. This is not used on the deployed Lambda function. | filesystem directory or undefined | | ||
|
||
## Local development | ||
|
||
This project uses [poetry] to manage dependencies. | ||
|
||
Run the linter and formatter: | ||
|
||
``` | ||
make check | ||
``` | ||
|
||
[poetry]: https://python-poetry.org/ | ||
[env-example]: .env.example |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
import { Duration, Stack, StackProps } from 'aws-cdk-lib'; | ||
import { Construct } from 'constructs'; | ||
import { PythonFunction } from '@aws-cdk/aws-lambda-python-alpha'; | ||
import path from 'path'; | ||
import { Architecture, Runtime } from 'aws-cdk-lib/aws-lambda'; | ||
import { | ||
ISecurityGroup, | ||
IVpc, | ||
SecurityGroup, | ||
SubnetType, | ||
Vpc, | ||
VpcLookupOptions, | ||
} from 'aws-cdk-lib/aws-ec2'; | ||
import { NamedLambdaRole } from '../../../../components/named-lambda-role'; | ||
import { ManagedPolicy, PolicyStatement, Role } from 'aws-cdk-lib/aws-iam'; | ||
import { readFileSync } from 'fs'; | ||
|
||
/**
 * Configuration accepted by the PgDD stack.
 */
export type PgDDStackProps = {
  /**
   * Name of the S3 bucket that dumps are written to. The function role is
   * granted `s3:PutObject` on this bucket.
   */
  bucket: string;
  /**
   * ARN of the secret used to connect to the database. It is passed to the
   * function as `PG_DD_SECRET`.
   */
  secretArn: string;
  /**
   * Options used to look up the existing VPC that the function runs in.
   */
  vpcProps: VpcLookupOptions;
  /**
   * Name of an existing security group that is attached to the function.
   */
  lambdaSecurityGroupName: string;
  /**
   * Optional key prefix for written data. When set, it is passed to the
   * function as `PG_DD_PREFIX`.
   */
  prefix?: string;
};
|
||
/**
 * Deploy the PgDD stack.
 *
 * Creates the `orcabus-pg-dd` Python Lambda function inside the looked-up VPC,
 * together with a role that can write objects to the dump bucket and read the
 * database secret.
 */
export class PgDDStack extends Stack {
  private readonly vpc: IVpc;
  private readonly securityGroup: ISecurityGroup;
  private readonly role: Role;

  /**
   * @param scope the parent construct.
   * @param id the construct id of this stack.
   * @param props standard stack props combined with the PgDD-specific props.
   */
  constructor(scope: Construct, id: string, props: StackProps & PgDDStackProps) {
    super(scope, id, props);

    // Both the VPC and the shared security group already exist and are looked up.
    this.vpc = Vpc.fromLookup(this, 'MainVpc', props.vpcProps);
    this.securityGroup = SecurityGroup.fromLookupByName(
      this,
      'OrcaBusLambdaSecurityGroup',
      props.lambdaSecurityGroupName,
      this.vpc
    );

    this.role = new NamedLambdaRole(this, 'Role');
    this.role.addManagedPolicy(
      ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaVPCAccessExecutionRole')
    );
    // The function only needs to write dumps to the bucket.
    this.role.addToPolicy(
      new PolicyStatement({
        actions: ['s3:PutObject'],
        resources: [`arn:aws:s3:::${props.bucket}`, `arn:aws:s3:::${props.bucket}/*`],
      })
    );
    // The `-*` suffix is appended because `secretArn` is given without the
    // trailing suffix of the full secret ARN.
    this.role.addToPolicy(
      new PolicyStatement({
        actions: ['secretsmanager:GetSecretValue'],
        resources: [`${props.secretArn}-*`],
      })
    );

    const securityGroup = new SecurityGroup(this, 'SecurityGroup', {
      vpc: this.vpc,
      allowAllOutbound: true,
      description: 'Security group that allows the PgDD Lambda function to egress out.',
    });

    const entry = path.join(__dirname, '..');
    // Reuse the .dockerignore entries as asset excludes. Lines are trimmed and
    // blank lines dropped so that empty or CR-terminated patterns are not passed on.
    const assetExcludes = readFileSync(path.join(entry, '.dockerignore'), 'utf-8')
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line.length > 0);

    new PythonFunction(this, 'function', {
      entry,
      functionName: 'orcabus-pg-dd',
      index: 'pg_dd/handler.py',
      runtime: Runtime.PYTHON_3_12,
      architecture: Architecture.ARM_64,
      timeout: Duration.minutes(5),
      memorySize: 1024,
      vpc: this.vpc,
      vpcSubnets: {
        subnetType: SubnetType.PRIVATE_WITH_EGRESS,
      },
      bundling: {
        assetExcludes,
      },
      role: this.role,
      securityGroups: [securityGroup, this.securityGroup],
      environment: {
        PG_DD_SECRET: props.secretArn,
        PG_DD_BUCKET: props.bucket,
        PG_DD_DATABASE_METADATA_MANAGER: 'metadata_manager',
        PG_DD_DATABASE_SEQUENCE_RUN_MANAGER: 'sequence_run_manager',
        PG_DD_DATABASE_WORKFLOW_MANAGER: 'workflow_manager',
        PG_DD_DATABASE_FILEMANAGER: 'filemanager',
        // Must use the `_SQL_DUMP` suffix, matching the documented
        // `PG_DD_DATABASE_<DATABASE_NAME>_SQL_DUMP` variable.
        PG_DD_DATABASE_FILEMANAGER_SQL_DUMP:
          'select * from s3_object order by sequencer limit 10000',
        ...(props.prefix && { PG_DD_PREFIX: props.prefix }),
      },
    });
  }
}
Empty file.
Oops, something went wrong.