From ccfd3abd6ddeaa09e74b2bc235518222a8fcea6c Mon Sep 17 00:00:00 2001 From: "Shahid N. Shah" Date: Sun, 17 Dec 2023 19:35:06 -0500 Subject: [PATCH] feat: introduce DuckDB table assurance --- lib/quality-system/governance.ts | 31 ++ pattern/governed/mod.ts | 2 +- render/dialect/duckdb/assurance.ts | 119 ++++++ .../duckdb/assurance_test-fixture-fail.csv | 13 + .../duckdb/assurance_test-fixture.duckdb.sql | 77 ++++ render/dialect/duckdb/assurance_test.ts | 114 ++++++ render/dialect/duckdb/integration.ts | 361 ++++++++++++++++++ .../{mod_test.ts => integration_test.ts} | 59 ++- render/dialect/duckdb/mod.ts | 237 +----------- render/domain/domain_test.ts | 4 +- render/emit/dialect.ts | 17 + render/emit/sql-notebook.ts | 8 +- 12 files changed, 788 insertions(+), 254 deletions(-) create mode 100644 render/dialect/duckdb/assurance.ts create mode 100644 render/dialect/duckdb/assurance_test-fixture-fail.csv create mode 100644 render/dialect/duckdb/assurance_test-fixture.duckdb.sql create mode 100644 render/dialect/duckdb/assurance_test.ts create mode 100644 render/dialect/duckdb/integration.ts rename render/dialect/duckdb/{mod_test.ts => integration_test.ts} (81%) diff --git a/lib/quality-system/governance.ts b/lib/quality-system/governance.ts index 8330be18..71aa96c5 100644 --- a/lib/quality-system/governance.ts +++ b/lib/quality-system/governance.ts @@ -1,4 +1,5 @@ import * as safety from "../../lib/universal/safety.ts"; +import * as ws from "../../lib/universal/whitespace.ts"; export type Documentable = { readonly description: string }; @@ -16,3 +17,33 @@ export function isQualitySystemSupplier( ); return isQS(o) && o.qualitySystem ? true : false; } + +/** + * String template literal function suitable to preparing Quality System docs + * (descriptions, etc.) in Markdown. This function auto-unindents our string + * literals and removes initial newline. + * @returns a function which can be used anywhere a string template literal can be + */ +export const qsMarkdown = ( + literals: TemplateStringsArray, + ...expressions: unknown[] +) => { + const literalSupplier = ws.whitespaceSensitiveTemplateLiteralSupplier( + literals, + expressions, + { + unindent: true, + removeInitialNewLine: true, + }, + ); + let interpolated = ""; + + // Loop through each part of the template + for (let i = 0; i < literals.length; i++) { + interpolated += literalSupplier(i); // Add the string part + if (i < expressions.length) { + interpolated += expressions[i]; // Add the interpolated value + } + } + return interpolated; +}; diff --git a/pattern/governed/mod.ts b/pattern/governed/mod.ts index 63632d1d..82fdd469 100644 --- a/pattern/governed/mod.ts +++ b/pattern/governed/mod.ts @@ -1,5 +1,5 @@ export * as ws from "../../lib/universal/whitespace.ts"; -export * as SQLa from "../../render/mod.ts"; +export * as tmpl from "../../render/mod.ts"; export * as diaPUML from "../../render/diagram/plantuml-ie-notation.ts"; export * from "../typical/enum-table.ts"; diff --git a/render/dialect/duckdb/assurance.ts b/render/dialect/duckdb/assurance.ts new file mode 100644 index 00000000..7564f658 --- /dev/null +++ b/render/dialect/duckdb/assurance.ts @@ -0,0 +1,119 @@ +import * as ws from "../../../lib/universal/whitespace.ts"; +import * as tmpl from "../../emit/mod.ts"; + +export class AssuranceRules { + constructor( + readonly govn: { + readonly SQL: ReturnType>; + }, + ) { + } + + insertIssue( + from: string, + typeText: string, + messageSql: string, + remediationSql?: string, + ) { + return ws.unindentWhitespace( + `INSERT INTO ingest_issue (issue_type, issue_message, remediation) + SELECT '${typeText}', + ${messageSql}, + ${remediationSql ?? "NULL"} + FROM ${from}`, + ); + } + + insertRowValueIssue( + from: string, + typeText: string, + rowNumSql: string, + columnNameSql: string, + valueSql: string, + messageSql: string, + remediationSql?: string, + ) { + return ws.unindentWhitespace( + `INSERT INTO ingest_issue (issue_type, issue_row, issue_column, invalid_value, issue_message, remediation) + SELECT '${typeText}', + ${rowNumSql}, + ${columnNameSql}, + ${valueSql}, + ${messageSql}, + ${remediationSql ?? "NULL"} + FROM ${from}`, + ); + } + + requiredColumnNamesInTable( + tableName: string, + requiredColNames: string[], + ): tmpl.SqlTextSupplier { + const cteName = "required_column_names_in_src"; + // deno-fmt-ignore + return this.govn.SQL` + WITH ${cteName} AS ( + SELECT column_name + FROM (VALUES ${requiredColNames.map(cn => `('${cn}')`).join(', ')}) AS required(column_name) + WHERE required.column_name NOT IN ( + SELECT upper(trim(column_name)) + FROM information_schema.columns + WHERE table_name = '${tableName}') + ) + ${this.insertIssue(cteName, + 'Missing Column', + `'Required column ' || column_name || ' is missing in the CSV file.'`, + `'Ensure the CSV contains the column "' || column_name || '"'` + )}`; + } + + intValueInAllTableRows( + tableName: string, + columnName: string, + ): tmpl.SqlTextSupplier { + const cteName = "numeric_value_in_all_rows"; + // deno-fmt-ignore + return this.govn.SQL` + WITH ${cteName} AS ( + SELECT '${columnName}' AS issue_column, + ${columnName} AS invalid_value, + src_file_row_number AS issue_row + FROM ${tableName} + WHERE ${columnName} IS NOT NULL + AND ${columnName} NOT SIMILAR TO '^[+-]?[0-9]+$' + ) + ${this.insertRowValueIssue(cteName, + 'Data Type Mismatch', + 'issue_row', + 'issue_column', + 'invalid_value', + `'Non-integer value "' || invalid_value || '" found in ' || issue_column`, + `'Convert non-integer values to INTEGER'` + )}`; + } + + dotComEmailValueInAllTableRows( + tableName: string, + columnName: string, + ): tmpl.SqlTextSupplier { + const cteName = "proper_dot_com_email_address_in_all_rows"; + // deno-fmt-ignore + return this.govn.SQL` + WITH ${cteName} AS ( + SELECT '${columnName}' AS issue_column, + ${columnName} AS invalid_value, + src_file_row_number AS issue_row + FROM ${tableName} + WHERE ${columnName} IS NOT NULL + AND ${columnName} NOT SIMILAR TO '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.com$' + ) + ${this.insertRowValueIssue(cteName, + 'Format Mismatch', + 'issue_row', + 'issue_column', + 'invalid_value', + `'Invalid email format "' || invalid_value || '" in ' || issue_column`, + `'Correct the email format'` + )}`; + } +} diff --git a/render/dialect/duckdb/assurance_test-fixture-fail.csv b/render/dialect/duckdb/assurance_test-fixture-fail.csv new file mode 100644 index 00000000..adc48098 --- /dev/null +++ b/render/dialect/duckdb/assurance_test-fixture-fail.csv @@ -0,0 +1,13 @@ +column1,column2,column3,column4,column5,column6,column7,column8,column9 +Valid,example@example.com,123,10,50,Unique1,Mandatory,ABC-1234,Yes +"Invalid (Email, Integer): Invalid email in column2 and non-integer in column4",invalid-email,123,abc,200,Unique2,Mandatory,ABC-1234,No +"Invalid (Range, Mandatory): Value out of range in column5 and missing mandatory value in column7",user@example.com,123,20,5,Unique3,,ABC-1235,Yes +"Invalid (Pattern): Pattern mismatch in column8",example@example.com,123,30,150,Unique4,Mandatory,ABC-ABCD,Yes +"Invalid (List): Value not in allowed list in column9",user@example.com,123,40,60,Unique5,Mandatory,XYZ-1234,No +"Duplicate (Unique): Duplicate value in column6",example@example.com,123,50,70,Unique1,Mandatory,ABC-1234,Maybe +"Invalid (Negative Integer): Negative integer in column4",invalid-email,123,-10,80,Unique6,Mandatory,ABC-1234,Yes +Valid,user@example.com,123,60,90,Unique7,,ABC-1234,No +Valid,example@example.com,123,70,100,Unique8,Mandatory,ABC-1234,Yes +"Invalid (Range): Value out of range in column5",user@example.com,123,80,110,Unique9,Mandatory,XYZ-1234,Maybe +Valid,example@example.com,123,90,120,Unique10,Mandatory,ABC-1234,Yes +"Invalid (List): Value not in allowed list in column9",invalid-email,123,100,130,Unique11,Mandatory,ABC-1234,No diff --git a/render/dialect/duckdb/assurance_test-fixture.duckdb.sql b/render/dialect/duckdb/assurance_test-fixture.duckdb.sql new file mode 100644 index 00000000..a5feab5e --- /dev/null +++ b/render/dialect/duckdb/assurance_test-fixture.duckdb.sql @@ -0,0 +1,77 @@ +CREATE TABLE ingest_session ( + ingest_session_id VARCHAR NOT NULL, + ingest_src VARCHAR NOT NULL, + ingest_table_name VARCHAR NOT NULL, +); + +CREATE TABLE ingest_issue ( + session_id VARCHAR NOT NULL, + issue_row INT, + issue_type VARCHAR NOT NULL, + issue_column VARCHAR, + invalid_value VARCHAR, + issue_message VARCHAR NOT NULL, + remediation VARCHAR +); + +INSERT INTO ingest_session (ingest_session_id, ingest_src, ingest_table_name) + VALUES (uuid(), 'assurance_test-fixture-fail.csv', 'synthetic_csv_fail'); + +CREATE TEMPORARY TABLE synthetic_csv_fail AS + SELECT *, row_number() OVER () as src_file_row_number, (SELECT ingest_session_id from ingest_session LIMIT 1) as ingest_session_id + FROM read_csv_auto('assurance_test-fixture-fail.csv', header=true); + +WITH required_column_names_in_src AS ( + SELECT column_name + FROM (VALUES ('column1'), ('column2'), ('column3'), ('column4'), ('column5'), ('column6'), ('column7'), ('column8'), ('column9')) AS required(column_name) + WHERE required.column_name NOT IN ( + SELECT upper(trim(column_name)) + FROM information_schema.columns + WHERE table_name = 'synthetic_csv_fail') +) +INSERT INTO ingest_issue (session_id, issue_type, issue_message, remediation) + SELECT (SELECT ingest_session_id FROM ingest_session LIMIT 1), + 'Missing Column', + 'Required column ' || column_name || ' is missing in the CSV file.', + 'Ensure the CSV contains the column "' || column_name || '"' + FROM required_column_names_in_src; + +-- NOTE: If the above does not pass, meaning not all columns with the proper +-- names are present, do not run the queries below because they assume +-- proper names and existence of columns. + +WITH numeric_value_in_all_rows AS ( + SELECT 'column4' AS issue_column, + column4 AS invalid_value, + src_file_row_number AS issue_row + FROM synthetic_csv_fail + WHERE column4 IS NOT NULL + AND column4 NOT SIMILAR TO '^[+-]?[0-9]+$' +) +INSERT INTO ingest_issue (session_id, issue_type, issue_row, issue_column, invalid_value, issue_message, remediation) + SELECT (SELECT ingest_session_id FROM ingest_session LIMIT 1), + 'Data Type Mismatch', + issue_row, + issue_column, + invalid_value, + 'Non-integer value "' || invalid_value || '" found in ' || issue_column, + 'Convert non-integer values to INTEGER' + FROM numeric_value_in_all_rows; + +WITH proper_dot_com_email_address_in_all_rows AS ( + SELECT 'column2' AS issue_column, + column2 AS invalid_value, + src_file_row_number AS issue_row + FROM synthetic_csv_fail + WHERE column2 IS NOT NULL + AND column2 NOT SIMILAR TO '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.com$' +) +INSERT INTO ingest_issue (session_id, issue_type, issue_row, issue_column, invalid_value, issue_message, remediation) + SELECT (SELECT ingest_session_id FROM ingest_session LIMIT 1), + 'Format Mismatch', + issue_row, + issue_column, + invalid_value, + 'Invalid email format "' || invalid_value || '" in ' || issue_column, + 'Correct the email format' + FROM proper_dot_com_email_address_in_all_rows; \ No newline at end of file diff --git a/render/dialect/duckdb/assurance_test.ts b/render/dialect/duckdb/assurance_test.ts new file mode 100644 index 00000000..220da10d --- /dev/null +++ b/render/dialect/duckdb/assurance_test.ts @@ -0,0 +1,114 @@ +import { testingAsserts as ta } from "../../deps-test.ts"; +import { path } from "../../deps.ts"; +import * as ws from "../../../lib/universal/whitespace.ts"; +import * as tmpl from "../../emit/mod.ts"; +import * as intr from "./integration.ts"; +import * as mod from "./assurance.ts"; + +export class SyntheticAssuranceRules + extends mod.AssuranceRules { + constructor( + readonly govn: { + readonly SQL: ReturnType>; + }, + ) { + super(govn); + } + + insertIssue( + from: string, + typeText: string, + messageSql: string, + remediationSql?: string, + ) { + return ws.unindentWhitespace(` + INSERT INTO ingest_issue (session_id, issue_type, issue_message, remediation) + SELECT (SELECT ingest_session_id FROM ingest_session LIMIT 1), + '${typeText}', + ${messageSql}, + ${remediationSql ?? "NULL"} + FROM ${from}`); + } + + insertRowValueIssue( + from: string, + typeText: string, + rowNumSql: string, + columnNameSql: string, + valueSql: string, + messageSql: string, + remediationSql?: string, + ) { + return ws.unindentWhitespace(` + INSERT INTO ingest_issue (session_id, issue_type, issue_row, issue_column, invalid_value, issue_message, remediation) + SELECT (SELECT ingest_session_id FROM ingest_session LIMIT 1), + '${typeText}', + ${rowNumSql}, + ${columnNameSql}, + ${valueSql}, + ${messageSql}, + ${remediationSql ?? "NULL"} + FROM ${from}`); + } +} + +Deno.test("DuckDB Table Content Assurance", () => { + const ctx = tmpl.typicalSqlEmitContext({ sqlDialect: tmpl.duckDbDialect() }); + const ddlOptions = tmpl.typicalSqlTextSupplierOptions(); + const ar = new SyntheticAssuranceRules({ + SQL: tmpl.SQL(ddlOptions), + }); + const tableName = "synthetic_csv_fail"; + const csvSrcFsPath = "assurance_test-fixture-fail.csv"; + + // deno-fmt-ignore + const ddlDefn = tmpl.SQL(ddlOptions)` + CREATE TABLE ingest_session ( + ingest_session_id VARCHAR NOT NULL, + ingest_src VARCHAR NOT NULL, + ingest_table_name VARCHAR NOT NULL, + ); + + CREATE TABLE ingest_issue ( + session_id VARCHAR NOT NULL, + issue_row INT, + issue_type VARCHAR NOT NULL, + issue_column VARCHAR, + invalid_value VARCHAR, + issue_message VARCHAR NOT NULL, + remediation VARCHAR + ); + + INSERT INTO ingest_session (ingest_session_id, ingest_src, ingest_table_name) + VALUES (uuid(), '${csvSrcFsPath}', '${tableName}'); + + ${intr.csvTableIntegration({ + csvSrcFsPath: () => csvSrcFsPath, + tableName, + isTempTable: true, + extraColumnsSql: [ + "row_number() OVER () as src_file_row_number", + "(SELECT ingest_session_id from ingest_session LIMIT 1) as ingest_session_id", + ], + })} + + ${ar.requiredColumnNamesInTable(tableName, + ['column1', 'column2', 'column3', + 'column4', 'column5', 'column6', + 'column7', 'column8', 'column9'])} + + -- NOTE: If the above does not pass, meaning not all columns with the proper + -- names are present, do not run the queries below because they assume + -- proper names and existence of columns. + + ${ar.intValueInAllTableRows(tableName, 'column4')} + + ${ar.dotComEmailValueInAllTableRows(tableName, 'column2')}`; + + ta.assertEquals( + ddlDefn.SQL(ctx), + Deno.readTextFileSync(path.fromFileUrl( + import.meta.resolve("./assurance_test-fixture.duckdb.sql"), + )), + ); +}); diff --git a/render/dialect/duckdb/integration.ts b/render/dialect/duckdb/integration.ts new file mode 100644 index 00000000..d5b988ed --- /dev/null +++ b/render/dialect/duckdb/integration.ts @@ -0,0 +1,361 @@ +import * as pgpass from "../../../lib/postgres/pgpass/pgpass-parse.ts"; +import * as ws from "../../../lib/universal/whitespace.ts"; +import * as tmpl from "../../emit/mod.ts"; + +// deno-lint-ignore no-explicit-any +type Any = any; + +export interface IntegrationSupplier { + readonly isIntegrationSupplier: true; +} + +export interface CsvIntegration< + Context extends tmpl.SqlEmitContext & { + readonly resolveFsPath?: ( + cvsI: CsvIntegration, + ctx: Context, + ) => string; + }, +> extends IntegrationSupplier { + readonly csvSrcFsPath: (ctx: Context) => string; + readonly readCsvAutoOptions?: string[]; + readonly from: tmpl.SqlTextSupplier< + Context + >; +} + +export function csvIntegration< + Context extends tmpl.SqlEmitContext & { + readonly resolveFsPath?: ( + cvsI: CsvIntegration, + ctx: Context, + ) => string; + }, +>( + init: Omit, "isIntegrationSupplier" | "from">, +) { + const csvI: CsvIntegration = { + isIntegrationSupplier: true, + readCsvAutoOptions: ["header=true"], + from: { + SQL: (ctx) => { + const csvSrcFsPath = ctx.resolveFsPath?.(csvI, ctx) ?? + csvI.csvSrcFsPath(ctx); + return `read_csv_auto('${csvSrcFsPath}'${ + csvI.readCsvAutoOptions + ? `, ${csvI.readCsvAutoOptions.join(", ")}` + : "" + })`; + }, + }, + ...init, + }; + return csvI; +} + +export interface CsvTableIntegration + extends CsvIntegration, tmpl.SqlTextSupplier { + readonly tableName: string; + readonly isTempTable?: boolean; + readonly extraColumnsSql?: string[]; +} + +export function csvTableIntegration< + Context extends tmpl.SqlEmitContext & { + readonly resolveFsPath?: ( + cvsI: CsvIntegration, + ctx: Context, + ) => string; + }, +>( + init: Omit< + CsvTableIntegration, + "isIntegrationSupplier" | "from" | "SQL" + >, +) { + const csvI: CsvTableIntegration = { + ...csvIntegration(init), + extraColumnsSql: ["row_number() OVER () as src_file_row_number"], + SQL: (ctx) => { + // deno-fmt-ignore + return ws.unindentWhitespace(` + CREATE ${csvI.isTempTable ? `TEMPORARY ` : ""}TABLE ${csvI.tableName} AS + SELECT *${csvI.extraColumnsSql ? `, ${csvI.extraColumnsSql.join(", ")}` : ""} + FROM ${csvI.from.SQL(ctx)}`); + }, + ...init, + }; + return csvI; +} + +export function csvTablesIntegrations< + Shape extends Record>, + Context extends tmpl.SqlEmitContext, +>( + integrations: Shape, + options?: { + readonly resolveFsPath?: ( + si: CsvTableIntegration, + ctx: Context, + ) => string; + }, +) { + return { + ...integrations, + SQL: (ctx: Context) => { + const localCtx = { + resolveFsPath: options?.resolveFsPath, + ...ctx, + } as Context; + const SQL: string[] = []; + for ( + const csvI of Object.values>(integrations) + ) { + // deno-fmt-ignore + SQL.push(csvI.SQL(localCtx)); + } + return SQL.join("\n"); + }, + }; +} + +export interface SqliteIntegration + extends IntegrationSupplier { + readonly sqliteDbFsPath: (ctx: Context) => string; + readonly attachAs: string; +} + +export function sqliteIntegration( + init: Omit< + SqliteIntegration, + "isIntegrationSupplier" + >, +) { + const si: SqliteIntegration = { + isIntegrationSupplier: true, + ...init, + }; + return si; +} + +export function sqliteIntegrations< + Shape extends Record>, + Context extends tmpl.SqlEmitContext, +>( + integrations: Shape, + options?: { + readonly resolveFsPath?: ( + si: SqliteIntegration, + ctx: Context, + ) => string; + }, +) { + return { + ...integrations, + SQL: (ctx: Context) => { + const SQL: string[] = []; + let emitCount = 0; + for ( + const si of Object.values>(integrations) + ) { + const sqliteDbFsPath = options?.resolveFsPath?.(si, ctx) ?? + si.sqliteDbFsPath(ctx); + emitCount++; + // deno-fmt-ignore + SQL.push(`${emitCount == 1 ? `INSTALL sqlite;\n` : ''}ATTACH '${sqliteDbFsPath}' AS ${si.attachAs} (TYPE sqlite);`); + } + return SQL.join("\n"); + }, + }; +} + +export interface ExcelIntegration< + SheetName extends string, + Context extends tmpl.SqlEmitContext, +> extends IntegrationSupplier { + readonly xlsFsPath: (ctx: Context) => string; + readonly from: (sheetName: SheetName) => tmpl.SqlTextSupplier; +} + +export function excelIntegration< + SheetName extends string, + Context extends tmpl.SqlEmitContext, +>( + init: + & Omit< + ExcelIntegration, + "isIntegrationSupplier" | "isExcelIntegration" | "from" + > + & { + readonly resolveFsPath?: ( + ei: ExcelIntegration, + ctx: Context, + ) => string; + }, +) { + const xlsEngine: ExcelIntegration = { + isIntegrationSupplier: true, + ...init, + from: (sheetName) => { + return { + SQL: (ctx) => { + const excelFileName = init?.resolveFsPath?.(xlsEngine, ctx) ?? + xlsEngine.xlsFsPath(ctx); + return `st_read('${excelFileName}', layer='${sheetName}')`; + }, + }; + }, + }; + return xlsEngine; +} + +export function excelIntegrations< + Shape extends Record< + string | number | symbol, + ExcelIntegration + >, + Context extends tmpl.SqlEmitContext, +>( + integrations: Shape, +) { + return { + ...integrations, + SQL: () => `INSTALL spatial; LOAD spatial;`, + }; +} + +export interface PostgreSqlIntegration< + Table extends string, + Context extends tmpl.SqlEmitContext, +> extends IntegrationSupplier { + readonly isPostgreSqlIntegration: true; + readonly pgpassConn: (ctx: Context) => pgpass.Connection; + readonly from: ( + tableName: Table, + options?: { schema?: string; pushdown?: boolean }, + ) => tmpl.SqlTextSupplier; +} + +export function postgreSqlIntegration< + TableName extends string, + Context extends tmpl.SqlEmitContext, +>( + init: Omit< + PostgreSqlIntegration, + "isIntegrationSupplier" | "isPostgreSqlIntegration" | "from" + >, +) { + const pgEngine: PostgreSqlIntegration = { + isIntegrationSupplier: true, + isPostgreSqlIntegration: true, + ...init, + from: (tableName, options) => { + return { + SQL: (ctx) => { + const conn = init.pgpassConn(ctx); + // deno-fmt-ignore + return `${options?.pushdown ? "postgres_scan_pushdown" : "postgres_scan"}('dbname=${conn.database} user=${conn.username} password=${conn.password} host=${conn.host} port=${String(conn.port)}', '${options?.schema ?? "public"}', '${tableName}')` + }, + }; + }, + }; + return pgEngine; +} + +export function postgreSqlIntegrations< + Shape extends Record< + string | number | symbol, + PostgreSqlIntegration + >, + Context extends tmpl.SqlEmitContext, +>( + integrations: Shape, +) { + return { + ...integrations, + SQL: () => `INSTALL postgres; LOAD postgres;`, + }; +} + +// the functions below are "convenience" functions for building strongly-typed +// integrations with attached Context + +export function integration() { + function csv(init: Parameters>[0]) { + return csvIntegration(init); + } + + function csvTable(init: Parameters>[0]) { + return csvTableIntegration(init); + } + + function sqlite(init: Parameters>[0]) { + return sqliteIntegration(init); + } + + function excel( + init: Parameters>[0], + ) { + return excelIntegration(init); + } + + function postgreSQL( + init: Parameters>[0], + ) { + return postgreSqlIntegration(init); + } + + return { + csv, + csvTable, + sqlite, + excel, + postgreSQL, + }; +} + +export function integrationsBuilder() { + function csvTables< + Shape extends Record< + string | number | symbol, + CsvTableIntegration + >, + >(shape: Shape) { + return csvTablesIntegrations(shape); + } + + function sqlite< + Shape extends Record>, + >( + shape: Shape, + options?: Parameters>[1], + ) { + return sqliteIntegrations(shape, options); + } + + function excel< + Shape extends Record< + string | number | symbol, + ExcelIntegration + >, + >(shape: Shape) { + return excelIntegrations(shape); + } + + function postgreSQL< + Shape extends Record< + string | number | symbol, + PostgreSqlIntegration + >, + >(shape: Shape) { + return postgreSqlIntegrations(shape); + } + + return { + factory: integration(), + csvTables, + sqlite, + excel, + postgreSQL, + }; +} diff --git a/render/dialect/duckdb/mod_test.ts b/render/dialect/duckdb/integration_test.ts similarity index 81% rename from render/dialect/duckdb/mod_test.ts rename to render/dialect/duckdb/integration_test.ts index 5cef53ea..6c524a42 100644 --- a/render/dialect/duckdb/mod_test.ts +++ b/render/dialect/duckdb/integration_test.ts @@ -3,18 +3,51 @@ import { path } from "../../deps.ts"; import * as pgpass from "../../../lib/postgres/pgpass/pgpass-parse.ts"; import * as ws from "../../../lib/universal/whitespace.ts"; import * as tmpl from "../../emit/mod.ts"; -import * as mod from "./mod.ts"; -import { SQLa } from "../../../pattern/governed/mod.ts"; +import * as mod from "./integration.ts"; -// deno-lint-ignore no-explicit-any -type Any = any; +Deno.test("DuckDB CSV Supplier", () => { + type EmitContext = tmpl.SqlEmitContext & { + readonly integrations: typeof integrations; + }; + type SqlTextSupplier = tmpl.SqlTextSupplier; + + const ib = mod.integrationsBuilder(); + const integrations = ib.csvTables({ + "SYNTHETIC": ib.factory.csvTable({ + csvSrcFsPath: () => "synthetic.csv", + tableName: "synthetic_csv_table", + isTempTable: true, + extraColumnsSql: [ + "row_number() OVER () as src_file_row_number", + "(SELECT ingest_session_id from ingest_session LIMIT 1) as ingest_session_id", + ], + }), + }); + + const ctx: EmitContext = { + ...tmpl.typicalSqlEmitContext({ sqlDialect: tmpl.duckDbDialect() }), + integrations, + }; + const ddlOptions = tmpl.typicalSqlTextSupplierOptions(); + + const ddlDefn = tmpl.SQL(ddlOptions)` + ${integrations}`; + + ta.assertEquals( + ddlDefn.SQL(ctx), + ws.unindentWhitespace(` + CREATE TEMPORARY TABLE synthetic_csv_table AS + SELECT *, row_number() OVER () as src_file_row_number, (SELECT ingest_session_id from ingest_session LIMIT 1) as ingest_session_id + FROM read_csv_auto('synthetic.csv', header=true);`), + ); +}); Deno.test("DuckDB SQLite SQL Supplier", () => { - type EmitContext = SQLa.SqlEmitContext & { + type EmitContext = tmpl.SqlEmitContext & { readonly integrations: typeof integrations; readonly resolve: (fsPath: string) => string; }; - type SqlTextSupplier = SQLa.SqlTextSupplier; + type SqlTextSupplier = tmpl.SqlTextSupplier; const integrations = mod.sqliteIntegrations({ "synthetic_sqlite_instance": mod.sqliteIntegration({ @@ -69,7 +102,7 @@ Deno.test("DuckDB SQLite SQL Supplier", () => { }; const ctx: EmitContext = { - ...tmpl.typicalSqlEmitContext(), + ...tmpl.typicalSqlEmitContext({ sqlDialect: tmpl.duckDbDialect() }), integrations, resolve: (fsPath) => fsPath, }; @@ -113,10 +146,10 @@ Deno.test("DuckDB SQLite SQL Supplier", () => { }); Deno.test("DuckDB PostgreSQL SQL Supplier", async () => { - type EmitContext = SQLa.SqlEmitContext & { + type EmitContext = tmpl.SqlEmitContext & { readonly integrations: typeof integrations; }; - type SqlTextSupplier = SQLa.SqlTextSupplier; + type SqlTextSupplier = tmpl.SqlTextSupplier; /** * Read some synthetic PostgreSQL backend connection parameters from a synthetic @@ -148,7 +181,7 @@ Deno.test("DuckDB PostgreSQL SQL Supplier", async () => { }; const ctx: EmitContext = { - ...tmpl.typicalSqlEmitContext(), + ...tmpl.typicalSqlEmitContext({ sqlDialect: tmpl.duckDbDialect() }), integrations, }; const ddlOptions = tmpl.typicalSqlTextSupplierOptions(); @@ -168,10 +201,10 @@ Deno.test("DuckDB PostgreSQL SQL Supplier", async () => { }); Deno.test("DuckDB Excel Supplier", () => { - type EmitContext = SQLa.SqlEmitContext & { + type EmitContext = tmpl.SqlEmitContext & { readonly integrations: typeof integrations; }; - type SqlTextSupplier = SQLa.SqlTextSupplier; + type SqlTextSupplier = tmpl.SqlTextSupplier; const ib = mod.integrationsBuilder(); const integrations = ib.excel({ @@ -189,7 +222,7 @@ Deno.test("DuckDB Excel Supplier", () => { }; const ctx: EmitContext = { - ...tmpl.typicalSqlEmitContext(), + ...tmpl.typicalSqlEmitContext({ sqlDialect: tmpl.duckDbDialect() }), integrations, }; const ddlOptions = tmpl.typicalSqlTextSupplierOptions(); diff --git a/render/dialect/duckdb/mod.ts b/render/dialect/duckdb/mod.ts index c4d380d7..6934c202 100644 --- a/render/dialect/duckdb/mod.ts +++ b/render/dialect/duckdb/mod.ts @@ -1,235 +1,2 @@ -import * as pgpass from "../../../lib/postgres/pgpass/pgpass-parse.ts"; -import * as tmpl from "../../emit/mod.ts"; - -// deno-lint-ignore no-explicit-any -type Any = any; - -export interface IntegrationSupplier { - readonly isIntegrationSupplier: true; -} - -// deno-lint-ignore no-empty-interface -export interface IntegrationsSupplier - extends tmpl.SqlTextSupplier { -} - -export interface SqliteIntegration - extends IntegrationSupplier { - readonly sqliteDbFsPath: (ctx: Context) => string; - readonly attachAs: string; -} - -export function sqliteIntegration( - init: Omit< - SqliteIntegration, - "isIntegrationSupplier" | "isSqliteIntegration" - >, -) { - const si: SqliteIntegration = { - isIntegrationSupplier: true, - ...init, - }; - return si; -} - -export function sqliteIntegrations< - Shape extends Record>, - Context extends tmpl.SqlEmitContext, ->( - integrations: Shape, - options?: { - readonly resolveFsPath?: ( - si: SqliteIntegration, - ctx: Context, - ) => string; - }, -) { - return { - ...integrations, - SQL: (ctx: Context) => { - const SQL: string[] = []; - let emitCount = 0; - for ( - const si of Object.values>(integrations) - ) { - const sqliteDbFsPath = options?.resolveFsPath?.(si, ctx) ?? - si.sqliteDbFsPath(ctx); - emitCount++; - // deno-fmt-ignore - SQL.push(`${emitCount == 1 ? `INSTALL sqlite;\n` : ''}ATTACH '${sqliteDbFsPath}' AS ${si.attachAs} (TYPE sqlite);`); - } - return SQL.join("\n"); - }, - }; -} - -export interface ExcelIntegration< - SheetName extends string, - Context extends tmpl.SqlEmitContext, -> extends IntegrationSupplier { - readonly xlsFsPath: (ctx: Context) => string; - readonly from: (sheetName: SheetName) => tmpl.SqlTextSupplier; -} - -export function excelIntegration< - SheetName extends string, - Context extends tmpl.SqlEmitContext, ->( - init: - & Omit< - ExcelIntegration, - "isIntegrationSupplier" | "isExcelIntegration" | "from" - > - & { - readonly resolveFsPath?: ( - ei: ExcelIntegration, - ctx: Context, - ) => string; - }, -) { - const xlsEngine: ExcelIntegration = { - isIntegrationSupplier: true, - ...init, - from: (sheetName) => { - return { - SQL: (ctx) => { - const excelFileName = init?.resolveFsPath?.(xlsEngine, ctx) ?? - xlsEngine.xlsFsPath(ctx); - return `st_read('${excelFileName}', layer='${sheetName}')`; - }, - }; - }, - }; - return xlsEngine; -} - -export function excelIntegrations< - Shape extends Record< - string | number | symbol, - ExcelIntegration - >, - Context extends tmpl.SqlEmitContext, ->( - integrations: Shape, -) { - return { - ...integrations, - SQL: () => `INSTALL spatial; LOAD spatial;`, - }; -} - -export interface PostgreSqlIntegration< - Table extends string, - Context extends tmpl.SqlEmitContext, -> extends IntegrationSupplier { - readonly isPostgreSqlIntegration: true; - readonly pgpassConn: (ctx: Context) => pgpass.Connection; - readonly from: ( - tableName: Table, - options?: { schema?: string; pushdown?: boolean }, - ) => tmpl.SqlTextSupplier; -} - -export function postgreSqlIntegration< - TableName extends string, - Context extends tmpl.SqlEmitContext, ->( - init: Omit< - PostgreSqlIntegration, - "isIntegrationSupplier" | "isPostgreSqlIntegration" | "from" - >, -) { - const pgEngine: PostgreSqlIntegration = { - isIntegrationSupplier: true, - isPostgreSqlIntegration: true, - ...init, - from: (tableName, options) => { - return { - SQL: (ctx) => { - const conn = init.pgpassConn(ctx); - // deno-fmt-ignore - return `${options?.pushdown ? "postgres_scan_pushdown" : "postgres_scan"}('dbname=${conn.database} user=${conn.username} password=${conn.password} host=${conn.host} port=${String(conn.port)}', '${options?.schema ?? "public"}', '${tableName}')` - }, - }; - }, - }; - return pgEngine; -} - -export function postgreSqlIntegrations< - Shape extends Record< - string | number | symbol, - PostgreSqlIntegration - >, - Context extends tmpl.SqlEmitContext, ->( - integrations: Shape, -) { - return { - ...integrations, - SQL: () => `INSTALL postgres; LOAD postgres;`, - }; -} - -// the functions below are "convenience" functions for building strongly-typed -// integrations with attached Context - -export function integration() { - function sqlite(init: Parameters>[0]) { - return sqliteIntegration(init); - } - - function excel( - init: Parameters>[0], - ) { - return excelIntegration(init); - } - - function postgreSQL( - init: Parameters>[0], - ) { - return postgreSqlIntegration(init); - } - - return { - sqlite, - excel, - postgreSQL, - }; -} - -export function integrationsBuilder() { - function sqlite< - Shape extends Record>, - >( - shape: Shape, - options?: Parameters>[1], - ) { - return sqliteIntegrations(shape, options); - } - - function excel< - Shape extends Record< - string | number | symbol, - ExcelIntegration - >, - >(shape: Shape) { - return excelIntegrations(shape); - } - - function postgreSQL< - Shape extends Record< - string | number | symbol, - PostgreSqlIntegration - >, - >(shape: Shape) { - return postgreSqlIntegrations(shape); - } - - return { - factory: integration(), - sqlite, - excel, - postgreSQL, - }; -} +export * from "./assurance.ts"; +export * from "./integration.ts"; diff --git a/render/domain/domain_test.ts b/render/domain/domain_test.ts index 4f46358b..96fba1b5 100644 --- a/render/domain/domain_test.ts +++ b/render/domain/domain_test.ts @@ -319,7 +319,7 @@ Deno.test("SQLa domain from Zod Types", async (tc) => { const sd = ztsdFactory.stringSDF.stringDialect(z.string()); ta.assertEquals( sd.sqlDataType().SQL(ctx), - `TEXT /* {"identity":"ANSI","isAnsiSqlDialect":true,"isSqliteDialect":false,"isPostgreSqlDialect":false,"isMsSqlServerDialect":false} */`, + `TEXT /* {"identity":"ANSI","isAnsiSqlDialect":true,"isSqliteDialect":false,"isDuckDbDialect":false,"isPostgreSqlDialect":false,"isMsSqlServerDialect":false} */`, ); }); @@ -340,7 +340,7 @@ Deno.test("SQLa domain from Zod Types", async (tc) => { const sd = ztsdFactory.stringSDF.stringDialect(z.string()); ta.assertEquals( sd.sqlDataType().SQL(pgCtx), - `TEXT /* {"identity":"PostgreSQL","isAnsiSqlDialect":true,"isSqliteDialect":false,"isPostgreSqlDialect":true,"isMsSqlServerDialect":false} */`, + `TEXT /* {"identity":"PostgreSQL","isAnsiSqlDialect":true,"isSqliteDialect":false,"isDuckDbDialect":false,"isPostgreSqlDialect":true,"isMsSqlServerDialect":false} */`, ); }); }); diff --git a/render/emit/dialect.ts b/render/emit/dialect.ts index da626141..7003a217 100644 --- a/render/emit/dialect.ts +++ b/render/emit/dialect.ts @@ -35,6 +35,22 @@ export const sqliteDialect = (): SqliteDialect => { }; }; +export interface DuckDbDialect extends AnsiSqlDialect { + readonly isDuckDbDialect: true; +} + +export const isDuckDbDialect = safety.typeGuard( + "isDuckDbDialect", +); + +export const duckDbDialect = (): DuckDbDialect => { + return { + identity: () => "DuckDB", + isAnsiDialect: true, + isDuckDbDialect: true, + }; +}; + export interface PostgreSqlDialect extends AnsiSqlDialect { readonly isPostgreSqlDialect: true; } @@ -72,6 +88,7 @@ export function dialectState(dialect: SqlDialect) { identity: dialect.identity("state"), isAnsiSqlDialect: isAnsiSqlDialect(dialect), isSqliteDialect: isSqliteDialect(dialect), + isDuckDbDialect: isDuckDbDialect(dialect), isPostgreSqlDialect: isPostgreSqlDialect(dialect), isMsSqlServerDialect: isMsSqlServerDialect(dialect), }; diff --git a/render/emit/sql-notebook.ts b/render/emit/sql-notebook.ts index 8a12395c..ca5b5027 100644 --- a/render/emit/sql-notebook.ts +++ b/render/emit/sql-notebook.ts @@ -94,8 +94,10 @@ export function sqlNotebookFactory< nbd, kernel, instance, + // loop through all notebook cells and return an array of SQL text suppliers + // and SQL text behaviors SQL: async ( - options: { + options?: { separator?: ( cell: Parameters[0], state: Parameters[1], @@ -129,11 +131,11 @@ export function sqlNotebookFactory< s.isSqlTextSupplier(state.execResult) || s.isSqlTextBehaviorSupplier(state.execResult) ) { - if (options.separator) SQL.push(options.separator(cell, state)); + if (options?.separator) SQL.push(options.separator(cell, state)); const sts = state.execResult as s.SqlTextSupplier; SQL.push(sts); } else { - const notSTS = options.onNotSqlTextSupplier?.(cell, state); + const notSTS = options?.onNotSqlTextSupplier?.(cell, state); if (notSTS) SQL.push(notSTS); } }