-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #163 from netspective-labs/dev-feature-quality-system
feat: implement comment parsing for data governance
- Loading branch information
Showing
4 changed files
with
518 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import { parse as commentParser } from "npm:comment-parser"; | ||
import { path, zod as z } from "../../../../deps.ts"; | ||
import { Any } from "https://deno.land/[email protected]/yaml/_utils.ts"; | ||
|
||
// `input` section of a lineage payload: a `source` string and a list of column names.
const lineageInput = z.object({
  source: z.string(),
  columns: z.array(z.string()),
});

// `transformations` section: a `type` label and a free-text `description`.
const lineageTransformations = z.object({
  type: z.string(),
  description: z.string(),
});

// `output` section: a `target` string and a list of column names.
const lineageOutput = z.object({
  target: z.string(),
  columns: z.array(z.string()),
});

/**
 * Shape of a `@lineage` payload.
 *
 * `input` is required; `transformations` and `output` may be left out.
 */
const lineageSchema = z.object({
  input: lineageInput,
  transformations: lineageTransformations.optional(),
  output: lineageOutput.optional(),
});
|
||
// Builds a fresh "optional string" validator for each field that needs one.
const optionalText = () => z.string().optional();

/** `@traceability` payload: an optional `jiraIssue` string. */
const traceabilitySchema = z.object({ jiraIssue: optionalText() });

/**
 * `@returns` payload.
 * NOTE(review): only `full_name` is declared here — confirm whether other
 * return fields are expected to be accepted.
 */
const returnsSchema = z.object({ full_name: optionalText() });

/**
 * `@param` payload.
 * NOTE(review): only `employee_id` is declared here — confirm whether other
 * parameter names are expected to be accepted.
 */
const paramSchema = z.object({ employee_id: optionalText() });

/** `@codeReview` payload: an optional `isReviewed` flag. */
const codeReviewSchema = z.object({ isReviewed: z.boolean().optional() });

/**
 * `@informationSchema` payload: optional table name, optional description and
 * an optional map from string keys to string values under `columns`.
 */
const informationSchema = z.object({
  table: optionalText(),
  description: optionalText(),
  columns: z.record(z.string()).optional(),
});

/**
 * `@governance` payload: optional steward/owner/classification strings plus
 * optional nested lineage and traceability sections.
 */
const governanceSchema = z.object({
  dataSteward: optionalText(),
  dataOwner: optionalText(),
  classification: optionalText(),
  lineage: lineageSchema.optional(),
  traceability: traceabilitySchema.optional(),
});

/**
 * One parsed comment block, keyed by tag name. Every section is optional.
 */
const parsedCommentSchema = z.object({
  governance: governanceSchema.optional(),
  lineage: lineageSchema.optional(),
  traceability: traceabilitySchema.optional(),
  function: optionalText(),
  arguments: z.record(z.string()).optional(),
  returns: returnsSchema.optional(),
  param: paramSchema.optional(),
  codeReview: codeReviewSchema.optional(),
  informationSchema: informationSchema.optional(),
});

/** Static type of a comment block that satisfies `parsedCommentSchema`. */
type TagData = z.infer<typeof parsedCommentSchema>;
|
||
/**
 * Turn a relaxed, JavaScript-style object body into JSON by wrapping bare keys
 * in double quotes (`key: 1` becomes `"key": 1`).
 *
 * Double-quoted string literals are matched first and copied through
 * untouched, so a colon inside a value (for example a URL or `"Note: ..."`)
 * is never mistaken for a key separator. Keys that are already quoted are
 * left as they are.
 *
 * @param text payload text without the surrounding braces
 * @returns the same text with every bare key quoted
 */
function quoteBareKeys(text: string): string {
  return text.replace(
    /"(?:[^"\\]|\\.)*"|(\w+)\s*:/g,
    // `key` is undefined when the string-literal alternative matched.
    (match: string, key: string | undefined) =>
      key === undefined ? match : `"${key}":`,
  );
}

/**
 * Parse every doc-comment block in `content` and collect the payload of each
 * tag into one object per block, keyed by tag name. No schema validation is
 * applied to the result.
 *
 * Each block starts from the same set of placeholder sections; a tag with a
 * matching name replaces its placeholder wholesale (there is no deep merge).
 * The text the comment parser reports as the tag's `type` (the part written
 * between braces) is wrapped in `{}` and parsed as JSON after bare keys have
 * been quoted.
 *
 * @param content source text containing the comment blocks
 * @returns one entry per comment block, in the order the parser reports them
 * @throws SyntaxError when a tag payload is not valid JSON after key quoting;
 *         the message names the offending tag
 */
export function unsafeSourceComments(content: string): TagData[] {
  const tagDataResult: TagData[] = [];

  for (const block of commentParser(content)) {
    const tagData: Partial<TagData> = {
      governance: { lineage: { input: { columns: [], source: "" } } },
      lineage: { input: { source: "", columns: [] } },
      codeReview: {},
      traceability: {},
      informationSchema: {},
      param: {},
      returns: {},
    };
    // Tag names come from the comment text, so they are written through a
    // string-keyed view rather than through the typed fields.
    const tagValues = tagData as Record<string, unknown>;

    for (const tag of block.tags) {
      const tagKey = tag.tag.trim();
      // Keys are quoted per payload rather than across the whole file, so
      // text outside tag payloads and text inside string values stays intact.
      const payload = `{${quoteBareKeys(tag.type)}}`;
      try {
        tagValues[tagKey] = JSON.parse(payload);
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new SyntaxError(
          `unable to parse @${tagKey} payload as JSON: ${reason}`,
        );
      }
    }
    tagDataResult.push(tagData);
  }
  return tagDataResult;
}
|
||
/**
 * Parse the comment blocks in `content` and keep only those that satisfy
 * `parsedCommentSchema`.
 *
 * Blocks that fail validation are dropped without any report. The returned
 * entries are the schema's parsed output, not the raw objects.
 *
 * @param content source text containing the comment blocks
 * @returns the schema-validated blocks, in their original order
 */
export function validatedSourceComments(content: string): TagData[] {
  return unsafeSourceComments(content).flatMap((candidate) => {
    const outcome = parsedCommentSchema.safeParse(candidate);
    // An empty array makes flatMap skip the rejected block.
    return outcome.success ? [outcome.data] : [];
  });
}
|
||
/**
 * Return the validated comment blocks with top-level lineage folded into the
 * governance section.
 *
 * When a block has both `governance` and `lineage`, the lineage object
 * replaces `governance.lineage` and the top-level `lineage` key is removed.
 * Blocks missing either section pass through unchanged.
 *
 * @param content source text containing the comment blocks
 * @returns the validated blocks after the lineage merge
 */
export function governedSourceComments(content: string): TagData[] {
  return validatedSourceComments(content).map((entry) => {
    const { governance, lineage } = entry;
    if (governance && lineage) {
      governance.lineage = lineage;
      delete entry.lineage;
    }
    return entry;
  });
}
86 changes: 86 additions & 0 deletions
86
lib/quality-system/governance/jsdoc/parser_test.fixture-fail.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
-- Fixture block for the Employee table in the failing fixture file. | ||
-- The @lineage tag below has transformations and output but no input section. | ||
-- NOTE(review) the lineage schema in the parser requires input, so this is presumably the block meant to fail validation - confirm against the test. | ||
/** | ||
* @governance { | ||
* dataSteward: "John Doe", | ||
* dataOwner: "HR Department", | ||
* classification: "Restricted" | ||
* } | ||
* @lineage { | ||
* transformations: { | ||
* type: "dataEntry", | ||
* description: "Data entered manually by HR personnel" | ||
* }, | ||
* output: { | ||
* target: "Employee", | ||
* columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"] | ||
* } | ||
* } | ||
* @traceability { | ||
* jiraIssue: "HR-456" | ||
* } | ||
* @informationSchema { | ||
* table: "Employee", | ||
* description: "Table to store employee personal and work-related information.", | ||
* columns: { | ||
* employee_id: "Primary key identifier for employees.", | ||
* first_name: "Employee's first name.", | ||
* last_name: "Employee's last name.", | ||
* email: "Employee's email address.", | ||
* phone_number: "Employee's contact number.", | ||
* hire_date: "Date the employee was hired.", | ||
* job_id: "Identifier for the employee's job title.", | ||
* salary: "Employee's salary." | ||
* } | ||
* } | ||
*/ | ||
-- The eight columns below are the same names listed under output.columns and informationSchema.columns above. | ||
CREATE TABLE Employee ( | ||
employee_id INT PRIMARY KEY, | ||
first_name VARCHAR(50), | ||
last_name VARCHAR(50), | ||
email VARCHAR(75), | ||
phone_number VARCHAR(15), | ||
hire_date DATE, | ||
job_id VARCHAR(10), | ||
salary DECIMAL(8, 2) | ||
); | ||
|
||
-- Fixture block for the get_full_name function in the failing fixture file. | ||
-- Its @lineage tag carries input, transformations and output sections. | ||
/** | ||
* @governance { | ||
* dataSteward: "John Doe", | ||
* dataOwner: "HR Department", | ||
* classification: "Restricted" | ||
* } | ||
* @lineage { | ||
* input: { | ||
* source: "Employee", | ||
* columns: ["employee_id", "first_name", "last_name"] | ||
* }, | ||
* transformations: { | ||
* type: "concatenation", | ||
* description: "Concatenating first and last names to generate full name." | ||
* }, | ||
* output: { | ||
* target: "full_name", | ||
* columns: ["full_name"] | ||
* } | ||
* } | ||
* @traceability { | ||
* jiraIssue: "HR-123" | ||
* } | ||
* @param { | ||
* employee_id: "101" | ||
* } employee_id - Identifier of the employee. | ||
* @returns { | ||
* full_name: "Mathews" | ||
* } - The full name of the employee. | ||
*/ | ||
-- Returns first_name, a single space and last_name joined together for the matching Employee row. | ||
-- VARCHAR(101) fits two VARCHAR(50) name columns plus the separating space. | ||
-- The parameter is written as get_full_name.employee_id in the WHERE clause, apparently to tell it apart from the column of the same name. | ||
CREATE FUNCTION get_full_name(employee_id INT) RETURNS VARCHAR(101) AS | ||
$$ | ||
DECLARE | ||
full_name VARCHAR(101); | ||
BEGIN | ||
SELECT first_name || ' ' || last_name INTO full_name | ||
FROM Employee | ||
WHERE Employee.employee_id = get_full_name.employee_id; | ||
RETURN full_name; | ||
END; | ||
$$ LANGUAGE plpgsql; |
90 changes: 90 additions & 0 deletions
90
lib/quality-system/governance/jsdoc/parser_test.fixture.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
-- Fixture block for the Employee table in the passing fixture file. | ||
-- The @lineage tag below carries all three sections (input, transformations, output). | ||
/** | ||
* @governance { | ||
* dataSteward: "John Doe", | ||
* dataOwner: "HR Department", | ||
* classification: "Restricted" | ||
* } | ||
* @lineage { | ||
* input: { | ||
* source: "hr_management_system", | ||
* columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"] | ||
* }, | ||
* transformations: { | ||
* type: "dataEntry", | ||
* description: "Data entered manually by HR personnel" | ||
* }, | ||
* output: { | ||
* target: "Employee", | ||
* columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"] | ||
* } | ||
* } | ||
* @traceability { | ||
* jiraIssue: "HR-456" | ||
* } | ||
* @informationSchema { | ||
* table: "Employee", | ||
* description: "Table to store employee personal and work-related information.", | ||
* columns: { | ||
* employee_id: "Primary key identifier for employees.", | ||
* first_name: "Employee's first name.", | ||
* last_name: "Employee's last name.", | ||
* email: "Employee's email address.", | ||
* phone_number: "Employee's contact number.", | ||
* hire_date: "Date the employee was hired.", | ||
* job_id: "Identifier for the employee's job title.", | ||
* salary: "Employee's salary." | ||
* } | ||
* } | ||
*/ | ||
-- The eight columns below are the same names listed under the lineage columns and informationSchema.columns above. | ||
CREATE TABLE Employee ( | ||
employee_id INT PRIMARY KEY, | ||
first_name VARCHAR(50), | ||
last_name VARCHAR(50), | ||
email VARCHAR(75), | ||
phone_number VARCHAR(15), | ||
hire_date DATE, | ||
job_id VARCHAR(10), | ||
salary DECIMAL(8, 2) | ||
); | ||
|
||
-- Fixture block for the get_full_name function in the passing fixture file. | ||
-- Its @lineage tag carries input, transformations and output sections. | ||
/** | ||
* @governance { | ||
* dataSteward: "John Doe", | ||
* dataOwner: "HR Department", | ||
* classification: "Restricted" | ||
* } | ||
* @lineage { | ||
* input: { | ||
* source: "Employee", | ||
* columns: ["employee_id", "first_name", "last_name"] | ||
* }, | ||
* transformations: { | ||
* type: "concatenation", | ||
* description: "Concatenating first and last names to generate full name." | ||
* }, | ||
* output: { | ||
* target: "full_name", | ||
* columns: ["full_name"] | ||
* } | ||
* } | ||
* @traceability { | ||
* jiraIssue: "HR-123" | ||
* } | ||
* @param { | ||
* employee_id: "101" | ||
* } employee_id - Identifier of the employee. | ||
* @returns { | ||
* full_name: "Mathews" | ||
* } - The full name of the employee. | ||
*/ | ||
-- Returns first_name, a single space and last_name joined together for the matching Employee row. | ||
-- VARCHAR(101) fits two VARCHAR(50) name columns plus the separating space. | ||
-- The parameter is written as get_full_name.employee_id in the WHERE clause, apparently to tell it apart from the column of the same name. | ||
CREATE FUNCTION get_full_name(employee_id INT) RETURNS VARCHAR(101) AS | ||
$$ | ||
DECLARE | ||
full_name VARCHAR(101); | ||
BEGIN | ||
SELECT first_name || ' ' || last_name INTO full_name | ||
FROM Employee | ||
WHERE Employee.employee_id = get_full_name.employee_id; | ||
RETURN full_name; | ||
END; | ||
$$ LANGUAGE plpgsql; |
Oops, something went wrong.