Skip to content

Commit

Permalink
Merge pull request #163 from netspective-labs/dev-feature-quality-system
Browse files Browse the repository at this point in the history
feat: implement comment parsing for data governance
  • Loading branch information
shah authored Oct 19, 2023
2 parents 3282694 + 6d9404c commit d3bbb60
Show file tree
Hide file tree
Showing 4 changed files with 518 additions and 0 deletions.
112 changes: 112 additions & 0 deletions lib/quality-system/governance/jsdoc/parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import { parse as commentParser } from "npm:comment-parser";
import { path, zod as z } from "../../../../deps.ts";
import { Any } from "https://deno.land/[email protected]/yaml/_utils.ts";

/**
 * Shape of a `lineage` entry: a required `input` section (a source name plus
 * column names) and optional `transformations` and `output` sections.
 */
const lineageSchema = z.object({
  input: z.object({
    source: z.string(),
    columns: z.string().array(),
  }),
  transformations: z
    .object({
      type: z.string(),
      description: z.string(),
    })
    .optional(),
  output: z
    .object({
      target: z.string(),
      columns: z.string().array(),
    })
    .optional(),
});

/** Shape of a `traceability` entry; the issue reference may be omitted. */
const traceabilitySchema = z.object({
  jiraIssue: z.string().optional(),
});

/** Shape of a `returns` entry. */
const returnsSchema = z.object({
  full_name: z.string().optional(),
});

/** Shape of a `param` entry. */
const paramSchema = z.object({
  employee_id: z.string().optional(),
});

/** Shape of a `codeReview` entry. */
const codeReviewSchema = z.object({
  isReviewed: z.boolean().optional(),
});

/**
 * Shape of an `informationSchema` entry: a table name, a description and a
 * map from column name to a string describing it. Every field is optional.
 */
const informationSchema = z.object({
  table: z.string().optional(),
  description: z.string().optional(),
  columns: z.record(z.string()).optional(),
});

/**
 * Shape of a `governance` entry. It may carry its own nested `lineage` and
 * `traceability` sections in addition to the three plain string fields.
 */
const governanceSchema = z.object({
  dataSteward: z.string().optional(),
  dataOwner: z.string().optional(),
  classification: z.string().optional(),
  lineage: lineageSchema.optional(),
  traceability: traceabilitySchema.optional(),
});

/**
 * Shape of everything collected from a single doc-comment block. Each key is
 * optional, so a block only has to satisfy the schemas of the entries it
 * actually contains.
 */
const parsedCommentSchema = z.object({
  governance: governanceSchema.optional(),
  lineage: lineageSchema.optional(),
  traceability: traceabilitySchema.optional(),
  function: z.string().optional(),
  arguments: z.record(z.string()).optional(),
  returns: returnsSchema.optional(),
  param: paramSchema.optional(),
  codeReview: codeReviewSchema.optional(),
  informationSchema: informationSchema.optional(),
});

/** Static type of one parsed comment block, derived from `parsedCommentSchema`. */
type TagData = z.infer<typeof parsedCommentSchema>;

/**
 * Turn a relaxed, JavaScript-style object body into JSON by wrapping bare keys
 * (`key:`) in double quotes.
 *
 * Double-quoted string literals are matched first and returned untouched, so
 * text such as `"see https://example.com"` or `"Note: manual"` inside a value
 * is not mistaken for a key.
 *
 * @param text object body without the surrounding braces
 * @returns the same text with every bare key quoted
 */
function quoteBareKeys(text: string): string {
  return text.replace(
    /"(?:[^"\\]|\\.)*"|(\w+)\s*:/g,
    (match: string, key: string | undefined) =>
      key === undefined ? match : `"${key}":`,
  );
}

/**
 * Parse every doc-comment block in `content` and collect the object written
 * between braces after each `@tag` into one record per block, keyed by tag
 * name. The result is NOT validated against `parsedCommentSchema`.
 *
 * Each record starts out with empty placeholder entries (see the literal
 * below); a tag found in the comment replaces the placeholder of the same
 * name wholesale.
 *
 * @param content source text containing doc-comment blocks
 * @returns one record per comment block, in the order the parser yields them
 * @throws SyntaxError when a tag body is not valid JSON after key quoting
 */
export function unsafeSourceComments(content: string): TagData[] {
  const tagDataResult: TagData[] = [];

  for (const block of commentParser(content)) {
    const tagData: Partial<TagData> = {
      governance: { lineage: { input: { columns: [], source: "" } } },
      lineage: { input: { source: "", columns: [] } },
      codeReview: {},
      traceability: {},
      informationSchema: {},
      param: {},
      returns: {},
    };
    // Tag names are arbitrary strings, so write through a string-indexed view.
    const slots = tagData as Record<string, unknown>;

    for (const tag of block.tags) {
      const tagKey = tag.tag.trim();
      // Quote keys only inside the tag body, not across the whole file, so
      // unrelated text and string values are never rewritten.
      const json = `{${quoteBareKeys(tag.type)}}`;
      try {
        slots[tagKey] = JSON.parse(json);
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new SyntaxError(
          `unable to parse the body of @${tagKey} as JSON: ${reason}`,
        );
      }
    }
    tagDataResult.push(tagData);
  }
  return tagDataResult;
}

/**
 * Parse the doc-comment blocks in `content` and keep only those that satisfy
 * `parsedCommentSchema`.
 *
 * Blocks that fail validation are left out of the result. Pass `onInvalid` to
 * be told about each rejected block instead of having it disappear silently;
 * when the callback is omitted the behaviour is the same as before it existed.
 *
 * @param content source text containing doc-comment blocks
 * @param onInvalid optional observer called with the validation error and the
 *   unvalidated block for every block that is rejected
 * @returns the schema-parsed data of every valid block, in source order
 */
export function validatedSourceComments(
  content: string,
  onInvalid?: (
    error: Extract<
      ReturnType<typeof parsedCommentSchema.safeParse>,
      { success: false }
    >["error"],
    block: TagData,
  ) => void,
): TagData[] {
  const tagDataResult: TagData[] = [];

  for (const block of unsafeSourceComments(content)) {
    const result = parsedCommentSchema.safeParse(block);

    if (result.success) {
      tagDataResult.push(result.data);
    } else {
      // Best-effort by design: skip the block, but let the caller observe why.
      onInvalid?.(result.error, block);
    }
  }
  return tagDataResult;
}

/**
 * Return the validated comment blocks of `content` with each block's
 * top-level `lineage` folded into its `governance` entry.
 *
 * When a block has both entries, `governance.lineage` is overwritten with the
 * top-level `lineage` and the top-level key is removed. Blocks missing either
 * entry are passed through unchanged.
 *
 * @param content source text containing doc-comment blocks
 * @returns the validated blocks, in the same order
 */
export function governedSourceComments(content: string): TagData[] {
  return validatedSourceComments(content).map((entry) => {
    const { governance, lineage } = entry;
    if (governance && lineage) {
      governance.lineage = lineage;
      delete entry.lineage;
    }
    return entry;
  });
}
86 changes: 86 additions & 0 deletions lib/quality-system/governance/jsdoc/parser_test.fixture-fail.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/**
* @governance {
* dataSteward: "John Doe",
* dataOwner: "HR Department",
* classification: "Restricted"
* }
* @lineage {
* transformations: {
* type: "dataEntry",
* description: "Data entered manually by HR personnel"
* },
* output: {
* target: "Employee",
* columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"]
* }
* }
* @traceability {
* jiraIssue: "HR-456"
* }
* @informationSchema {
* table: "Employee",
* description: "Table to store employee personal and work-related information.",
* columns: {
* employee_id: "Primary key identifier for employees.",
* first_name: "Employee's first name.",
* last_name: "Employee's last name.",
* email: "Employee's email address.",
* phone_number: "Employee's contact number.",
* hire_date: "Date the employee was hired.",
* job_id: "Identifier for the employee's job title.",
* salary: "Employee's salary."
* }
* }
*/
-- Employee table, one row per employee, keyed by employee_id.
-- NOTE(review): the @lineage tag in the doc comment above has no `input` section, which the
-- parser's lineage schema requires; presumably that is why this file is the "fail" fixture (confirm against the test).
CREATE TABLE Employee (
employee_id INT PRIMARY KEY,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(75),
phone_number VARCHAR(15),
hire_date DATE,
job_id VARCHAR(10),
salary DECIMAL(8, 2)
);

/**
* @governance {
* dataSteward: "John Doe",
* dataOwner: "HR Department",
* classification: "Restricted"
* }
* @lineage {
* input: {
* source: "Employee",
* columns: ["employee_id", "first_name", "last_name"]
* },
* transformations: {
* type: "concatenation",
* description: "Concatenating first and last names to generate full name."
* },
* output: {
* target: "full_name",
* columns: ["full_name"]
* }
* }
* @traceability {
* jiraIssue: "HR-123"
* }
* @param {
* employee_id: "101"
* } employee_id - Identifier of the employee.
* @returns {
* full_name: "Mathews"
* } - The full name of the employee.
*/
-- Returns first_name and last_name, joined by a single space, of the Employee row
-- whose employee_id equals the argument.
-- VARCHAR(101) holds two VARCHAR(50) name columns plus the separating space.
CREATE FUNCTION get_full_name(employee_id INT) RETURNS VARCHAR(101) AS
$$
DECLARE
full_name VARCHAR(101);
BEGIN
SELECT first_name || ' ' || last_name INTO full_name
FROM Employee
-- The parameter shares its name with the column, so it is qualified with the function name.
WHERE Employee.employee_id = get_full_name.employee_id;
RETURN full_name;
END;
$$ LANGUAGE plpgsql;
90 changes: 90 additions & 0 deletions lib/quality-system/governance/jsdoc/parser_test.fixture.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/**
* @governance {
* dataSteward: "John Doe",
* dataOwner: "HR Department",
* classification: "Restricted"
* }
* @lineage {
* input: {
* source: "hr_management_system",
* columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"]
* },
* transformations: {
* type: "dataEntry",
* description: "Data entered manually by HR personnel"
* },
* output: {
* target: "Employee",
* columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"]
* }
* }
* @traceability {
* jiraIssue: "HR-456"
* }
* @informationSchema {
* table: "Employee",
* description: "Table to store employee personal and work-related information.",
* columns: {
* employee_id: "Primary key identifier for employees.",
* first_name: "Employee's first name.",
* last_name: "Employee's last name.",
* email: "Employee's email address.",
* phone_number: "Employee's contact number.",
* hire_date: "Date the employee was hired.",
* job_id: "Identifier for the employee's job title.",
* salary: "Employee's salary."
* }
* }
*/
-- Employee table, one row per employee, keyed by employee_id.
CREATE TABLE Employee (
employee_id INT PRIMARY KEY,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(75),
phone_number VARCHAR(15),
hire_date DATE,
job_id VARCHAR(10),
salary DECIMAL(8, 2)
);

/**
* @governance {
* dataSteward: "John Doe",
* dataOwner: "HR Department",
* classification: "Restricted"
* }
* @lineage {
* input: {
* source: "Employee",
* columns: ["employee_id", "first_name", "last_name"]
* },
* transformations: {
* type: "concatenation",
* description: "Concatenating first and last names to generate full name."
* },
* output: {
* target: "full_name",
* columns: ["full_name"]
* }
* }
* @traceability {
* jiraIssue: "HR-123"
* }
* @param {
* employee_id: "101"
* } employee_id - Identifier of the employee.
* @returns {
* full_name: "Mathews"
* } - The full name of the employee.
*/
-- Returns first_name and last_name, joined by a single space, of the Employee row
-- whose employee_id equals the argument.
-- VARCHAR(101) holds two VARCHAR(50) name columns plus the separating space.
CREATE FUNCTION get_full_name(employee_id INT) RETURNS VARCHAR(101) AS
$$
DECLARE
full_name VARCHAR(101);
BEGIN
SELECT first_name || ' ' || last_name INTO full_name
FROM Employee
-- The parameter shares its name with the column, so it is qualified with the function name.
WHERE Employee.employee_id = get_full_name.employee_id;
RETURN full_name;
END;
$$ LANGUAGE plpgsql;
Loading

0 comments on commit d3bbb60

Please sign in to comment.