Skip to content

Commit d3bbb60

Browse files
authored
Merge pull request #163 from netspective-labs/dev-feature-quality-system
feat: implement comment parsing for data governance
2 parents 3282694 + 6d9404c commit d3bbb60

File tree

4 files changed

+518
-0
lines changed

4 files changed

+518
-0
lines changed
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import { parse as commentParser } from "npm:comment-parser";
2+
import { path, zod as z } from "../../../../deps.ts";
3+
import { Any } from "https://deno.land/[email protected]/yaml/_utils.ts";
4+
5+
// Zod schemas for the JSON payloads attached to governance-related comment
// tags. The top-level keys of `parsedCommentSchema` are the tag names under
// which the parser in this module stores each payload.

/** Shared "optional string" field used by most of the schemas below. */
const optionalText = z.string().optional();

/** Shared "list of column names" field used by the lineage input/output. */
const columnList = z.array(z.string());

/**
 * `lineage` payload: `input` is required, while `transformations` and
 * `output` may be left out.
 */
const lineageSchema = z.object({
  input: z.object({ source: z.string(), columns: columnList }),
  transformations: z
    .object({ type: z.string(), description: z.string() })
    .optional(),
  output: z.object({ target: z.string(), columns: columnList }).optional(),
});

/** `traceability` payload: an optional issue-tracker reference. */
const traceabilitySchema = z.object({ jiraIssue: optionalText });

// NOTE(review): `returnsSchema` and `paramSchema` each declare one specific
// key (`full_name`, `employee_id`). Presumably other keys are dropped by
// zod's default object handling - confirm whether these should be records.
/** `returns` payload. */
const returnsSchema = z.object({ full_name: optionalText });

/** `param` payload. */
const paramSchema = z.object({ employee_id: optionalText });

/** `codeReview` payload: optional reviewed flag. */
const codeReviewSchema = z.object({ isReviewed: z.boolean().optional() });

/** `informationSchema` payload: table name, description and per-column text. */
const informationSchema = z.object({
  table: optionalText,
  description: optionalText,
  columns: z.record(z.string()).optional(),
});

/**
 * `governance` payload: stewardship fields plus optional nested `lineage`
 * and `traceability` objects.
 */
const governanceSchema = z.object({
  dataSteward: optionalText,
  dataOwner: optionalText,
  classification: optionalText,
  lineage: lineageSchema.optional(),
  traceability: traceabilitySchema.optional(),
});

/** Shape of one parsed comment block; every tag is optional. */
const parsedCommentSchema = z.object({
  governance: governanceSchema.optional(),
  lineage: lineageSchema.optional(),
  traceability: traceabilitySchema.optional(),
  function: optionalText,
  arguments: z.record(z.string()).optional(),
  returns: returnsSchema.optional(), //z.string().optional(),
  param: paramSchema.optional(),
  codeReview: codeReviewSchema.optional(),
  informationSchema: informationSchema.optional(),
});

/** Static type inferred from `parsedCommentSchema`. */
type TagData = z.infer<typeof parsedCommentSchema>;
57+
58+
/**
 * Turns a relaxed, JS-style object body (`key: "value"`) into JSON by
 * wrapping bare keys in double quotes.
 *
 * Double-quoted string literals are matched first and returned untouched, so
 * a `word:` sequence inside a value (for example `"Note: see http://x"`) and
 * keys that are already quoted are left alone. Only `\w+` followed by
 * optional whitespace and a colon outside of a string literal is rewritten.
 *
 * @param text Object body without the surrounding braces.
 * @returns The same text with every bare key quoted.
 */
function quoteBareKeys(text: string): string {
  return text.replace(
    /"(?:[^"\\]|\\.)*"|(\w+)\s*:/g,
    // `key` is undefined when the string-literal alternative matched.
    (match: string, key?: string) => key === undefined ? match : `"${key}":`,
  );
}

/**
 * Extracts the tag payloads of every doc comment found in `content` without
 * validating them against `parsedCommentSchema`.
 *
 * Each comment block starts from the same placeholder defaults; every tag in
 * the block then replaces the top-level key named after the tag with the
 * object obtained by parsing the tag's `type` text (presumably the text
 * between the tag's braces, as reported by comment-parser) as JSON.
 *
 * @param content Source text containing doc comments.
 * @returns One loosely typed entry per comment block, in source order.
 * @throws SyntaxError (from `JSON.parse`) when a tag's type text is not a
 *   JSON-compatible object body once its bare keys have been quoted.
 */
export function unsafeSourceComments(content: string): TagData[] {
  const tagDataResult: TagData[] = [];

  for (const block of commentParser(content)) {
    // Keyed by tag name; tags outside the schema are stored as well and are
    // only filtered out later by schema validation.
    const tagData: Record<string, unknown> = {
      governance: { lineage: { input: { columns: [], source: "" } } },
      lineage: { input: { source: "", columns: [] } },
      codeReview: {},
      traceability: {},
      informationSchema: {},
      param: {},
      returns: {},
    };

    for (const tag of block.tags) {
      // Quote keys per tag body instead of rewriting the whole file, so
      // string values containing "word:" are not corrupted.
      tagData[tag.tag.trim()] = JSON.parse(`{${quoteBareKeys(tag.type)}}`);
    }

    // Unvalidated by design: callers needing guarantees must run the schema.
    tagDataResult.push(tagData as TagData);
  }
  return tagDataResult;
}
82+
83+
/**
 * Parses the doc comments in `content` and keeps only the blocks that pass
 * `parsedCommentSchema`.
 *
 * Blocks that fail validation are dropped silently; no error is reported or
 * thrown for them. Errors thrown by `unsafeSourceComments` itself are not
 * caught here.
 *
 * @param content Source text containing doc comments.
 * @returns The schema-parsed data of every valid block, in source order.
 */
export function validatedSourceComments(content: string): TagData[] {
  return unsafeSourceComments(content).flatMap((candidate) => {
    const outcome = parsedCommentSchema.safeParse(candidate);
    // An empty array contributes nothing, which discards invalid blocks.
    return outcome.success ? [outcome.data] : [];
  });
}
98+
99+
/**
 * Returns the validated comment blocks of `content` with top-level lineage
 * folded into the governance object.
 *
 * For a block that has both `governance` and `lineage`, the lineage object
 * replaces `governance.lineage` and the top-level `lineage` key is removed.
 * Blocks missing either key are passed through unchanged.
 *
 * @param content Source text containing doc comments.
 * @returns The validated blocks, in source order, after the merge.
 */
export function governedSourceComments(content: string): TagData[] {
  return validatedSourceComments(content).map((entry) => {
    if (entry.governance && entry.lineage) {
      // Move (not copy) the lineage so it only lives under governance.
      entry.governance.lineage = entry.lineage;
      delete entry.lineage;
    }
    return entry;
  });
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/**
2+
* @governance {
3+
* dataSteward: "John Doe",
4+
* dataOwner: "HR Department",
5+
* classification: "Restricted"
6+
* }
7+
* @lineage {
8+
* transformations: {
9+
* type: "dataEntry",
10+
* description: "Data entered manually by HR personnel"
11+
* },
12+
* output: {
13+
* target: "Employee",
14+
* columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"]
15+
* }
16+
* }
17+
* @traceability {
18+
* jiraIssue: "HR-456"
19+
* }
20+
* @informationSchema {
21+
* table: "Employee",
22+
* description: "Table to store employee personal and work-related information.",
23+
* columns: {
24+
* employee_id: "Primary key identifier for employees.",
25+
* first_name: "Employee's first name.",
26+
* last_name: "Employee's last name.",
27+
* email: "Employee's email address.",
28+
* phone_number: "Employee's contact number.",
29+
* hire_date: "Date the employee was hired.",
30+
* job_id: "Identifier for the employee's job title.",
31+
* salary: "Employee's salary."
32+
* }
33+
* }
34+
*/
35+
-- Employee records, keyed by employee_id.
CREATE TABLE Employee (
    employee_id   INT PRIMARY KEY,
    first_name    VARCHAR(50),
    last_name     VARCHAR(50),
    email         VARCHAR(75),
    phone_number  VARCHAR(15),
    hire_date     DATE,
    job_id        VARCHAR(10),
    salary        DECIMAL(8, 2)
);
45+
46+
/**
47+
* @governance {
48+
* dataSteward: "John Doe",
49+
* dataOwner: "HR Department",
50+
* classification: "Restricted"
51+
* }
52+
* @lineage {
53+
* input: {
54+
* source: "Employee",
55+
* columns: ["employee_id", "first_name", "last_name"]
56+
* },
57+
* transformations: {
58+
* type: "concatenation",
59+
* description: "Concatenating first and last names to generate full name."
60+
* },
61+
* output: {
62+
* target: "full_name",
63+
* columns: ["full_name"]
64+
* }
65+
* }
66+
* @traceability {
67+
* jiraIssue: "HR-123"
68+
* }
69+
* @param {
70+
* employee_id: "101"
71+
* } employee_id - Identifier of the employee.
72+
* @returns {
73+
* full_name: "Mathews"
74+
* } - The full name of the employee.
75+
*/
76+
-- Looks up one employee by id and returns first_name and last_name joined
-- by a single space.
CREATE FUNCTION get_full_name(employee_id INT)
RETURNS VARCHAR(101)
LANGUAGE plpgsql
AS $$
DECLARE
    combined_name VARCHAR(101);
BEGIN
    -- The parameter is qualified with the function name because it shares
    -- its name with the Employee.employee_id column.
    SELECT first_name || ' ' || last_name
      INTO combined_name
      FROM Employee
     WHERE Employee.employee_id = get_full_name.employee_id;

    RETURN combined_name;
END;
$$;
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/**
2+
* @governance {
3+
* dataSteward: "John Doe",
4+
* dataOwner: "HR Department",
5+
* classification: "Restricted"
6+
* }
7+
* @lineage {
8+
* input: {
9+
* source: "hr_management_system",
10+
* columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"]
11+
* },
12+
* transformations: {
13+
* type: "dataEntry",
14+
* description: "Data entered manually by HR personnel"
15+
* },
16+
* output: {
17+
* target: "Employee",
18+
* columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"]
19+
* }
20+
* }
21+
* @traceability {
22+
* jiraIssue: "HR-456"
23+
* }
24+
* @informationSchema {
25+
* table: "Employee",
26+
* description: "Table to store employee personal and work-related information.",
27+
* columns: {
28+
* employee_id: "Primary key identifier for employees.",
29+
* first_name: "Employee's first name.",
30+
* last_name: "Employee's last name.",
31+
* email: "Employee's email address.",
32+
* phone_number: "Employee's contact number.",
33+
* hire_date: "Date the employee was hired.",
34+
* job_id: "Identifier for the employee's job title.",
35+
* salary: "Employee's salary."
36+
* }
37+
* }
38+
*/
39+
-- Employee records, keyed by employee_id.
CREATE TABLE Employee (
    employee_id   INT PRIMARY KEY,
    first_name    VARCHAR(50),
    last_name     VARCHAR(50),
    email         VARCHAR(75),
    phone_number  VARCHAR(15),
    hire_date     DATE,
    job_id        VARCHAR(10),
    salary        DECIMAL(8, 2)
);
49+
50+
/**
51+
* @governance {
52+
* dataSteward: "John Doe",
53+
* dataOwner: "HR Department",
54+
* classification: "Restricted"
55+
* }
56+
* @lineage {
57+
* input: {
58+
* source: "Employee",
59+
* columns: ["employee_id", "first_name", "last_name"]
60+
* },
61+
* transformations: {
62+
* type: "concatenation",
63+
* description: "Concatenating first and last names to generate full name."
64+
* },
65+
* output: {
66+
* target: "full_name",
67+
* columns: ["full_name"]
68+
* }
69+
* }
70+
* @traceability {
71+
* jiraIssue: "HR-123"
72+
* }
73+
* @param {
74+
* employee_id: "101"
75+
* } employee_id - Identifier of the employee.
76+
* @returns {
77+
* full_name: "Mathews"
78+
* } - The full name of the employee.
79+
*/
80+
-- Looks up one employee by id and returns first_name and last_name joined
-- by a single space.
CREATE FUNCTION get_full_name(employee_id INT)
RETURNS VARCHAR(101)
LANGUAGE plpgsql
AS $$
DECLARE
    combined_name VARCHAR(101);
BEGIN
    -- The parameter is qualified with the function name because it shares
    -- its name with the Employee.employee_id column.
    SELECT first_name || ' ' || last_name
      INTO combined_name
      FROM Employee
     WHERE Employee.employee_id = get_full_name.employee_id;

    RETURN combined_name;
END;
$$;

0 commit comments

Comments
 (0)