Merge pull request #163 from netspective-labs/dev-feature-quality-system

feat: implement comment parsing for data governance
netspective-labs · Oct 19, 2023 · d3bbb60 · d3bbb60
2 parents 3282694 + 6d9404c
commit d3bbb60
Show file tree

Hide file tree

Showing 4 changed files with 518 additions and 0 deletions.
diff --git a/lib/quality-system/governance/jsdoc/parser.ts b/lib/quality-system/governance/jsdoc/parser.ts
@@ -0,0 +1,112 @@
+import { parse as commentParser } from "npm:comment-parser";
+import { path, zod as z } from "../../../../deps.ts";
+import { Any } from "https://deno.land/[email protected]/yaml/_utils.ts";
+
+const lineageSchema = z.object({ // shape of an @lineage payload; "input" is the only required member
+  input: z.object({ // required: a lineage object without it does not validate
+    source: z.string(),
+    columns: z.array(z.string()),
+  }),
+  transformations: z.object({ // optional; type and description are both required when present
+    type: z.string(),
+    description: z.string(),
+  }).optional(),
+  output: z.object({ // optional; target and columns are both required when present
+    target: z.string(),
+    columns: z.array(z.string()),
+  }).optional(),
+});
+
+const traceabilitySchema = z.object({ // shape of an @traceability payload
+  jiraIssue: z.string().optional(),
+});
+const returnsSchema = z.object({ // shape of an @returns payload
+  full_name: z.string().optional(), // NOTE(review): key mirrors the get_full_name fixture; looks fixture-specific, confirm
+});
+const paramSchema = z.object({ // shape of an @param payload
+  employee_id: z.string().optional(), // NOTE(review): key mirrors the get_full_name fixture; looks fixture-specific, confirm
+});
+const codeReviewSchema = z.object({ // shape of an @codeReview payload
+  isReviewed: z.boolean().optional(),
+});
+const informationSchema = z.object({ // shape of an @informationSchema payload
+  table: z.string().optional(),
+  description: z.string().optional(),
+  columns: z.record(z.string()).optional(), // string-to-string map; the fixtures use column name -> description
+});
+const governanceSchema = z.object({ // shape of an @governance payload; every member is optional
+  dataSteward: z.string().optional(),
+  dataOwner: z.string().optional(),
+  classification: z.string().optional(),
+  lineage: lineageSchema.optional(), // filled in by governedSourceComments from the block's own lineage entry
+  traceability: traceabilitySchema.optional(),
+});
+const parsedCommentSchema = z.object({ // one member per recognised tag; keys must equal the tag text as written (no case folding)
+  governance: governanceSchema.optional(),
+  lineage: lineageSchema.optional(),
+  traceability: traceabilitySchema.optional(),
+  function: z.string().optional(),
+  arguments: z.record(z.string()).optional(),
+  returns: returnsSchema.optional(), // alternative left by the author for reference: z.string().optional()
+  param: paramSchema.optional(),
+  codeReview: codeReviewSchema.optional(),
+  informationSchema: informationSchema.optional(),
+});
+
+type TagData = z.infer<typeof parsedCommentSchema>; // static type of one parsed comment block
+
+/**
+ * Turn the relaxed-JSON text of a tag payload into strict JSON by wrapping
+ * bare object keys in double quotes (`jiraIssue: "X"` -> `"jiraIssue": "X"`).
+ *
+ * Double-quoted string literals are matched first and returned untouched, so
+ * a value that happens to contain `word:` (for example `"Ratio 1:2"`) is not
+ * rewritten.
+ */
+function quoteBareKeys(payload: string): string {
+  return payload.replace(
+    /"(?:[^"\\]|\\.)*"|(\w+)\s*:/g,
+    (match: string, key: string | undefined) =>
+      key === undefined ? match : `"${key}":`,
+  );
+}
+
+/**
+ * Extract the tag payloads of every doc-comment block in `content` WITHOUT
+ * validating them against `parsedCommentSchema` (hence "unsafe").
+ *
+ * Each tag is expected to carry an object literal where JSDoc would put a
+ * type, e.g. `@traceability { jiraIssue: "HR-456" }`. The text between the
+ * outer braces is key-quoted, re-wrapped in braces and parsed as JSON; the
+ * result is stored under the tag name exactly as written.
+ *
+ * @param content source text containing `/** ... *\/` comment blocks
+ * @returns one entry per comment block returned by the comment parser, each
+ *   pre-populated with empty defaults that a tag of the same name replaces
+ * @throws SyntaxError when a tag payload is not valid JSON after key quoting;
+ *   the message names the offending tag
+ */
+export function unsafeSourceComments(content: string): TagData[] {
+  const tagDataResult: TagData[] = [];
+
+  for (const block of commentParser(content)) {
+    // Fresh defaults per block so that no nested object is shared between
+    // entries of the result.
+    const tagData: Partial<TagData> = {
+      governance: { lineage: { input: { columns: [], source: "" } } },
+      lineage: { input: { source: "", columns: [] } },
+      codeReview: {},
+      traceability: {},
+      informationSchema: {},
+      param: {},
+      returns: {},
+    };
+
+    for (const tag of block.tags) {
+      const tagKey = tag.tag.trim();
+      let parsed: unknown;
+      try {
+        // `tag.type` is the payload without its outer braces; restore them.
+        parsed = JSON.parse(`{${quoteBareKeys(tag.type)}}`);
+      } catch (cause) {
+        const reason = cause instanceof Error ? cause.message : String(cause);
+        throw new SyntaxError(`invalid @${tagKey} payload: ${reason}`);
+      }
+      // Unknown tag names are stored as well; validation happens later.
+      (tagData as Record<string, unknown>)[tagKey] = parsed;
+    }
+    tagDataResult.push(tagData);
+  }
+  return tagDataResult;
+}
+
+/**
+ * Parse the comment blocks of `content` and keep only those that satisfy
+ * `parsedCommentSchema`.
+ *
+ * Blocks that fail validation are left out of the result. Pass `onInvalid`
+ * to learn which blocks were dropped and why; without it they are discarded
+ * silently, as before.
+ *
+ * @param content source text containing `/** ... *\/` comment blocks
+ * @param onInvalid optional observer called once per rejected block with the
+ *   validation error and the unvalidated block
+ * @returns the schema-parsed data of every valid block, in parser order
+ * @throws SyntaxError propagated from `unsafeSourceComments` when a tag
+ *   payload cannot be parsed
+ */
+export function validatedSourceComments(
+  content: string,
+  onInvalid?: (error: z.ZodError, block: TagData) => void,
+): TagData[] {
+  const tagDataResult: TagData[] = [];
+
+  for (const block of unsafeSourceComments(content)) {
+    const result = parsedCommentSchema.safeParse(block);
+
+    if (result.success) {
+      // Push the schema output, not the raw block.
+      tagDataResult.push(result.data);
+    } else {
+      onInvalid?.(result.error, block);
+    }
+  }
+  return tagDataResult;
+}
+
+/**
+ * Return the validated comment blocks of `content` with each block's
+ * top-level `lineage` folded into its `governance` object.
+ *
+ * When a block has both members, `governance.lineage` is overwritten with
+ * the top-level lineage and the top-level `lineage` key is removed. Blocks
+ * missing either member are passed through unchanged.
+ *
+ * @param content source text containing `/** ... *\/` comment blocks
+ * @returns the validated blocks, modified in place as described above
+ */
+export function governedSourceComments(content: string): TagData[] {
+  return validatedSourceComments(content).map((entry) => {
+    const { governance, lineage } = entry;
+    if (governance && lineage) {
+      governance.lineage = lineage;
+      delete entry.lineage;
+    }
+    return entry;
+  });
+}
diff --git a/lib/quality-system/governance/jsdoc/parser_test.fixture-fail.sql b/lib/quality-system/governance/jsdoc/parser_test.fixture-fail.sql
@@ -0,0 +1,86 @@
+/**
+ * @governance {
+ *   dataSteward: "John Doe",
+ *   dataOwner: "HR Department",
+ *   classification: "Restricted"
+ * }
+ * @lineage {
+ *   transformations: {
+ *     type: "dataEntry",
+ *     description: "Data entered manually by HR personnel"
+ *   },
+ *   output: {
+ *     target: "Employee",
+ *     columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"]
+ *   }
+ * }
+ * @traceability {
+ *   jiraIssue: "HR-456"
+ * }
+ * @informationSchema {
+ *   table: "Employee",
+ *   description: "Table to store employee personal and work-related information.",
+ *   columns: {
+ *     employee_id: "Primary key identifier for employees.",
+ *     first_name: "Employee's first name.",
+ *     last_name: "Employee's last name.",
+ *     email: "Employee's email address.",
+ *     phone_number: "Employee's contact number.",
+ *     hire_date: "Date the employee was hired.",
+ *     job_id: "Identifier for the employee's job title.",
+ *     salary: "Employee's salary."
+ *   }
+ * }
+ */
+CREATE TABLE Employee ( -- fixture table; the @lineage tag in the comment above has no "input" entry
+    employee_id INT PRIMARY KEY,
+    first_name VARCHAR(50),
+    last_name VARCHAR(50),
+    email VARCHAR(75),
+    phone_number VARCHAR(15),
+    hire_date DATE,
+    job_id VARCHAR(10),
+    salary DECIMAL(8, 2) -- 8 digits in total, 2 of them after the decimal point
+);
+
+/**
+ * @governance {
+ *   dataSteward: "John Doe",
+ *   dataOwner: "HR Department",
+ *   classification: "Restricted"
+ * }
+ * @lineage {
+ *   input: {
+ *     source: "Employee",
+ *     columns: ["employee_id", "first_name", "last_name"]
+ *   },
+ *   transformations: {
+ *     type: "concatenation",
+ *     description: "Concatenating first and last names to generate full name."
+ *   },
+ *   output: {
+ *     target: "full_name",
+ *     columns: ["full_name"]
+ *   }
+ * }
+ * @traceability {
+ *   jiraIssue: "HR-123"
+ * }
+ * @param {
+ *   employee_id: "101"
+ * } employee_id - Identifier of the employee.
+ * @returns {
+ *   full_name: "Mathews"
+ * } - The full name of the employee.
+ */
+CREATE FUNCTION get_full_name(employee_id INT) RETURNS VARCHAR(101) AS -- presumably 101 = first_name(50) + space + last_name(50)
+$$
+DECLARE
+    full_name VARCHAR(101);
+BEGIN
+    SELECT first_name || ' ' || last_name INTO full_name -- first and last name joined by a single space
+    FROM Employee
+    WHERE Employee.employee_id = get_full_name.employee_id; -- right-hand side is the function parameter, qualified by the function name
+    RETURN full_name;
+END;
+$$ LANGUAGE plpgsql;
diff --git a/lib/quality-system/governance/jsdoc/parser_test.fixture.sql b/lib/quality-system/governance/jsdoc/parser_test.fixture.sql
@@ -0,0 +1,90 @@
+/**
+ * @governance {
+ *   dataSteward: "John Doe",
+ *   dataOwner: "HR Department",
+ *   classification: "Restricted"
+ * }
+ * @lineage {
+ *   input: {
+ *     source: "hr_management_system",
+ *     columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"]
+ *   },
+ *   transformations: {
+ *     type: "dataEntry",
+ *     description: "Data entered manually by HR personnel"
+ *   },
+ *   output: {
+ *     target: "Employee",
+ *     columns: ["employee_id", "first_name", "last_name", "email", "phone_number", "hire_date", "job_id", "salary"]
+ *   }
+ * } 
+ * @traceability {
+ *   jiraIssue: "HR-456"
+ * }
+ * @informationSchema {
+ *   table: "Employee",
+ *   description: "Table to store employee personal and work-related information.",
+ *   columns: {
+ *     employee_id: "Primary key identifier for employees.",
+ *     first_name: "Employee's first name.",
+ *     last_name: "Employee's last name.",
+ *     email: "Employee's email address.",
+ *     phone_number: "Employee's contact number.",
+ *     hire_date: "Date the employee was hired.",
+ *     job_id: "Identifier for the employee's job title.",
+ *     salary: "Employee's salary."
+ *   }
+ * }
+ */
+CREATE TABLE Employee ( -- fixture table; its columns match the @informationSchema tag in the comment above
+    employee_id INT PRIMARY KEY,
+    first_name VARCHAR(50),
+    last_name VARCHAR(50),
+    email VARCHAR(75),
+    phone_number VARCHAR(15),
+    hire_date DATE,
+    job_id VARCHAR(10),
+    salary DECIMAL(8, 2) -- 8 digits in total, 2 of them after the decimal point
+);
+
+/**
+ * @governance {
+ *   dataSteward: "John Doe",
+ *   dataOwner: "HR Department",
+ *   classification: "Restricted"
+ * }
+ * @lineage {
+ *   input: {
+ *     source: "Employee",
+ *     columns: ["employee_id", "first_name", "last_name"]
+ *   },
+ *   transformations: {
+ *     type: "concatenation",
+ *     description: "Concatenating first and last names to generate full name."
+ *   },
+ *   output: {
+ *     target: "full_name",
+ *     columns: ["full_name"]
+ *   }
+ * }
+ * @traceability {
+ *   jiraIssue: "HR-123"
+ * }
+ * @param {
+ *   employee_id: "101"
+ * } employee_id - Identifier of the employee.
+ * @returns {
+ *   full_name: "Mathews"
+ * } - The full name of the employee.
+ */
+CREATE FUNCTION get_full_name(employee_id INT) RETURNS VARCHAR(101) AS -- presumably 101 = first_name(50) + space + last_name(50)
+$$
+DECLARE
+    full_name VARCHAR(101);
+BEGIN
+    SELECT first_name || ' ' || last_name INTO full_name -- first and last name joined by a single space
+    FROM Employee
+    WHERE Employee.employee_id = get_full_name.employee_id; -- right-hand side is the function parameter, qualified by the function name
+    RETURN full_name;
+END;
+$$ LANGUAGE plpgsql;