From 6a3a2cd053875e235d236b1bc8ca48b4bd8f7a11 Mon Sep 17 00:00:00 2001 From: Doug Martin Date: Wed, 22 May 2024 15:56:46 -0400 Subject: [PATCH] feat: Hash usernames using SHA1 when hiding names [PT-184362179] --- query-creator/create-query/app.js | 4 +-- query-creator/create-query/steps/aws.js | 33 +++++++++++++++---------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/query-creator/create-query/app.js b/query-creator/create-query/app.js index f502201..d74cf42 100644 --- a/query-creator/create-query/app.js +++ b/query-creator/create-query/app.js @@ -139,8 +139,8 @@ const learnersReport = async (params, body, tokenServiceEnv, debugSQL, reportSer const doLearnerLogReporting= async () => { // generate the sql for the query const sql = narrowLearners - ? aws.generateNarrowLogSQL(queryIdsPerRunnable, authDomain, reportServiceSource) // hideNames not needed here as no learner info is output - : aws.generateLearnerLogSQL(queryIdsPerRunnable, authDomain, reportServiceSource, hideNames); + ? aws.generateNarrowLogSQL(queryIdsPerRunnable, hideNames) + : aws.generateLearnerLogSQL(queryIdsPerRunnable, hideNames); if (debugSQL) { sqlOutput.push(sql); diff --git a/query-creator/create-query/steps/aws.js b/query-creator/create-query/steps/aws.js index 6fc51e0..ac60cb0 100644 --- a/query-creator/create-query/steps/aws.js +++ b/query-creator/create-query/steps/aws.js @@ -543,25 +543,31 @@ exports.generateSQL = (runnableInfo, usageReport, authDomain, sourceKey, hideNam GROUP BY l.run_remote_endpoint )` */ +const getLogCols = (hideNames) => { + return ["id", "session", "username", "application", "activity", "event", "event_value", "time", "parameters", "extras", "run_remote_endpoint", "timestamp"] + .map(col => `"log"."${col}"`) + .map(col => col === `"log"."username"` && hideNames ? `to_hex(sha1(cast("log"."username" as varbinary))) as username` : col) +} + +const getLearnerCols = (hideNames) => { + return ["learner_id", "run_remote_endpoint", "class_id", "runnable_url", "student_id", "class", "school", "user_id", "offering_id", "permission_forms", "username", "student_name", "teachers", "last_run", "query_id"] + .map(col => `"learner"."${col}"`) + .map(col => col === `"learner"."username"` && hideNames ? `to_hex(sha1(cast("learner"."username" as varbinary))) as username` : col) + .map(col => col === `"learner"."student_name"` && hideNames ? `"learner"."student_id" as student_name` : col) +} + /* Generates a very wide row including all fields from the log and learner. */ -exports.generateLearnerLogSQL = (queryIdsPerRunnable, authDomain, sourceKey, hideNames) => { +exports.generateLearnerLogSQL = (queryIdsPerRunnable, hideNames) => { const logDb = process.env.LOG_ATHENA_DB_NAME; const runnableUrls = Object.keys(queryIdsPerRunnable); const queryIds = Object.values(queryIdsPerRunnable); - const logCols = [ - "id", "session", "username", "application", "activity", "event", "event_value", "time", "parameters", "extras", "run_remote_endpoint", "timestamp" - ].map(col => `"log"."${col}"`) - - const learnerCols = [ - "learner_id", "run_remote_endpoint", "class_id", "runnable_url", "student_id", "class", "school", "user_id", "offering_id", "permission_forms", "username", "student_name", "teachers", "last_run", "query_id" - ] - .map(col => `"learner"."${col}"`) - .map(col => col === `"learner"."student_name"` && hideNames ? `"learner"."student_id" as student_name` : col).join(", ") + const logCols = getLogCols(hideNames) + const learnerCols = getLearnerCols(hideNames) - const cols = logCols.concat(learnerCols) + const cols = logCols.concat(learnerCols).join(", ") return ` -- name ${runnableUrls.join(", ")} @@ -615,17 +621,18 @@ exports.generateUserLogSQL = (usernames, activities, start_date, end_date) => { /* Generates a smaller row of event details only, no portal info. */ -exports.generateNarrowLogSQL = (queryIdsPerRunnable, authDomain, sourceKey) => { +exports.generateNarrowLogSQL = (queryIdsPerRunnable, hideNames) => { const logDb = process.env.LOG_ATHENA_DB_NAME; const runnableUrls = Object.keys(queryIdsPerRunnable); const queryIds = Object.values(queryIdsPerRunnable); + const logCols = getLogCols(hideNames).join(", "); return ` -- name ${runnableUrls.join(", ")} -- type learner event log ⎯ [qids: ${queryIds.join(", ")}] -- reportType narrow-learner-event-log - SELECT log.* + SELECT ${logCols} FROM "${logDb}"."logs_by_time" log INNER JOIN "report-service"."learners" learner ON