Skip to content

Commit

Permalink
feat: Hash usernames using SHA1 when hiding names [PT-184362179]
Browse files Browse the repository at this point in the history
  • Loading branch information
dougmartin committed May 22, 2024
1 parent c01fcf3 commit 6a3a2cd
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 15 deletions.
4 changes: 2 additions & 2 deletions query-creator/create-query/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ const learnersReport = async (params, body, tokenServiceEnv, debugSQL, reportSer
const doLearnerLogReporting= async () => {
// generate the sql for the query
const sql = narrowLearners
? aws.generateNarrowLogSQL(queryIdsPerRunnable, authDomain, reportServiceSource) // hideNames not needed here as no learner info is output
: aws.generateLearnerLogSQL(queryIdsPerRunnable, authDomain, reportServiceSource, hideNames);
? aws.generateNarrowLogSQL(queryIdsPerRunnable, hideNames)
: aws.generateLearnerLogSQL(queryIdsPerRunnable, hideNames);

if (debugSQL) {
sqlOutput.push(sql);
Expand Down
33 changes: 20 additions & 13 deletions query-creator/create-query/steps/aws.js
Original file line number Diff line number Diff line change
Expand Up @@ -543,25 +543,31 @@ exports.generateSQL = (runnableInfo, usageReport, authDomain, sourceKey, hideNam
GROUP BY l.run_remote_endpoint )`
*/

/**
 * Builds the column expressions for the "log" table alias.
 *
 * Every field is emitted as a quoted `"log"."<field>"` reference. When
 * `hideNames` is truthy, the username field is instead emitted as a
 * hex-encoded SHA1 expression aliased back to `username`.
 *
 * @param {boolean} hideNames - when truthy, replace the username column with its hashed form
 * @returns {string[]} one SQL column expression per log field, in a fixed order
 */
const getLogCols = (hideNames) => {
  const logFields = [
    "id",
    "session",
    "username",
    "application",
    "activity",
    "event",
    "event_value",
    "time",
    "parameters",
    "extras",
    "run_remote_endpoint",
    "timestamp",
  ];

  const expressions = [];
  for (const field of logFields) {
    if (hideNames && field === "username") {
      // hash the username rather than exposing it, keeping the original column name
      expressions.push(`to_hex(sha1(cast("log"."username" as varbinary))) as username`);
    } else {
      expressions.push(`"log"."${field}"`);
    }
  }
  return expressions;
};

/**
 * Builds the column expressions for the "learner" table alias.
 *
 * Every field is emitted as a quoted `"learner"."<field>"` reference. When
 * `hideNames` is truthy, two fields are substituted:
 *   - username     -> hex-encoded SHA1 expression aliased as `username`
 *   - student_name -> the student_id column aliased as `student_name`
 *
 * @param {boolean} hideNames - when truthy, mask the username and student_name columns
 * @returns {string[]} one SQL column expression per learner field, in a fixed order
 */
const getLearnerCols = (hideNames) => {
  const learnerFields = [
    "learner_id",
    "run_remote_endpoint",
    "class_id",
    "runnable_url",
    "student_id",
    "class",
    "school",
    "user_id",
    "offering_id",
    "permission_forms",
    "username",
    "student_name",
    "teachers",
    "last_run",
    "query_id",
  ];

  return learnerFields.map((field) => {
    const qualified = `"learner"."${field}"`;
    if (!hideNames) {
      return qualified;
    }
    switch (field) {
      case "username":
        // hash the username rather than exposing it, keeping the original column name
        return `to_hex(sha1(cast("learner"."username" as varbinary))) as username`;
      case "student_name":
        // output the student id under the student_name column
        return `"learner"."student_id" as student_name`;
      default:
        return qualified;
    }
  });
};

/*
Generates a very wide row including all fields from the log and learner.
*/
exports.generateLearnerLogSQL = (queryIdsPerRunnable, authDomain, sourceKey, hideNames) => {
exports.generateLearnerLogSQL = (queryIdsPerRunnable, hideNames) => {
const logDb = process.env.LOG_ATHENA_DB_NAME;
const runnableUrls = Object.keys(queryIdsPerRunnable);
const queryIds = Object.values(queryIdsPerRunnable);

const logCols = [
"id", "session", "username", "application", "activity", "event", "event_value", "time", "parameters", "extras", "run_remote_endpoint", "timestamp"
].map(col => `"log"."${col}"`)

const learnerCols = [
"learner_id", "run_remote_endpoint", "class_id", "runnable_url", "student_id", "class", "school", "user_id", "offering_id", "permission_forms", "username", "student_name", "teachers", "last_run", "query_id"
]
.map(col => `"learner"."${col}"`)
.map(col => col === `"learner"."student_name"` && hideNames ? `"learner"."student_id" as student_name` : col).join(", ")
const logCols = getLogCols(hideNames)
const learnerCols = getLearnerCols(hideNames)

const cols = logCols.concat(learnerCols)
const cols = logCols.concat(learnerCols).join(", ")

return `
-- name ${runnableUrls.join(", ")}
Expand Down Expand Up @@ -615,17 +621,18 @@ exports.generateUserLogSQL = (usernames, activities, start_date, end_date) => {
/*
Generates a smaller row of event details only, no portal info.
*/
exports.generateNarrowLogSQL = (queryIdsPerRunnable, authDomain, sourceKey) => {
exports.generateNarrowLogSQL = (queryIdsPerRunnable, hideNames) => {
const logDb = process.env.LOG_ATHENA_DB_NAME;
const runnableUrls = Object.keys(queryIdsPerRunnable);
const queryIds = Object.values(queryIdsPerRunnable);
const logCols = getLogCols(hideNames).join(", ");

return `
-- name ${runnableUrls.join(", ")}
-- type learner event log ⎯ [qids: ${queryIds.join(", ")}]
-- reportType narrow-learner-event-log
SELECT log.*
SELECT ${logCols}
FROM "${logDb}"."logs_by_time" log
INNER JOIN "report-service"."learners" learner
ON
Expand Down

0 comments on commit 6a3a2cd

Please sign in to comment.