Skip to content

Commit

Permalink
Usage metrics for 2024.3 release (getodk#1345)
Browse files Browse the repository at this point in the history
* Beginning to add new usage metrics for 2024.3 release

* change defaults to 0

* Additional system metrics

* removing entity_create source metrics to put in another pr/commit

* Make dataset usage metric faster by pulling properties query out

* Add some of the create counts

* Count entities created with bulk operation

* Fixing a little bug in uploaded blob reporting

* remove project has_description

* Filter encypted and upgraded form defs
  • Loading branch information
ktuite authored Dec 14, 2024
1 parent 8837aa5 commit efdbc3a
Show file tree
Hide file tree
Showing 4 changed files with 423 additions and 32 deletions.
2 changes: 1 addition & 1 deletion config/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"analytics": {
"url": "https://data.getodk.cloud/v1/key/eOZ7S4bzyUW!g1PF6dIXsnSqktRuewzLTpmc6ipBtRq$LDfIMTUKswCexvE0UwJ9/projects/1/forms/odk-analytics/submissions",
"formId": "odk-analytics",
"version": "v2024.2.0_1"
"version": "v2024.3.0_1"
},
"s3blobStore": {}
}
Expand Down
10 changes: 8 additions & 2 deletions lib/data/analytics.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const metricsTemplate = {
"num_unique_collectors": {},
"database_size": {},
"uses_external_db": 0,
"uses_external_blob_store": 0,
"sso_enabled": 0,
"num_client_audit_attachments": 0,
"num_client_audit_attachments_failures": 0,
Expand All @@ -41,8 +42,14 @@ const metricsTemplate = {
"num_offline_entity_branches": 0,
"num_offline_entity_interrupted_branches": 0,
"num_offline_entity_submissions_reprocessed": 0,
"num_offline_entity_submissions_force_processed": 0,
"max_entity_submission_delay": 0,
"avg_entity_submission_delay": 0
"avg_entity_submission_delay": 0,
"max_entity_branch_delay": 0,
"num_xml_only_form_defs": 0,
"num_blob_files": 0,
"num_blob_files_on_s3": 0,
"num_reset_failed_to_pending_count": 0
},
"projects": [
{
Expand Down Expand Up @@ -151,7 +158,6 @@ const metricsTemplate = {
}
},
"other": {
"has_description": 0,
"description_length": 0
},
"datasets": [{
Expand Down
146 changes: 129 additions & 17 deletions lib/model/query/analytics.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ const databaseExternal = (dbHost) => () =>
? 0
: 1);


const blobStoreExternal = (s3Settings) => () =>
(s3Settings && s3Settings.server
? 1
: 0);

////////////////////////////////////////////////////////
// SQL QUERIES

Expand Down Expand Up @@ -139,6 +145,24 @@ WHERE
const countClientAuditRows = () => ({ oneFirst }) => oneFirst(sql`
SELECT count(*) FROM client_audits`);

const countXmlOnlyFormDefs = () => ({ oneFirst }) => oneFirst(sql`
SELECT count(*)
FROM form_defs
WHERE "xlsBlobId" IS NULL
AND "keyId" IS NULL
AND "version" NOT LIKE '%[upgrade]%'`);

const countBlobFiles = () => ({ one }) => one(sql`
SELECT
COUNT(*) AS total_blobs,
SUM(CASE WHEN "s3_status" = 'uploaded' THEN 1 ELSE 0 END) AS uploaded_blobs
FROM blobs`);

const countResetFailedToPending = () => ({ oneFirst }) => oneFirst(sql`
SELECT COUNT(*)
FROM audits
WHERE action = 'blobs.s3.failed-to-pending'`);

// PER PROJECT
// Users
const countUsersPerRole = () => ({ all }) => all(sql`
Expand Down Expand Up @@ -397,7 +421,7 @@ group by f."projectId"`);
// Datasets
const getDatasets = () => ({ all }) => all(sql`
SELECT
ds.id, ds."projectId", COUNT(DISTINCT p.id) num_properties, COUNT(DISTINCT e.id) num_entities_total,
ds.id, ds."projectId", COUNT(DISTINCT e.id) num_entities_total,
COUNT(DISTINCT CASE WHEN e."createdAt" >= current_date - cast(${DAY_RANGE} as int) THEN e.id END) num_entities_recent,
COUNT(DISTINCT CASE WHEN e."updatedAt" IS NOT NULL THEN e.id END) num_entities_updated_total,
COUNT(DISTINCT CASE WHEN e."updatedAt" >= current_date - cast(${DAY_RANGE} as int) THEN e.id END) num_entities_updated_recent,
Expand All @@ -412,9 +436,14 @@ SELECT
MAX(COALESCE(updates.update_api_total, 0)) num_entity_updates_api_total,
MAX(COALESCE(updates.update_api_recent, 0)) num_entity_updates_api_recent,
MAX(COALESCE(conflict_stats.conflicts, 0)) num_entity_conflicts,
MAX(COALESCE(conflict_stats.resolved, 0)) num_entity_conflicts_resolved
MAX(COALESCE(conflict_stats.resolved, 0)) num_entity_conflicts_resolved,
MAX(COALESCE(creates.create_sub_total, 0)) num_entity_create_sub_total,
MAX(COALESCE(creates.create_sub_recent, 0)) num_entity_create_sub_recent,
MAX(COALESCE(creates.create_api_total, 0)) num_entity_create_api_total,
MAX(COALESCE(creates.create_api_recent, 0)) num_entity_create_api_recent,
MAX(COALESCE(bulk_creates.total, 0)) num_entity_create_bulk_total,
MAX(COALESCE(bulk_creates.recent, 0)) num_entity_create_bulk_recent
FROM datasets ds
LEFT JOIN ds_properties p ON p."datasetId" = ds.id AND p."publishedAt" IS NOT NULL
LEFT JOIN entities e ON e."datasetId" = ds.id
LEFT JOIN (dataset_form_defs dfd
JOIN form_defs fd ON fd.id = dfd."formDefId"
Expand Down Expand Up @@ -450,6 +479,33 @@ FROM datasets ds
WHERE a."action" = 'entity.update.version'
GROUP BY e."datasetId"
) as updates ON ds.id = updates."datasetId"
LEFT JOIN (
SELECT
COUNT (1) total,
SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent,
SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL THEN 1 ELSE 0 END) create_sub_total,
SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int)
THEN 1 ELSE 0 END) create_sub_recent,
SUM (CASE WHEN a."details"->'submissionDefId' IS NULL THEN 1 ELSE 0 END) create_api_total,
SUM (CASE WHEN a."details"->'submissionDefId' IS NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int)
THEN 1 ELSE 0 END) create_api_recent,
e."datasetId"
FROM audits a
JOIN entities e on CAST((a.details ->> 'entityId'::TEXT) AS integer) = e.id
WHERE a."action" = 'entity.create'
GROUP BY e."datasetId"
) as creates ON ds.id = creates."datasetId"
LEFT JOIN (
SELECT count(1) total,
SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent,
e."datasetId"
FROM audits a
JOIN entity_def_sources eds on CAST((a.details ->> 'sourceId'::TEXT) AS integer) = eds."id"
JOIN entity_defs ed on ed."sourceId" = eds.id AND root=true
JOIN entities e on ed."entityId" = e.id
WHERE a."action" = 'entity.bulk.create'
GROUP BY e."datasetId"
) as bulk_creates on ds.id = bulk_creates."datasetId"
LEFT JOIN (
SELECT COUNT (1) conflicts,
SUM (CASE WHEN e."conflict" IS NULL THEN 1 ELSE 0 END) resolved,
Expand All @@ -476,6 +532,15 @@ WHERE audits.action = 'entity.bulk.create'
GROUP BY ds.id, ds."projectId"
`);

const getDatasetProperties = () => ({ all }) => all(sql`
SELECT
ds.id, ds."projectId", COUNT(DISTINCT p.id) num_properties
FROM datasets ds
LEFT JOIN ds_properties p ON p."datasetId" = ds.id AND p."publishedAt" IS NOT NULL
WHERE ds."publishedAt" IS NOT NULL
GROUP BY ds.id, ds."projectId";
`);


// Offline entities

Expand Down Expand Up @@ -523,10 +588,13 @@ FROM duplicateRuns;

// Number of submissions temporarily held in backlog but were automatically
// removed from backlog when preceeding submission came in
const countSubmissionReprocess = () => ({ oneFirst }) => oneFirst(sql`
SELECT COUNT(*)
const countSubmissionBacklogEvents = () => ({ one }) => one(sql`
SELECT
COUNT(CASE WHEN "action" = 'submission.backlog.hold' THEN 1 END) AS "submission.backlog.hold",
COUNT(CASE WHEN "action" = 'submission.backlog.reprocess' THEN 1 END) AS "submission.backlog.reprocess",
COUNT(CASE WHEN "action" = 'submission.backlog.force' THEN 1 END) AS "submission.backlog.force"
FROM audits
WHERE "action" = 'submission.backlog.reprocess'
WHERE "action" IN ('submission.backlog.hold', 'submission.backlog.reprocess', 'submission.backlog.force')
`);

// Measure how much time entities whose source is a submission.create
Expand Down Expand Up @@ -554,6 +622,21 @@ JOIN submission_defs as sd
ON eds."submissionDefId" = sd.id;
`);

const measureMaxEntityBranchTime = () => ({ oneFirst }) => oneFirst(sql`
SELECT
COALESCE(MAX(AGE(max_created_at, min_created_at)), '0 seconds'::interval) AS max_time_difference
FROM (
SELECT
"branchId",
"entityId",
MIN("createdAt") AS min_created_at,
MAX("createdAt") AS max_created_at
FROM entity_defs
GROUP BY "branchId", "entityId"
HAVING "branchId" IS NOT NULL
) AS subquery;
`);

// Other
const getProjectsWithDescriptions = () => ({ all }) => all(sql`
select id as "projectId", length(trim(description)) as description_length from projects where coalesce(trim(description),'')!=''`);
Expand All @@ -575,11 +658,12 @@ const projectMetrics = () => (({ Analytics }) => runSequentially([
Analytics.countSubmissionsByUserType,
Analytics.getProjectsWithDescriptions,
Analytics.getDatasets,
Analytics.getDatasetEvents
Analytics.getDatasetEvents,
Analytics.getDatasetProperties
]).then(([ userRoles, appUsers, deviceIds, pubLinks,
forms, formGeoRepeats, formsEncrypt, formStates, reusedIds,
subs, subStates, subEdited, subComments, subUsers,
projWithDesc, datasets, datasetEvents ]) => {
projWithDesc, datasets, datasetEvents, datasetProperties ]) => {
const projects = {};

// users
Expand Down Expand Up @@ -692,9 +776,13 @@ const projectMetrics = () => (({ Analytics }) => runSequentially([
const eventsRow = datasetEvents.find(d => (d.projectId === row.projectId && d.id === row.id)) ||
{ num_bulk_create_events_total: 0, num_bulk_create_events_recent: 0, biggest_bulk_upload: 0 };

// Properties row
const propertiesRow = datasetProperties.find(d => (d.projectId === row.projectId && d.id === row.id)) ||
{ num_properties: 0 };

project.datasets.push({
id: row.id,
num_properties: row.num_properties,
num_properties: propertiesRow.num_properties,
num_creation_forms: row.num_creation_forms,
num_followup_forms: row.num_followup_forms,
num_entities: { total: row.num_entities_total, recent: row.num_entities_recent },
Expand All @@ -710,14 +798,18 @@ const projectMetrics = () => (({ Analytics }) => runSequentially([

// 2024.1 metrics
num_bulk_create_events: { total: eventsRow.num_bulk_create_events_total, recent: eventsRow.num_bulk_create_events_recent },
biggest_bulk_upload: eventsRow.biggest_bulk_upload
biggest_bulk_upload: eventsRow.biggest_bulk_upload,

// 2024.3 metrics
num_entity_creates_sub: { total: row.num_entity_create_sub_total, recent: row.num_entity_create_sub_recent },
num_entity_creates_api: { total: row.num_entity_create_api_total, recent: row.num_entity_create_api_recent },
num_entity_creates_bulk: { total: row.num_entity_create_bulk_total, recent: row.num_entity_create_bulk_recent }
});
}

// other
for (const row of projWithDesc) {
const project = _getProject(projects, row.projectId);
project.other.has_description = 1;
project.other.description_length = row.description_length;
}

Expand All @@ -739,15 +831,19 @@ const previewMetrics = () => (({ Analytics }) => runSequentially([
Analytics.countClientAuditAttachments,
Analytics.countClientAuditProcessingFailed,
Analytics.countClientAuditRows,
Analytics.countXmlOnlyFormDefs,
Analytics.countBlobFiles,
Analytics.countResetFailedToPending,
Analytics.countOfflineBranches,
Analytics.countInterruptedBranches,
Analytics.countSubmissionReprocess,
Analytics.countSubmissionBacklogEvents,
Analytics.measureEntityProcessingTime,
Analytics.measureMaxEntityBranchTime,
Analytics.projectMetrics
]).then(([db, encrypt, bigForm, admins, audits,
archived, managers, viewers, collectors,
caAttachments, caFailures, caRows,
oeBranches, oeInterruptedBranches, oeSubReprocess, oeProcessingTime,
caAttachments, caFailures, caRows, xmlDefs, blobFiles, resetFailedToPending,
oeBranches, oeInterruptedBranches, oeBacklogEvents, oeProcessingTime, oeBranchTime,
projMetrics]) => {
const metrics = clone(metricsTemplate);
// system
Expand Down Expand Up @@ -785,10 +881,20 @@ const previewMetrics = () => (({ Analytics }) => runSequentially([
// 2024.2.0 offline entity metrics
metrics.system.num_offline_entity_branches = oeBranches;
metrics.system.num_offline_entity_interrupted_branches = oeInterruptedBranches;
metrics.system.num_offline_entity_submissions_reprocessed = oeSubReprocess;
metrics.system.num_offline_entity_submissions_reprocessed = oeBacklogEvents['submission.backlog.reprocess'];

metrics.system.max_entity_submission_delay = oeProcessingTime.max_wait;
metrics.system.avg_entity_submission_delay = oeProcessingTime.avg_wait;

// 2024.3.0 offline entity metrics
metrics.system.num_offline_entity_submissions_force_processed = oeBacklogEvents['submission.backlog.force'];
metrics.system.max_entity_branch_delay = oeBranchTime;
metrics.system.num_xml_only_form_defs = xmlDefs;
metrics.system.uses_external_blob_store = Analytics.blobStoreExternal(config.get('default.external.s3blobStore'));
metrics.system.num_blob_files = blobFiles.total_blobs;
metrics.system.num_blob_files_on_s3 = blobFiles.uploaded_blobs;
metrics.system.num_reset_failed_to_pending_count = resetFailedToPending;

return metrics;
}));

Expand All @@ -812,12 +918,16 @@ module.exports = {
biggestForm,
databaseSize,
databaseExternal,
blobStoreExternal,
countAdmins,
countAppUsers,
countBlobFiles,
countResetFailedToPending,
countDeviceIds,
countClientAuditAttachments,
countClientAuditProcessingFailed,
countClientAuditRows,
countXmlOnlyFormDefs,
countForms,
countFormsEncrypted,
countFormFieldTypes,
Expand All @@ -840,9 +950,11 @@ module.exports = {
getLatestAudit,
getDatasets,
getDatasetEvents,
getDatasetProperties,
countOfflineBranches,
countInterruptedBranches,
countSubmissionReprocess,
countSubmissionBacklogEvents,
measureEntityProcessingTime,
measureElapsedEntityTime
measureElapsedEntityTime,
measureMaxEntityBranchTime
};
Loading

0 comments on commit efdbc3a

Please sign in to comment.