Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add analytics wiring #4049

Merged
merged 23 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8b71385
add SQL function for gathering statistics
mathemancer Nov 22, 2024
9fd9038
wire up analytics counter to python
mathemancer Nov 22, 2024
1a5b45e
add models to hold analytics statistics
mathemancer Nov 25, 2024
c80043d
add mathesar.analytics module, basic functions
mathemancer Nov 25, 2024
4d27b6b
update model with record count and uploaded flag
mathemancer Nov 27, 2024
1efd090
add report uploader function (works with GCP cloud run function)
mathemancer Nov 27, 2024
62678be
remove excluded_schemas arg from SQL function
mathemancer Nov 27, 2024
1805d3e
add config setting with anonymized Analytics URL
mathemancer Nov 29, 2024
26698ef
Merge branch 'add_combo_decorator' into add_analytics_simple
mathemancer Nov 29, 2024
ed9278e
add cached analytics running wrapper
mathemancer Dec 2, 2024
b553a70
increase cache time to 5 minutes
mathemancer Dec 2, 2024
972f3f1
Merge branch 'develop' into add_analytics_simple
mathemancer Dec 2, 2024
bb13f2f
extract constants, add testmode check
mathemancer Dec 3, 2024
f3f27ed
combine stale report queries for efficiency
mathemancer Dec 3, 2024
2a15735
extract ANALYTICS_FREQUENCY for clarity
mathemancer Dec 3, 2024
7fbc0fc
add basic test for db object getter
mathemancer Dec 3, 2024
9c19811
add tests for analytics functions
mathemancer Dec 6, 2024
aad50ce
remove unused imports
mathemancer Dec 6, 2024
c5c0615
remove unused named variables
mathemancer Dec 6, 2024
a089f3d
Merge branch 'develop' into add_analytics_simple
mathemancer Dec 9, 2024
a952f22
clean up migrations
mathemancer Dec 9, 2024
40efb4b
Merge branch 'develop' into add_analytics_simple
mathemancer Dec 10, 2024
14af511
Use conventional aliases in query
mathemancer Dec 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/settings/common_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ def pipe_delim(pipe_string):
MATHESAR_UI_SOURCE_LOCATION = os.path.join(BASE_DIR, 'mathesar_ui/')
MATHESAR_CAPTURE_UNHANDLED_EXCEPTION = os.environ.get('CAPTURE_UNHANDLED_EXCEPTION', default=False)
MATHESAR_STATIC_NON_CODE_FILES_LOCATION = os.path.join(BASE_DIR, 'mathesar/static/non-code/')
MATHESAR_ANALYTICS_URL = os.environ.get('MATHESAR_ANALYTICS_URL', default='https://example.com')

DEFAULT_AUTO_FIELD = 'django.db.models.AutoField'

Expand Down
7 changes: 7 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ def _get(db_name):
logger.debug('exit')


@pytest.fixture(autouse=True)
def disable_http_requests(monkeypatch):
    """
    Replace urllib3's `HTTPConnectionPool.urlopen` for every test so that
    any attempt to send a request through it raises instead of going out.
    """
    def _refuse_request(self, *args, **kwargs):
        raise Exception("Requests to 3rd party addresses make bad tests")

    monkeypatch.setattr(
        "urllib3.connectionpool.HTTPConnectionPool.urlopen",
        _refuse_request,
    )


@pytest.fixture(scope="session")
def create_db(request, engine_cache):
"""
Expand Down
5 changes: 5 additions & 0 deletions db/analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from db import connection as db_conn


def get_object_counts(conn):
    """
    Run the `get_object_counts` msar function over `conn` and return the
    first column of the first row of its result.
    """
    result = db_conn.exec_msar_func(conn, 'get_object_counts')
    first_row = result.fetchone()
    return first_row[0]
34 changes: 34 additions & 0 deletions db/sql/00_msar.sql
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ SELECT msar.drop_all_msar_functions();
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------


CREATE OR REPLACE FUNCTION msar.mathesar_system_schemas() RETURNS text[] AS $$/*
Return the names of the Mathesar System schemas as a text array.

Keep this list up to date whenever the set of system schemas changes.
*/
  SELECT '{msar,__msar,mathesar_types}'::text[];
$$ LANGUAGE SQL STABLE;


CREATE OR REPLACE FUNCTION msar.extract_smallints(v jsonb) RETURNS smallint[] AS $$/*
From the supplied JSONB value, extract all top-level JSONB array elements which can be successfully
cast to PostgreSQL smallint values. Return the resulting array of smallint values.
Expand Down Expand Up @@ -1083,6 +1093,30 @@ WHERE has_privilege;
$$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION
msar.get_object_counts() RETURNS jsonb AS $$/*
Return a JSON object with counts of some objects in the database.

We exclude the mathesar-system schemas, information_schema, and every schema whose name starts
with 'pg_'.

The keys of the returned object are:
- schema_count: total schemas, excluding those listed above
- table_count: total ordinary tables (relkind 'r') in the included schemas
- record_count: estimated total rows of the included tables, taken from pg_class.reltuples

Notes on record_count:
- reltuples is an estimate, and is -1 for a table that has never been vacuumed or analyzed
  (PostgreSQL 14+). Such tables contribute 0 so that they cannot make the total negative.
- The total is 0 rather than NULL when there is nothing to sum.
*/
SELECT jsonb_build_object(
  'schema_count', COUNT(DISTINCT pgn.oid),
  'table_count', COUNT(pgc.oid),
  'record_count', COALESCE(SUM(GREATEST(pgc.reltuples, 0)::bigint), 0)
)
FROM pg_catalog.pg_namespace pgn
  LEFT JOIN pg_catalog.pg_class pgc ON pgc.relnamespace = pgn.oid AND pgc.relkind = 'r'
WHERE pgn.nspname <> 'information_schema'
  AND NOT (pgn.nspname = ANY(msar.mathesar_system_schemas()))
  -- Use an anchored regex instead of LIKE 'pg_%': in LIKE, '_' matches any single character, so
  -- that pattern would also exclude user schemas such as 'pgdata'.
  AND pgn.nspname !~ '^pg_';
$$ LANGUAGE SQL STABLE;


CREATE OR REPLACE FUNCTION msar.schema_info_table() RETURNS TABLE
(
oid bigint, -- The OID of the schema.
Expand Down
18 changes: 18 additions & 0 deletions db/sql/test_00_msar.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5907,3 +5907,21 @@ BEGIN
);
END;
$$ LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION test_get_object_counts() RETURNS SETOF TEXT AS $$
DECLARE
  counts jsonb;
BEGIN
  -- Set up one extra schema holding one table, plus two tables outside of it.
  CREATE SCHEMA anewone;
  CREATE TABLE anewone.mytab (col1 text);
  CREATE TABLE "12345" (bleh text, bleh2 numeric);
  CREATE TABLE tableno3 (id INTEGER);
  counts := msar.get_object_counts();
  RETURN NEXT is((counts ->> 'schema_count')::integer, 2);
  RETURN NEXT is((counts ->> 'table_count')::integer, 3);
  -- The record count is only an estimate based on the catalog, and can't be checked against an
  -- actual number without a vacuum. So, only the presence of its key is asserted.
  RETURN NEXT is(counts ? 'record_count', true);
END;
$$ LANGUAGE plpgsql;
1 change: 1 addition & 0 deletions docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ services:
- DJANGO_SETTINGS_MODULE=${DJANGO_SETTINGS_MODULE-config.settings.development}
- ALLOWED_HOSTS=${ALLOWED_HOSTS-*}
- SECRET_KEY=${SECRET_KEY}
- MATHESAR_ANALYTICS_URL=${MATHESAR_ANALYTICS_URL-https://example.com}
- MATHESAR_DATABASES=(mathesar_tables|postgresql://mathesar:mathesar@mathesar_dev_db:5432/mathesar)
- DJANGO_SUPERUSER_PASSWORD=password
- POSTGRES_DB=mathesar_django
Expand Down
139 changes: 139 additions & 0 deletions mathesar/analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""
This module contains functions for dealing with analytics in Mathesar.

The basic principle is: If there is an installation_id, analytics are
"turned on", and can and will be collected. Otherwise they won't.

Thus, the `disable_analytics` function simply deletes that ID, if it
exists.
"""
from functools import wraps
import threading
from uuid import uuid4

from django.core.cache import cache
from django.conf import settings
from django.db.models import Q
from django.utils import timezone
import requests

from mathesar import __version__
from mathesar.models import (
AnalyticsReport,
ConfiguredRole,
Database,
Explorations,
InstallationID,
User,
)

# Cache key set by `wire_analytics` when it starts an analytics run.
ANALYTICS_DONE = "analytics_done"
# Lifetime of the ANALYTICS_DONE cache entry; no new run starts while it exists.
CACHE_TIMEOUT = 1800  # seconds
# Users whose last login is within this many days are counted as active.
ACTIVE_USER_DAYS = 14
# Reports not updated for this long are deleted regardless of upload status.
ANALYTICS_REPORT_MAX_AGE = 30  # days
ANALYTICS_FREQUENCY = 1  # a report is saved at most once per day.


def wire_analytics(f):
    """
    Decorate `f` so that calling it may start a background analytics run.

    The wrapper always calls `f` with the arguments it was given and returns
    `f`'s result. Beforehand, when `settings.TEST` is `False` and no
    `ANALYTICS_DONE` marker is cached, it stores the marker for
    `CACHE_TIMEOUT` seconds and starts `run_analytics` in a new thread.
    """
    @wraps(f)
    def wrapped(*args, **kwargs):
        # `cache.add` stores the marker only if it is absent and reports
        # whether it did. Unlike a separate get followed by a set, this
        # leaves no window in which concurrent calls each start a run.
        if settings.TEST is False and cache.add(
            ANALYTICS_DONE, True, CACHE_TIMEOUT
        ):
            threading.Thread(target=run_analytics).start()
        return f(*args, **kwargs)
    return wrapped


def run_analytics():
    """
    Save, upload, and prune analytics reports if a new report is due.

    Nothing happens when there is no InstallationID, or when a report was
    already created within the last `ANALYTICS_FREQUENCY` days.
    """
    if InstallationID.objects.first() is None:
        return
    recent_cutoff = timezone.now() - timezone.timedelta(
        days=ANALYTICS_FREQUENCY
    )
    # `.exists()` asks the database whether any row matches, rather than
    # fetching every recent report just to test the queryset's truthiness.
    if AnalyticsReport.objects.filter(created_at__gte=recent_cutoff).exists():
        return
    save_analytics_report()
    upload_analytics_reports()
    delete_stale_reports()


def initialize_analytics():
    """Create the InstallationID row with a freshly generated UUID value."""
    new_value = uuid4()
    InstallationID.objects.create(value=new_value)


def disable_analytics():
    """Delete every stored InstallationID row."""
    installation_ids = InstallationID.objects.all()
    installation_ids.delete()


def save_analytics_report():
    """
    Collect current usage counts and save them as a new AnalyticsReport.

    Does nothing when there is no InstallationID. Object counts are summed
    over every Database whose `object_counts` can be retrieved; a database
    for which retrieval fails is reported via `print` and left out of all
    of the connected-database totals.
    """
    installation_id = InstallationID.objects.first()
    if installation_id is None:
        return
    connected_database_count = 0
    connected_database_schema_count = 0
    connected_database_table_count = 0
    connected_database_record_count = 0
    for d in Database.objects.all():
        try:
            object_counts = d.object_counts
            # Read and normalise every value before touching the totals, so
            # that one bad value can't leave them partially updated.
            schema_count = int(object_counts['schema_count'])
            table_count = int(object_counts['table_count'])
            # The record count is a sum of catalog estimates: it may be
            # null (nothing to sum), fractional, or negative. The model
            # field only accepts non-negative integers.
            record_count = max(int(object_counts['record_count'] or 0), 0)
        except Exception:
            print(f"Couldn't retrieve object counts for {d.name}")
            continue
        connected_database_count += 1
        connected_database_schema_count += schema_count
        connected_database_table_count += table_count
        connected_database_record_count += record_count

    active_cutoff = timezone.now() - timezone.timedelta(days=ACTIVE_USER_DAYS)
    analytics_report = AnalyticsReport(
        installation_id=installation_id,
        mathesar_version=__version__,
        user_count=User.objects.filter(is_active=True).count(),
        active_user_count=User.objects.filter(
            is_active=True,
            last_login__gte=active_cutoff,
        ).count(),
        configured_role_count=ConfiguredRole.objects.count(),
        connected_database_count=connected_database_count,
        connected_database_schema_count=connected_database_schema_count,
        connected_database_table_count=connected_database_table_count,
        connected_database_record_count=connected_database_record_count,
        exploration_count=Explorations.objects.count(),
    )
    analytics_report.save()


def upload_analytics_reports(*, timeout=30):
    """
    POST every not-yet-uploaded report to `settings.MATHESAR_ANALYTICS_URL`
    as a JSON list, then flag exactly those reports as uploaded.

    Nothing is sent when there are no pending reports.

    Args:
        timeout: Seconds to wait for the analytics endpoint before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            endpoint answers with an error status. In that case no report
            is flagged as uploaded, so a later run sends it again.
    """
    # Evaluate the queryset once: the rows serialised below must be the same
    # rows that get flagged afterwards, even if new reports appear meanwhile.
    reports = list(AnalyticsReport.objects.filter(uploaded=False))
    if not reports:
        return
    reports_blob = [
        {
            "id": report.id,
            "created_at": report.created_at.isoformat(),
            "installation_id": str(report.installation_id.value),
            "mathesar_version": report.mathesar_version,
            "user_count": report.user_count,
            "active_user_count": report.active_user_count,
            "configured_role_count": report.configured_role_count,
            "connected_database_count": report.connected_database_count,
            "connected_database_schema_count": report.connected_database_schema_count,
            "connected_database_table_count": report.connected_database_table_count,
            "connected_database_record_count": report.connected_database_record_count,
            "exploration_count": report.exploration_count,
        }
        for report in reports
    ]
    response = requests.post(
        settings.MATHESAR_ANALYTICS_URL, json=reports_blob, timeout=timeout
    )
    # A 4xx/5xx answer means the reports were not accepted; don't flag them.
    response.raise_for_status()
    AnalyticsReport.objects.filter(
        id__in=[report.id for report in reports]
    ).update(uploaded=True)


def delete_stale_reports():
    """
    Delete analytics reports that are no longer needed.

    A report is deleted when it was uploaded and created at least 2 days
    ago, or when it was last updated at least `ANALYTICS_REPORT_MAX_AGE`
    days ago, whether or not it was uploaded.
    """
    uploaded_and_old = Q(
        uploaded=True,
        created_at__lte=timezone.now() - timezone.timedelta(days=2),
    )
    past_max_age = Q(
        updated_at__lte=timezone.now()
        - timezone.timedelta(days=ANALYTICS_REPORT_MAX_AGE),
    )
    AnalyticsReport.objects.filter(uploaded_and_old | past_max_age).delete()
48 changes: 48 additions & 0 deletions mathesar/migrations/0022_installationid_analyticsreport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Generated by Django 4.2.16 on 2024-12-09 07:39

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Create the InstallationID and AnalyticsReport models."""

    dependencies = [
        ('mathesar', '0021_database_last_confirmed_sql_version'),
    ]

    operations = [
        # The primary key is a plain integer defaulting to 1, not an
        # auto-incrementing field.
        migrations.CreateModel(
            name='InstallationID',
            fields=[
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('id', models.IntegerField(default=1, primary_key=True, serialize=False)),
                ('value', models.UUIDField()),
            ],
            options={
                'abstract': False,
            },
        ),
        # Every count column is nullable. Each report references an
        # InstallationID (default 1) and is removed with it (CASCADE).
        migrations.CreateModel(
            name='AnalyticsReport',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('mathesar_version', models.CharField()),
                ('user_count', models.PositiveIntegerField(blank=True, null=True)),
                ('active_user_count', models.PositiveIntegerField(blank=True, null=True)),
                ('configured_role_count', models.PositiveIntegerField(blank=True, null=True)),
                ('connected_database_count', models.PositiveIntegerField(blank=True, null=True)),
                ('connected_database_schema_count', models.PositiveIntegerField(blank=True, null=True)),
                ('connected_database_table_count', models.PositiveIntegerField(blank=True, null=True)),
                ('connected_database_record_count', models.PositiveBigIntegerField(blank=True, null=True)),
                ('exploration_count', models.PositiveIntegerField(blank=True, null=True)),
                ('uploaded', models.BooleanField(default=False)),
                ('installation_id', models.ForeignKey(default=1, on_delete=django.db.models.deletion.CASCADE, to='mathesar.installationid')),
            ],
            options={
                'abstract': False,
            },
        ),
    ]
2 changes: 2 additions & 0 deletions mathesar/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
# We need to do this to register the model correctly in Django settings
from .users import User # noqa
from .analytics import InstallationID, AnalyticsReport # noqa
from .base import * # noqa
24 changes: 24 additions & 0 deletions mathesar/models/analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from django.db import models
from mathesar.models.base import BaseModel


class InstallationID(BaseModel):
    """Hold the UUID that identifies this installation in analytics reports."""
    # We shouldn't increment this, since only one row is allowed.
    # The fixed default of 1 means a row created without an explicit id
    # always gets primary key 1.
    id = models.IntegerField(primary_key=True, default=1)
    # The installation's identifier.
    value = models.UUIDField()


class AnalyticsReport(BaseModel):
    """Store one set of usage counts, together with its upload status."""
    # Defaults to the InstallationID row with primary key 1. Deleting the
    # referenced InstallationID also deletes this report (CASCADE).
    installation_id = models.ForeignKey(
        'InstallationID', default=1, on_delete=models.CASCADE
    )
    mathesar_version = models.CharField()
    # Every count below is optional (nullable).
    user_count = models.PositiveIntegerField(null=True, blank=True)
    active_user_count = models.PositiveIntegerField(null=True, blank=True)
    configured_role_count = models.PositiveIntegerField(null=True, blank=True)
    connected_database_count = models.PositiveIntegerField(null=True, blank=True)
    connected_database_schema_count = models.PositiveIntegerField(null=True, blank=True)
    connected_database_table_count = models.PositiveIntegerField(null=True, blank=True)
    # Stored in a big-integer column, unlike the other counts.
    connected_database_record_count = models.PositiveBigIntegerField(null=True, blank=True)
    exploration_count = models.PositiveIntegerField(null=True, blank=True)
    # New reports start out as not uploaded.
    uploaded = models.BooleanField(default=False)
12 changes: 12 additions & 0 deletions mathesar/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import psycopg

from db.sql.install import install as install_sql
from db.analytics import get_object_counts
from mathesar import __version__
from mathesar.models import exceptions

Expand Down Expand Up @@ -47,6 +48,17 @@ class Meta:
)
]

@property
def object_counts(self):
    """
    Return the object counts of this database.

    Each UserDatabaseRoleMap of this database is tried in turn; the counts
    come from the first one whose connection can be used successfully.

    Raises:
        exceptions.NoConnectionAvailable: If there is no role map for this
            database, or every attempt failed. The last failure, if any, is
            attached as the exception's cause.
    """
    last_error = None
    for role_map in UserDatabaseRoleMap.objects.filter(database=self):
        try:
            with role_map.connection as conn:
                return get_object_counts(conn)
        except Exception as e:
            # Best effort: another role map may still work. Remember the
            # failure so it isn't lost if none of them does.
            last_error = e
    raise exceptions.NoConnectionAvailable from last_error

@property
def needs_upgrade_attention(self):
return self.last_confirmed_sql_version != __version__
Expand Down
3 changes: 2 additions & 1 deletion mathesar/rpc/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
http_basic_auth_superuser_required,
)
from mathesar.rpc.exceptions.handlers import handle_rpc_exceptions
from mathesar.analytics import wire_analytics


def mathesar_rpc_method(*, name, auth="superuser"):
Expand All @@ -24,5 +25,5 @@ def mathesar_rpc_method(*, name, auth="superuser"):
raise Exception("`auth` must be 'superuser' or 'login'")

def combo_decorator(f):
return rpc_method(name=name)(auth_wrap(handle_rpc_exceptions(f)))
return rpc_method(name=name)(auth_wrap(wire_analytics(handle_rpc_exceptions(f))))
return combo_decorator
Loading
Loading