Skip to content

Commit

Permalink
Refactor dashboards data queries
Browse files Browse the repository at this point in the history
All dashboards queries (user, assignments, courses) have to main parts:

- Filtering down the data we have access too, either as an instructor or
  admin

- Filtering down base on the dropdown selections

This commit generalizes the first part to be always course based. Only
select data that belongs to courses where the instructor belongs or from
organizations where the user is and admin.

Then we select users, assignment (or directly the courses) that belong to
those courses.

This allows to greatly reduce the dataset size in the first step and
narrow it down in the second.
  • Loading branch information
marcospri committed Sep 10, 2024
1 parent 26ae6da commit a536df8
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 163 deletions.
43 changes: 10 additions & 33 deletions lms/services/assignment.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
import logging
from typing import cast

from sqlalchemy import BinaryExpression, Select, false, func, or_, select
from sqlalchemy import Select, func, select
from sqlalchemy.orm import Session

from lms.models import (
ApplicationInstance,
Assignment,
AssignmentGrouping,
AssignmentMembership,
AutoGradingConfig,
Course,
Grouping,
LTIRole,
RoleScope,
RoleType,
User,
)
from lms.services.course import CourseService
Expand Down Expand Up @@ -267,35 +263,19 @@ def get_assignments( # noqa: PLR0913
:param h_userids: return only assignments where these users are members.
:param assignment_ids: return only assignments with these IDs.
"""
query = select(Assignment).where(Assignment.title.is_not(None))
candidate_courses = CourseService.courses_permission_check_query(
instructor_h_userid, admin_organization_ids, course_ids
).cte("candidate_courses")

query = (
select(Assignment)
.join(candidate_courses, candidate_courses.c[0] == Assignment.course_id)
.where(Assignment.title.is_not(None))
)

if assignment_ids:
query = query.where(Assignment.id.in_(assignment_ids))

# Let's crate no op clauses by default to avoid having to check the presence of these filters
instructor_h_userid_clause = cast(BinaryExpression, false())
admin_organization_ids_clause = cast(BinaryExpression, false())

if instructor_h_userid:
instructor_h_userid_clause = Assignment.course_id.in_(
CourseService.course_ids_with_role_query(
instructor_h_userid, RoleScope.COURSE, RoleType.INSTRUCTOR
)
)

if admin_organization_ids:
admin_organization_ids_clause = Assignment.id.in_(
select(Assignment.id)
.join(Course)
.join(ApplicationInstance)
.where(ApplicationInstance.organization_id.in_(admin_organization_ids))
)
# instructor_h_userid and admin_organization_ids are about access rather than filtering.
# we apply them both as an or to fetch assignments where the users is either an instructor or an admin
query = query.where(
or_(instructor_h_userid_clause, admin_organization_ids_clause)
)

if h_userids:
query = query.where(
Assignment.id.in_(
Expand All @@ -305,9 +285,6 @@ def get_assignments( # noqa: PLR0913
)
)

if course_ids:
query = query.where(Assignment.course_id.in_(course_ids))

return query.order_by(Assignment.title, Assignment.id)

def get_courses_assignments_count(self, **kwargs) -> dict[int, int]:
Expand Down
107 changes: 61 additions & 46 deletions lms/services/course.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import json
from copy import deepcopy
from typing import cast

from sqlalchemy import BinaryExpression, Select, Text, column, false, func, or_, select
from sqlalchemy import Select, Text, column, func, select, union

from lms.db import full_text_match
from lms.models import (
Expand Down Expand Up @@ -89,7 +88,6 @@ def _search_query( # noqa: PLR0913, PLR0917
name: str | None = None,
limit: int | None = 100,
organization_ids: list[int] | None = None,
h_userids: list[str] | None = None,
):
query = self._db.query(Course)

Expand All @@ -115,14 +113,6 @@ def _search_query( # noqa: PLR0913, PLR0917
.filter(Organization.id.in_(organization_ids))
)

if h_userids:
# Only courses these h_userids belong to
query = (
query.join(GroupingMembership)
.join(User)
.filter(User.h_userid.in_(h_userids))
)

return query.limit(limit)

def search( # noqa: PLR0913, PLR0917
Expand All @@ -133,7 +123,6 @@ def search( # noqa: PLR0913, PLR0917
name: str | None = None,
limit: int | None = 100,
organization_ids: list[int] | None = None,
h_userids: list[str] | None = None,
) -> list[Course]:
return self._search_query(
id_=id_,
Expand All @@ -142,7 +131,6 @@ def search( # noqa: PLR0913, PLR0917
name=name,
limit=limit,
organization_ids=organization_ids,
h_userids=h_userids,
).all()

def get_courses( # noqa: PLR0913
Expand All @@ -161,58 +149,85 @@ def get_courses( # noqa: PLR0913
:param assignment_ids: return only the courses these assignments belong to.
:param course_ids: return only courses with these IDs.
"""
courses_query = (
self._search_query(h_userids=h_userids, limit=None)
candidate_courses = CourseService.courses_permission_check_query(
instructor_h_userid, admin_organization_ids, course_ids
).cte("candidate_courses")

query = (
select(Course.id)
# Deduplicate courses by authority_provided_id, take the last updated one
.distinct(Course.authority_provided_id)
.join(candidate_courses, candidate_courses.c[0] == Course.id)
.order_by(Course.authority_provided_id, Course.updated.desc())
# Only select the ID of the deduplicated courses
).with_entities(Course.id)

if course_ids:
courses_query = courses_query.where(Course.id.in_(course_ids))

# Let's crate no op clauses by default to avoid having to check the presence of these filters
instructor_h_userid_clause = cast(BinaryExpression, false())
admin_organization_ids_clause = cast(BinaryExpression, false())
if instructor_h_userid:
instructor_h_userid_clause = Course.id.in_(
self.course_ids_with_role_query(
instructor_h_userid, RoleScope.COURSE, RoleType.INSTRUCTOR
)
)
if admin_organization_ids:
admin_organization_ids_clause = Course.application_instance_id.in_(
select(ApplicationInstance.id).where(
ApplicationInstance.organization_id.in_(admin_organization_ids)
)
)
# instructor_h_userid and admin_organization_ids are about access rather than filtering.
# we apply them both as an or to fetch courses where the users is either an instructor or an admin
courses_query = courses_query.where(
or_(instructor_h_userid_clause, admin_organization_ids_clause)
)

if assignment_ids:
courses_query = courses_query.where(
query = query.where(
Course.id.in_(
select(AssignmentGrouping.grouping_id).where(
AssignmentGrouping.assignment_id.in_(assignment_ids)
)
)
)

if h_userids:
query = query.where(
Course.id.in_(
select(GroupingMembership.grouping_id)
.join(User)
.where(User.h_userid.in_(h_userids))
)
)

return (
select(Course)
.where(
Course.id.in_(courses_query)
# We can sort these again without affecting deduplication
)
.where(Course.id.in_(query))
# We can sort these again without affecting deduplication
.order_by(Course.lms_name, Course.id)
)

@staticmethod
def course_ids_with_role_query(
def courses_permission_check_query(
instructor_h_userid: str | None = None,
admin_organization_ids: list[int] | None = None,
course_ids: list[int] | None = None,
):
courses_where_instructor_query = select(None) # type: ignore
courses_where_admin_query = select(None) # type: ignore

if instructor_h_userid:
# Get all the courses where instructor_h_userid is an instructor. This will query AssignmentMembership, which includes roles
courses_where_instructor_query = CourseService._course_ids_with_role_query(
instructor_h_userid, RoleScope.COURSE, RoleType.INSTRUCTOR
)

if course_ids:
courses_where_instructor_query = courses_where_instructor_query.where(
AssignmentGrouping.grouping_id.in_(course_ids)
)

if admin_organization_ids:
# Also get all the courses of organiations where this users is an admin
courses_where_admin_query = (
select(Course.id)
.join(ApplicationInstance)
.where(
ApplicationInstance.organization_id.in_(admin_organization_ids),
)
)

if course_ids:
courses_where_admin_query = courses_where_admin_query.where(
Course.id.in_(course_ids)
)

return union(
courses_where_admin_query,
courses_where_instructor_query,
)

@staticmethod
def _course_ids_with_role_query(
h_userid: str, role_scope, role_type
) -> Select[tuple[int]]:
"""Return a query that returns all the Course.id where h_userid belongs as (role_scope, role_type)."""
Expand Down
84 changes: 25 additions & 59 deletions lms/services/user.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
from functools import lru_cache
from typing import cast

from sqlalchemy import BinaryExpression, false, or_, select
from sqlalchemy import select
from sqlalchemy.exc import NoResultFound
from sqlalchemy.sql import Select

from lms.models import (
ApplicationInstance,
Assignment,
AssignmentMembership,
GroupingMembership,
LMSUser,
LMSUserApplicationInstance,
LTIRole,
Expand Down Expand Up @@ -162,75 +159,44 @@ def get_users( # noqa: PLR0913, PLR0917
:param course_ids: return only users that belong to these courses.
:param assignment_ids: return only users that belong these assignments.
"""
query = select(User.id)

# A few of the filters need to join with assignment_membership.
# Avoid joins and/or multiple subqueries by building a subquery and filtering the main query
# by it at the end
assignment_membership_subquery = select(AssignmentMembership.user_id).where(
AssignmentMembership.lti_role_id.in_(
select(LTIRole.id).where(
LTIRole.scope == role_scope, LTIRole.type == role_type
)
)
)

# Let's crate no op clauses by default to avoid having to check the presence of these filters
instructor_h_userid_clause = cast(BinaryExpression, false())
admin_organization_ids_clause = cast(BinaryExpression, false())

if instructor_h_userid:
instructor_h_userid_clause = User.id.in_(
# GroupingMembership has membership information, but it lacks role info
select(GroupingMembership.user_id).where(
GroupingMembership.grouping_id.in_(
# Get all the courses where instructor_h_userid is an instructor. This will query AssignmentMembership, which includes roles
CourseService.course_ids_with_role_query(
instructor_h_userid, RoleScope.COURSE, RoleType.INSTRUCTOR
)
)
)
)
candidate_courses = CourseService.courses_permission_check_query(
instructor_h_userid, admin_organization_ids, course_ids
).cte("candidate_courses")

if admin_organization_ids:
admin_organization_ids_clause = User.application_instance_id.in_(
select(ApplicationInstance.id).where(
ApplicationInstance.organization_id.in_(admin_organization_ids)
user_ids_query = (
select(AssignmentMembership.user_id)
.join(Assignment)
.join(candidate_courses, candidate_courses.c[0] == Assignment.course_id)
.where(
AssignmentMembership.lti_role_id.in_(
select(LTIRole.id).where(
LTIRole.scope == role_scope, LTIRole.type == role_type
)
)
)

# instructor_h_userid and admin_organization_ids are about access rather than filtering.
# we apply them both as an or to fetch users where the users is either an instructor or an admin
query = query.where(
or_(instructor_h_userid_clause, admin_organization_ids_clause)
)

if h_userids:
query = query.where(User.h_userid.in_(h_userids))

if course_ids:
assignment_membership_subquery = assignment_membership_subquery.join(
Assignment
).where(Assignment.course_id.in_(course_ids))

if assignment_ids:
assignment_membership_subquery = assignment_membership_subquery.where(
user_ids_query = user_ids_query.where(
AssignmentMembership.assignment_id.in_(assignment_ids)
)

query = query.where(User.id.in_(assignment_membership_subquery))

# Deduplicate based on the row's h_userid taking the last updated one
query = query.distinct(User.h_userid).order_by(
User.h_userid, User.updated.desc()
query = (
select(User.id)
.distinct(User.h_userid)
# Deduplicate based on the row's h_userid taking the last updated one
.where(User.id.in_(user_ids_query))
.order_by(User.h_userid, User.updated.desc())
)

if h_userids:
query = query.where(User.h_userid.in_(h_userids))

return (
select(User)
.where(
User.id.in_(query)
# We can sort these again without affecting deduplication
)
.where(User.id.in_(query))
# We can sort these again without affecting deduplication
.order_by(User.display_name, User.id)
)

Expand Down
Loading

0 comments on commit a536df8

Please sign in to comment.