Skip to content

Commit

Permalink
feat: Add a "tutor do" command to transform tracking logs
Browse files Browse the repository at this point in the history
While this command can be run as a management command on the server, it needs to run as a job for Vector to pick up the logging statements and insert them into the xAPI table.

Also fixes some bugs with Vector reading xAPI logging statements.
  • Loading branch information
bmtcril committed Jul 27, 2023
1 parent 1eaa9a5 commit 5e0f970
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 7 deletions.
28 changes: 25 additions & 3 deletions tutoraspects/commands_v0.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from tutor import config as tutor_config


@click.command(help="Create an Open edX user and interactively set their password")
@click.command(help="Run dbt with the provided command and options.")
@click.option(
"-c",
"--command",
Expand Down Expand Up @@ -36,7 +36,7 @@ def dbt(context, command) -> None:
runner.run_job("aspects", command)


@click.command()
@click.command(help="Load generated fake xAPI test data to ClickHouse.")
@click.option("-n", "--num_batches", default=100)
@click.option("-s", "--batch_size", default=100)
@click.pass_obj
Expand All @@ -55,7 +55,10 @@ def load_xapi_test_data(context, num_batches, batch_size) -> None:
runner.run_job("aspects", command)


@click.command(context_settings={"ignore_unknown_options": True})
@click.command(
help="Run Alembic migrations with the given options.",
context_settings={"ignore_unknown_options": True},
)
@click.option(
"-c",
"--command",
Expand Down Expand Up @@ -104,9 +107,28 @@ def dump_courses_to_clickhouse(context, options) -> None:
runner.run_job("cms", command)


# pylint: disable=line-too-long
# Ex: tutor local do transform-tracking-logs --options "--source_provider MINIO --source_config '{\"key\": \"openedx\", \"secret\": \"<your-minio-secret-key>\", \"container\": \"openedx\", \"prefix\": \"/tracking_logs\", \"host\": \"files.local.overhang.io\", \"secure\": false}' --destination_provider LRS --transformer_type xapi"
@click.command(help="Uses event-routing-backends to replay tracking logs.")
@click.option("--options", default="")
@click.pass_obj
def transform_tracking_logs(context, options) -> None:
    """
    Job that proxies the transform_tracking_logs management command.

    Builds a ``./manage.py lms transform_tracking_logs`` invocation and runs
    it as an "lms" job through the job runner obtained from ``context``.

    Args:
        context: Object injected by ``click.pass_obj``; its ``root`` is used
            to load the configuration and its ``job_runner`` to build the
            runner.
        options: Raw option string appended verbatim to the management
            command (defaults to an empty string).
    """
    config = tutor_config.load(context.root)
    runner = context.job_runner(config)

    # ``options`` is interpolated unmodified, so any quoting/escaping of the
    # nested arguments (e.g. the JSON passed to --source_config) is the
    # caller's responsibility.
    command = f"""
    ./manage.py lms transform_tracking_logs {options}
    """
    runner.run_job("lms", command)


# Click commands defined in this module, gathered into a single tuple.
# NOTE(review): presumably iterated by the plugin entry point to register
# these as `tutor ... do` jobs -- confirm against the code that imports it.
COMMANDS = (
load_xapi_test_data,
dbt,
alembic,
dump_courses_to_clickhouse,
transform_tracking_logs,
)
13 changes: 13 additions & 0 deletions tutoraspects/commands_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,22 @@ def dump_courses_to_clickhouse(options) -> list[tuple[str, str]]:
return [("cms", f"./manage.py cms dump_courses_to_clickhouse {options}")]


# pylint: disable=line-too-long
# Ex: tutor local do transform-tracking-logs --options "--source_provider LOCAL --source_config '{\"key\": \"/openedx/data/\", \"prefix\": \"tracking.log\", \"container\": \"logs\"}' --destination_provider LRS --transformer_type xapi"
# Ex: tutor local do transform-tracking-logs --options "--source_provider MINIO --source_config '{\"key\": \"openedx\", \"secret\": \"<your-minio-secret-key>\", \"container\": \"openedx\", \"prefix\": \"/tracking_logs\", \"host\": \"files.local.overhang.io\", \"secure\": false}' --destination_provider LRS --transformer_type xapi"
@click.command(
    help="Uses event-routing-backends to replay tracking logs.",
    context_settings={"ignore_unknown_options": True},
)
@click.option("--options", default="", type=click.UNPROCESSED)
def transform_tracking_logs(options) -> list[tuple[str, str]]:
    """
    Job that proxies the transform_tracking_logs management command.

    Args:
        options: Raw, unprocessed option string appended verbatim to the
            management command (defaults to an empty string).

    Returns:
        A one-element list containing the ``("lms", command)`` pair, where
        ``command`` is the ``./manage.py lms transform_tracking_logs``
        invocation including ``options``.
    """
    # An explicit ``help=`` is passed above so the CLI help text does not
    # depend on this docstring.
    return [("lms", f"./manage.py lms transform_tracking_logs {options}")]


# Click commands defined in this module, gathered into a single tuple.
# NOTE(review): presumably iterated by the plugin entry point to register
# these as `tutor ... do` jobs -- confirm against the code that imports it.
COMMANDS = (
load_xapi_test_data,
dbt,
alembic,
dump_courses_to_clickhouse,
transform_tracking_logs,
)
5 changes: 3 additions & 2 deletions tutoraspects/patches/local-docker-compose-jobs-services
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ aspects-job:
volumes:
- ../../env/plugins/aspects/apps/aspects:/app/aspects
- ../../env/plugins/aspects/apps/aspects/scripts/:/app/aspects/scripts:ro
depends_on:
- superset {% if RUN_CLICKHOUSE%}
{% if RUN_SUPERSET or RUN_CLICKHOUSE or RUN_RALPH %}depends_on:{% if RUN_SUPERSET %}
- superset{% endif %}{% if RUN_CLICKHOUSE%}
- clickhouse{% endif %}{% if RUN_RALPH %}
- ralph{% endif %}
{% endif %}
clickhouse-job:
image: {{DOCKER_IMAGE_CLICKHOUSE}}
{% if RUN_CLICKHOUSE%}depends_on:
Expand Down
1 change: 1 addition & 0 deletions tutoraspects/templates/aspects/apps/vector/file.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
[sources.tracking_log_file]
type = "file"
include = ["/var/log/openedx/tracking.log"]

[transforms.openedx_containers]
type = "filter"
# no-op filter: created for future-proof compatibility
Expand Down
2 changes: 1 addition & 1 deletion tutoraspects/templates/aspects/apps/vector/k8s.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ extra_namespace_label_selector = "kubernetes.io/metadata.name={{ K8S_NAMESPACE }
[transforms.openedx_containers]
type = "filter"
inputs = ["kubernetes_logs"]
condition = '.kubernetes.pod_namespace == "{{ K8S_NAMESPACE }}" && includes(["lms", "cms"], .kubernetes.container_name)'
condition = '.kubernetes.pod_namespace == "{{ K8S_NAMESPACE }}" && includes(["lms", "cms", "lms-job", "cms-job"], .kubernetes.container_name)'

{% include "aspects/apps/vector/partials/common-post.toml" %}
3 changes: 2 additions & 1 deletion tutoraspects/templates/aspects/apps/vector/local.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
# Capture logs from all docker containers
[sources.docker_logs]
type = "docker_logs"

[transforms.openedx_containers]
type = "filter"
inputs = ["docker_logs"]
condition = 'includes(["lms", "cms", "lms-worker"], .label."com.docker.compose.service")'
condition = 'includes(["lms", "cms", "lms-job", "cms-job"], .label."com.docker.compose.service")'

{% include "aspects/apps/vector/partials/common-post.toml" %}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ event_id = parsed_json.id
drop_on_error = true
drop_on_abort = true

[transforms.xapi_debug]
type = "remap"
inputs = ["xapi"]
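# Parse the JSON held in .event_str into .message so sinks can select nested fields.
# NOTE(review): no time formatting happens in this transform; the strftime reference
# below looks carried over from another transform -- confirm whether it is still needed.
# Time formats: https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html#specifiers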
source = '''
.message = parse_json!(.event_str)
'''

### Sinks

Expand All @@ -82,6 +89,12 @@ inputs = ["tracking_debug"]
encoding.codec = "json"
encoding.only_fields = ["time", "message.context.course_id", "message.context.user_id", "message.name"]

[sinks.out_xapi]
type = "console"
inputs = ["xapi_debug"]
encoding.codec = "json"
encoding.only_fields = ["event_id", "emission_time", "message.verb.id"]

# # Send logs to clickhouse
[sinks.clickhouse]
type = "clickhouse"
Expand Down

0 comments on commit 5e0f970

Please sign in to comment.