feat: republish failed events from csv #390

Merged · 7 commits · Aug 22, 2023
Changes from 1 commit
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -14,6 +14,7 @@ Change Log
Unreleased
~~~~~~~~~~
* Add script to get github action errors
* Add script to republish failed events

[2.0.0] - 2023-06-01
~~~~~~~~~~~~~~~~~~~~
1 change: 1 addition & 0 deletions Makefile
@@ -42,6 +42,7 @@ upgrade: ## update the requirements/*.txt files with the latest packages satisfy
pip install -qr requirements/pip-tools.txt
$(CMD_PIP_COMPILE) -o requirements/base.txt requirements/base.in
$(CMD_PIP_COMPILE) -o requirements/test.txt requirements/test.in
$(CMD_PIP_COMPILE) -o requirements/scripts.txt requirements/scripts.in
$(CMD_PIP_COMPILE) -o requirements/doc.txt requirements/doc.in
$(CMD_PIP_COMPILE) -o requirements/quality.txt requirements/quality.in
$(CMD_PIP_COMPILE) -o requirements/ci.txt requirements/ci.in
77 changes: 77 additions & 0 deletions edx_arch_experiments/scripts/republish_failed_events.py
@@ -0,0 +1,77 @@
"""
Publish events from a csv

This is meant to help republish failed events. The CSV may be an export from Splunk, or it may be manually created, as
long as it has 'initial_topic', 'event_type', 'event_data_as_json', 'event_key_field', and 'event_metadata_as_json'
columns.

Example row:
initial_topic,event_type,event_data_as_json,event_key_field,event_metadata_as_json
test-topic,org.openedx.test.event,{"test_data": {"course_key": "ABCx"}},test_data.course_key,
{"event_type": "org.openedx.test.event", "id": "12345", "minorversion": 0, "source": "openedx/cms/web",
"sourcehost": "ip-10-3-16-4", "time": "2023-08-10T17:55:22.088808+00:00", "sourcelib": [8, 5, 0]}


This is created as a script instead of a management command because it is meant to be run as a one-off, without
pip-installing this package into anything else. However, since edx-event-bus-kafka expects certain settings, the
script must be run in an environment with DJANGO_SETTINGS_MODULE set.

To run:
tox -e scripts -- python edx_arch_experiments/scripts/republish_failed_events.py
--filename /Users/rgraber/oneoffs/failed_events.csv
"""

import csv
import json
import sys

import click
from edx_event_bus_kafka.internal.producer import create_producer
from openedx_events.tooling import EventsMetadata, OpenEdxPublicSignal, load_all_signals


@click.command()
@click.option('--filename', type=click.Path(exists=True))
def read_and_send_events(filename):
    """Read failed events from the given csv and republish them to the event bus."""
    load_all_signals()
    producer = create_producer()
    try:
        log_columns = ['initial_topic', 'event_type', 'event_data_as_json', 'event_key_field', 'event_metadata_as_json']
        with open(filename) as log_file:
            reader = csv.DictReader(log_file)
            # Make sure csv contains all necessary columns for republishing
            if not all(column in reader.fieldnames for column in log_columns):
                print(f'Missing required columns {set(log_columns).difference(set(reader.fieldnames))}. Cannot'
                      f' republish events.')
                sys.exit(1)
            ids = set()
            for row in reader:
                # An empty field may end up in Splunk as the string "None". That is not a valid value for any of the
                # fields we care about, so just treat it the same as empty
                empties = [key for key, value in row.items() if key in log_columns and value in [None, '', 'None']]
                # If any row is missing data, stop processing the whole file to avoid sending events out of order
                if len(empties) > 0:
                    print(f'Missing required fields in row {reader.line_num}: {empties}. Will not continue publishing.')
                    sys.exit(1)

                # Strip single quotation marks off everything (Splunk adds them on all fields)
                topic = row['initial_topic'].replace("'", "")
                event_type = row['event_type'].replace("'", "")
                event_data = json.loads(row['event_data_as_json'].replace("'", ""))
                event_key_field = row['event_key_field'].replace("'", "")
                events_metadata_json = row['event_metadata_as_json'].replace("'", "")
                metadata = EventsMetadata.from_json(events_metadata_json)
                signal = OpenEdxPublicSignal.get_signal_by_type(event_type)
                if metadata.id in ids:
                    print(f"Skipping duplicate id {metadata.id}")
                    continue
                ids.add(metadata.id)

                producer.send(signal=signal, event_data=event_data, event_key_field=event_key_field, topic=topic,
                              event_metadata=metadata)
    finally:
        producer.prepare_for_shutdown()


if __name__ == '__main__':
read_and_send_events()
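For reference, a compatible input file can also be put together by hand when the failed events are not exported from Splunk. Below is a minimal sketch (not part of this change set) that writes one row with the required columns; the sample event, metadata values, and output path are hypothetical and mirror the example row in the module docstring above.

import csv
import json

# Hypothetical failed event to republish; the keys match the columns that
# republish_failed_events.py requires.
failed_event = {
    'initial_topic': 'test-topic',
    'event_type': 'org.openedx.test.event',
    'event_data_as_json': json.dumps({'test_data': {'course_key': 'ABCx'}}),
    'event_key_field': 'test_data.course_key',
    'event_metadata_as_json': json.dumps({
        'event_type': 'org.openedx.test.event',
        'id': '12345',
        'minorversion': 0,
        'source': 'openedx/cms/web',
        'sourcehost': 'ip-10-3-16-4',
        'time': '2023-08-10T17:55:22.088808+00:00',
        'sourcelib': [8, 5, 0],
    }),
}

# failed_events.csv is a hypothetical output path; pass it to the script
# via --filename.
with open('failed_events.csv', 'w', newline='') as out_file:
    writer = csv.DictWriter(out_file, fieldnames=list(failed_event))
    writer.writeheader()
    writer.writerow(failed_event)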
8 changes: 8 additions & 0 deletions edx_arch_experiments/settings/scripts.py
@@ -0,0 +1,8 @@
"""
Settings for running scripts in /scripts
"""
import os
from os.path import abspath, dirname, join

if os.path.isfile(join(dirname(abspath(__file__)), 'private.py')):
    from .private import *  # pylint: disable=import-error,wildcard-import
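The settings module above only pulls in a sibling private.py when one exists, so broker connection details stay local and out of version control. A rough sketch of such a private.py follows (not part of this change set); the edx-event-bus-kafka setting names shown are assumptions and should be checked against the installed event-bus version.

"""
Local-only settings for running the scripts in /scripts; not committed.
"""
# Kafka connection details read by edx-event-bus-kafka's producer.
# The setting names below are assumptions; verify them against the
# installed edx-event-bus-kafka release.
EVENT_BUS_KAFKA_BOOTSTRAP_SERVERS = 'localhost:9092'
EVENT_BUS_KAFKA_SCHEMA_REGISTRY_URL = 'http://localhost:8081'
EVENT_BUS_TOPIC_PREFIX = 'dev'

When the script is run through tox -e scripts, DJANGO_SETTINGS_MODULE would presumably point at edx_arch_experiments.settings.scripts so that these values are loaded.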
58 changes: 50 additions & 8 deletions requirements/base.txt
@@ -6,37 +6,79 @@
#
asgiref==3.7.2
# via django
attrs==23.1.0
# via openedx-events
cffi==1.15.1
# via pynacl
click==8.1.5
# via edx-django-utils
click==8.1.6
# via
# code-annotations
# edx-django-utils
code-annotations==1.5.0
# via edx-toggles
django==3.2.20
# via
# -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt
# -r requirements/base.in
# django-crum
# django-waffle
# edx-django-utils
# edx-event-bus-kafka
# edx-toggles
# openedx-events
django-crum==0.7.9
# via edx-django-utils
django-waffle==3.0.0
# via edx-django-utils
edx-django-utils==5.5.0
# via
# edx-django-utils
# edx-toggles
django-waffle==4.0.0
# via
# edx-django-utils
# edx-toggles
edx-django-utils==5.7.0
# via
# -r requirements/base.in
# edx-event-bus-kafka
# edx-toggles
edx-event-bus-kafka==5.3.1
# via -r requirements/base.in
newrelic==8.8.1
edx-opaque-keys[django]==2.4.0
# via openedx-events
edx-toggles==5.1.0
# via edx-event-bus-kafka
fastavro==1.8.2
# via openedx-events
jinja2==3.1.2
# via code-annotations
markupsafe==2.1.3
# via jinja2
newrelic==8.9.0
# via edx-django-utils
openedx-events==8.5.0
# via edx-event-bus-kafka
pbr==5.11.1
# via stevedore
psutil==5.9.5
# via edx-django-utils
pycparser==2.21
# via cffi
pymongo==3.13.0
# via edx-opaque-keys
pynacl==1.5.0
# via edx-django-utils
python-slugify==8.0.1
# via code-annotations
pytz==2023.3
# via django
pyyaml==6.0.1
# via code-annotations
sqlparse==0.4.4
# via django
stevedore==5.1.0
# via edx-django-utils
# via
# code-annotations
# edx-django-utils
# edx-opaque-keys
text-unidecode==1.3
# via python-slugify
typing-extensions==4.7.1
# via asgiref
6 changes: 3 additions & 3 deletions requirements/ci.txt
@@ -4,15 +4,15 @@
#
# make upgrade
#
distlib==0.3.6
distlib==0.3.7
# via virtualenv
filelock==3.12.2
# via
# tox
# virtualenv
packaging==23.1
# via tox
platformdirs==3.9.1
platformdirs==3.10.0
# via virtualenv
pluggy==1.2.0
# via tox
@@ -26,5 +26,5 @@ tox==3.28.0
# via
# -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt
# -r requirements/ci.in
virtualenv==20.24.0
virtualenv==20.24.2
# via tox