Skip to content

Commit

Permalink
Merge branch 'main' into docker-3134
Browse files Browse the repository at this point in the history
  • Loading branch information
mlissner authored Nov 29, 2023
2 parents 069ceda + 24191e5 commit 265fcbe
Show file tree
Hide file tree
Showing 56 changed files with 5,010 additions and 2,163 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/semgrep.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ jobs:
semgrep:
runs-on: ubuntu-latest
name: Check
container:
image: returntocorp/semgrep
steps:
- uses: actions/checkout@v3
- name: Semgrep
id: semgrep
uses: returntocorp/semgrep-action@v1
with:
publishToken: ${{ secrets.SEMGREP_APP_TOKEN }}
- run: semgrep ci
env:
SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }}
2 changes: 0 additions & 2 deletions cl/alerts/api_serializers.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from drf_dynamic_fields import DynamicFieldsMixin
from rest_framework import serializers
from rest_framework.serializers import HyperlinkedRelatedField

from cl.alerts.models import Alert, DocketAlert
from cl.api.utils import HyperlinkedModelSerializerWithId
from cl.search.models import Docket


class SearchAlertSerializer(
Expand Down
4 changes: 2 additions & 2 deletions cl/alerts/management/commands/cl_index_search_alerts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from cl.alerts.models import Alert
from cl.alerts.tasks import index_alert_document
from cl.alerts.tasks import es_save_alert_document
from cl.lib.command_utils import VerboseCommand, logger
from cl.search.documents import AudioPercolator
from cl.search.models import SEARCH_TYPES
Expand Down Expand Up @@ -48,7 +48,7 @@ def handle(self, *args, **options):
# Indexing the Alert objects
for alert in queryset.iterator():
logger.info(f"Indexing Alert with ID: {alert.pk}")
index_alert_document.delay(alert, es_document)
es_save_alert_document.delay(alert.pk, es_document.__name__)
indexing_counter += 1

self.stdout.write(
Expand Down
4 changes: 2 additions & 2 deletions cl/alerts/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from django.dispatch import receiver

from cl.alerts.models import Alert
from cl.alerts.tasks import index_alert_document
from cl.alerts.tasks import es_save_alert_document
from cl.lib.command_utils import logger
from cl.search.documents import AudioPercolator
from cl.search.models import SEARCH_TYPES
Expand All @@ -22,7 +22,7 @@ def create_or_update_alert_in_es_index(sender, instance=None, **kwargs):
return

if f"type={SEARCH_TYPES.ORAL_ARGUMENT}" in instance.query:
index_alert_document.delay(instance, AudioPercolator)
es_save_alert_document.delay(instance.pk, AudioPercolator.__name__)


@receiver(
Expand Down
72 changes: 19 additions & 53 deletions cl/alerts/tasks.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import copy
from dataclasses import dataclass
from datetime import datetime
from importlib import import_module
from typing import Dict, List, Tuple, Union, cast

from celery import Task
from django.conf import settings
from django.contrib.auth.models import User
from django.core.mail import EmailMultiAlternatives, get_connection, send_mail
from django.db import transaction
from django.db.models import Prefetch
from django.template import loader
from django.utils.timezone import now
from elasticsearch.exceptions import (
ConnectionError,
NotFoundError,
RequestError,
TransportError,
Expand All @@ -36,19 +37,12 @@
from cl.lib.elasticsearch_utils import merge_highlights_into_result
from cl.lib.redis_utils import create_redis_semaphore, delete_redis_semaphore
from cl.lib.string_utils import trunc
from cl.people_db.models import Person
from cl.recap.constants import COURT_TIMEZONES
from cl.search.constants import ALERTS_HL_TAG
from cl.search.documents import (
ES_CHILD_ID,
AudioPercolator,
DocketDocument,
ESRECAPDocument,
PersonDocument,
PositionDocument,
)
from cl.search.documents import ES_CHILD_ID, ESRECAPDocument, PositionDocument
from cl.search.models import Docket, DocketEntry
from cl.search.types import (
AudioPercolator,
ESDocumentClassType,
PercolatorResponseType,
SaveDocumentResponseType,
Expand All @@ -57,6 +51,8 @@
from cl.stats.utils import tally_stat
from cl.users.models import UserProfile

es_document_module = import_module("cl.search.documents")


def make_alert_key(d_pk: int) -> str:
    """Build the key string associated with a docket's enqueued alert.

    The key has the fixed form ``docket.alert.enqueued:<d_pk>``.
    NOTE(review): presumably used as a cache/semaphore key so the same
    docket alert is not enqueued twice -- confirm against the callers.

    :param d_pk: The primary key of the docket.
    :return: The key string for the given docket.
    """
    return f"docket.alert.enqueued:{d_pk}"
Expand Down Expand Up @@ -609,7 +605,7 @@ def process_percolator_response(response: PercolatorResponseType) -> None:

@app.task(
bind=True,
autoretry_for=(TransportError, ConnectionError, RequestError),
autoretry_for=(ConnectionError,),
max_retries=3,
interval_start=5,
)
Expand Down Expand Up @@ -680,26 +676,33 @@ def send_or_schedule_alerts(
return alerts_triggered, document_content


# New task
@app.task(
bind=True,
autoretry_for=(TransportError, ConnectionError, RequestError),
autoretry_for=(ConnectionError,),
max_retries=3,
interval_start=5,
ignore_result=True,
queue=settings.CELERY_ETL_TASK_QUEUE,
)
def index_alert_document(
self: Task, alert: Alert, es_document=AudioPercolator
def es_save_alert_document(
self: Task,
alert_id: int,
es_document_name: str,
) -> None:
"""Helper method to prepare and index an Alert object into Elasticsearch.
:param self: The celery task
:param alert: The Alert instance to be indexed.
:param es_document: The Elasticsearch document percolator used for indexing
:param alert_id: The Alert instance ID to be indexed.
:param es_document_name: The Elasticsearch document percolator name used
for indexing.
the Alert instance.
:return: None.
"""

es_document = getattr(es_document_module, es_document_name)
document = es_document()
alert = Alert.objects.get(pk=alert_id)
doc = document.prepare(alert)
if not doc["percolator_query"]:
return None
Expand All @@ -708,40 +711,3 @@ def index_alert_document(
)
if not doc_indexed in ["created", "updated"]:
logger.warning(f"Error indexing Alert ID: {alert.pk}")


@app.task(
    bind=True,
    autoretry_for=(TransportError, ConnectionError, RequestError),
    max_retries=3,
    interval_start=5,
    ignore_result=True,
)
def remove_doc_from_es_index(
    self: Task, es_document: ESDocumentClassType, instance_id: int
) -> None:
    """Remove a document from an Elasticsearch index.

    The document is looked up by ID and deleted. If Elasticsearch reports
    that the document does not exist, the situation is logged as an error
    and the task finishes without raising.

    :param self: The celery task
    :param es_document: The Elasticsearch document type.
    :param instance_id: The ID of the instance to be removed from the
    Elasticsearch index.
    :return: None
    """

    # Position and RECAP documents are not stored under the raw instance ID;
    # their ES document ID is derived from it through ES_CHILD_ID.
    if es_document is PositionDocument:
        doc_id = ES_CHILD_ID(instance_id).POSITION
    elif es_document is ESRECAPDocument:
        doc_id = ES_CHILD_ID(instance_id).RECAP
    else:
        doc_id = instance_id

    try:
        doc = es_document.get(id=doc_id)
        doc.delete(refresh=settings.ELASTICSEARCH_DSL_AUTO_REFRESH)
    except NotFoundError:
        # Use the class name unchanged: str.capitalize() lowercases every
        # character after the first, which would mangle CamelCase model
        # names (e.g. "DocketEntry" -> "Docketentry") in the log message.
        model_label = es_document.Django.model.__name__
        logger.error(
            f"The {model_label} with ID:{instance_id} can't be deleted from "
            f"the ES index, it doesn't exists."
        )
Loading

0 comments on commit 265fcbe

Please sign in to comment.