Skip to content

Commit

Permalink
Merge pull request #2100 from laws-africa/extractor
Browse files Browse the repository at this point in the history
Prototype workflow to use AI to extract judgment details
  • Loading branch information
longhotsummer authored Oct 14, 2024
2 parents 0868536 + 6f24187 commit 661d4b1
Show file tree
Hide file tree
Showing 8 changed files with 285 additions and 12 deletions.
93 changes: 84 additions & 9 deletions peachjam/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
from dal import autocomplete
from django import forms
from django.conf import settings
from django.contrib import admin
from django.contrib import admin, messages
from django.contrib.auth import get_user_model
from django.contrib.auth.admin import UserAdmin
from django.contrib.contenttypes.admin import GenericStackedInline, GenericTabularInline
from django.core.exceptions import ValidationError
from django.http.response import FileResponse
from django.shortcuts import get_object_or_404
from django.shortcuts import get_object_or_404, redirect, render
from django.template.response import TemplateResponse
from django.urls import path, reverse
from django.utils import timezone
Expand All @@ -29,8 +29,10 @@
from treebeard.admin import TreeAdmin
from treebeard.forms import MoveNodeForm, movenodeform_factory

from peachjam.extractor import ExtractorError, ExtractorService
from peachjam.forms import (
AttachedFilesForm,
JudgmentUploadForm,
NewDocumentFormMixin,
PublicationFileForm,
SourceFileForm,
Expand Down Expand Up @@ -288,7 +290,8 @@ def value_from_datadict(self, data, files, name):

class DocumentForm(forms.ModelForm):
# to track edit activity
edit_activity_start = forms.DateTimeField(widget=forms.HiddenInput(), required=True)
edit_activity_start = forms.DateTimeField(widget=forms.HiddenInput())
edit_activity_stage = forms.CharField(widget=forms.HiddenInput())
content_html = forms.CharField(
widget=CKEditorWidget(
extra_plugins=["lawwidgets"],
Expand Down Expand Up @@ -342,6 +345,9 @@ def __init__(self, data=None, *args, **kwargs):
self.fields["content_html"].widget.attrs["readonly"] = True

self.fields["edit_activity_start"].initial = timezone.now()
self.fields["edit_activity_stage"].initial = (
"corrections" if self.instance.pk else "initial"
)

def clean_content_html(self):
# prevent CKEditor-based editing of AKN HTML
Expand Down Expand Up @@ -434,9 +440,9 @@ class DocumentAdmin(BaseAdmin):
{
"fields": [
"work_link",
"title",
"jurisdiction",
"locality",
"title",
"date",
"language",
]
Expand Down Expand Up @@ -526,6 +532,14 @@ class NewForm(self.new_document_form_mixin, form):

return super().get_form(request, obj, **kwargs)

def render_change_form(self, request, context, *args, **kwargs):
    """Render the add/change form, honouring a ``stage`` querystring value.

    When the request carries a non-empty ``stage`` GET parameter, it is used
    as the initial value of the form's ``edit_activity_stage`` field before
    rendering is delegated to the parent class.
    """
    # This hook is our only chance to inject a pre-filled field from the
    # querystring for both the add and the change views.
    requested_stage = request.GET.get("stage")
    if requested_stage:
        form_fields = context["adminform"].form.fields
        form_fields["edit_activity_stage"].initial = requested_stage
    return super().render_change_form(request, context, *args, **kwargs)

def save_model(self, request, obj, form, change):
if not change:
obj.created_by = request.user
Expand All @@ -536,7 +550,7 @@ def save_model(self, request, obj, form, change):
EditActivity.objects.create(
document=obj,
user=request.user,
stage="corrections" if change else "initial",
stage=form.cleaned_data["edit_activity_stage"],
start=form.cleaned_data["edit_activity_start"],
end=timezone.now(),
)
Expand Down Expand Up @@ -883,14 +897,15 @@ class JudgmentAdmin(ImportExportMixin, DocumentAdmin):
fieldsets[0][1]["fields"].insert(3, "court")
fieldsets[0][1]["fields"].insert(4, "registry")
fieldsets[0][1]["fields"].insert(5, "case_name")
fieldsets[0][1]["fields"].insert(6, "outcomes")
fieldsets[0][1]["fields"].insert(7, "mnc")
fieldsets[0][1]["fields"].insert(8, "serial_number_override")
fieldsets[0][1]["fields"].insert(9, "serial_number")
fieldsets[0][1]["fields"].append("mnc")
fieldsets[0][1]["fields"].append("hearing_date")
fieldsets[0][1]["fields"].append("outcomes")

fieldsets[1][1]["fields"].insert(0, "attorneys")

fieldsets[2][1]["classes"] = ["collapse"]
fieldsets[2][1]["fields"].append("serial_number")
fieldsets[2][1]["fields"].append("serial_number_override")
fieldsets[3][1]["fields"].extend(["case_summary", "flynote", "order"])
readonly_fields = [
"mnc",
Expand Down Expand Up @@ -920,6 +935,12 @@ class JudgmentAdmin(ImportExportMixin, DocumentAdmin):
class Media:
js = ("js/judgment_duplicates.js",)

def changelist_view(self, request, extra_context=None):
    """Render the judgment changelist with the upload button's context.

    Adds ``can_upload_document`` (whether the extractor service is enabled)
    and ``upload_url`` (the admin upload view) to the template context.
    """
    if not extra_context:
        extra_context = {}
    extra_context.update(
        {
            "can_upload_document": ExtractorService().enabled(),
            "upload_url": reverse("admin:peachjam_judgment_upload"),
        }
    )
    return super().changelist_view(request, extra_context)

def get_fieldsets(self, request, obj=None):
fieldsets = super().get_fieldsets(request, obj)

Expand All @@ -938,6 +959,60 @@ def get_fieldsets(self, request, obj=None):

return fieldsets

def get_urls(self):
    """Return the admin URL patterns with the judgment upload route first."""
    inherited_urls = super().get_urls()
    upload_route = path(
        "upload/",
        # admin_view() wraps the view with the admin site's own checks
        self.admin_site.admin_view(self.upload_view),
        name="peachjam_judgment_upload",
    )
    # The custom route is listed ahead of the inherited ones.
    return [upload_route, *inherited_urls]

def upload_view(self, request):
    """Admin view that uploads a judgment file and creates a Judgment from it.

    If the extractor is not enabled, an error message is queued and the user
    is redirected to the judgment changelist. On a valid POST the file is
    handed to the extractor service; on success the user is redirected to the
    new judgment's change form with ``?stage=after-extraction``. Extractor
    failures are shown as non-field errors on the re-rendered form.
    """
    service = ExtractorService()
    if not service.enabled():
        messages.error(
            request,
            _("The Laws.Africa extractor is not enabled. Please check your settings."),
        )
        return redirect("admin:peachjam_judgment_changelist")

    # Unbound form, pre-selecting the site's default jurisdiction.
    upload_form = JudgmentUploadForm(
        initial={"jurisdiction": pj_settings().default_document_jurisdiction}
    )

    if request.method == "POST":
        upload_form = JudgmentUploadForm(request.POST, request.FILES)
        if upload_form.is_valid():
            cleaned = upload_form.cleaned_data
            try:
                judgment = service.extract_judgment_from_file(
                    jurisdiction=cleaned["jurisdiction"],
                    file=cleaned["file"],
                )
            except ExtractorError as exc:
                upload_form.add_error(None, str(exc))
            else:
                messages.success(
                    request, _("Judgment uploaded. Please check details carefully.")
                )
                change_url = reverse(
                    "admin:peachjam_judgment_change", args=[judgment.pk]
                )
                return redirect(f"{change_url}?stage=after-extraction")

    return render(
        request, "admin/judgment_upload_form.html", {"form": upload_form}
    )


@admin.register(Predicate)
class PredicateAdmin(admin.ModelAdmin):
Expand Down
2 changes: 1 addition & 1 deletion peachjam/analysis/citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class CitatorMatcher:
"""Matcher that delegates to the Citator service."""

citator_url = settings.PEACHJAM["CITATOR_API"]
citator_key = settings.PEACHJAM["CITATOR_API_KEY"]
citator_key = settings.PEACHJAM["LAWSAFRICA_API_KEY"]
max_text_size = 1024 * 1024 * 2

def __init__(self):
Expand Down
124 changes: 124 additions & 0 deletions peachjam/extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import logging
from datetime import datetime

import requests
from django.conf import settings
from django.core.files import File
from languages_plus.models import Language

from peachjam.models import CaseNumber, Court, Judgment, SourceFile, pj_settings
from peachjam.storage import clean_filename

log = logging.getLogger(__name__)


class ExtractorError(Exception):
    """Raised when the extractor service is unavailable or its output is unusable."""


class ExtractorService:
    """Client for the extractor API that builds a Judgment from an uploaded file.

    The API token and base URL are read from ``settings.PEACHJAM``
    (``LAWSAFRICA_API_KEY`` and ``EXTRACTOR_API``).
    """

    # Seconds to wait for the extractor API before giving up, so that a hung
    # service cannot block the calling request indefinitely.
    request_timeout = 120

    def __init__(self):
        self.api_token = settings.PEACHJAM["LAWSAFRICA_API_KEY"]
        self.api_url = settings.PEACHJAM["EXTRACTOR_API"]

    def enabled(self):
        """Return True when both the API token and the API URL are configured."""
        return bool(self.api_token and self.api_url)

    def get_headers(self):
        """Return the HTTP headers used to authenticate against the API."""
        return {"Authorization": "Token " + self.api_token}

    def extract_judgment_details(self, jurisdiction, file):
        """Send ``file`` to the extractor API and return the extracted details.

        Args:
            jurisdiction: object whose ``pk`` is sent as the ``country`` value.
            file: file-like object uploaded as the ``file`` part of the request.

        Returns:
            The ``extracted`` member of the JSON response.

        Raises:
            ExtractorError: if the service is not configured, cannot be
                reached, responds with a non-200 status, or returns a body
                that is not the expected JSON.
        """
        if not self.enabled():
            raise ExtractorError("Extractor service not configured")

        data = {
            "country": jurisdiction.pk,
            "court_names": [c.name for c in Court.objects.all()],
        }
        try:
            resp = requests.post(
                self.api_url + "judgment",
                files={"file": file},
                data=data,
                headers=self.get_headers(),
                timeout=self.request_timeout,
            )
        except requests.RequestException as e:
            # Connection failures and timeouts must surface as ExtractorError,
            # the only exception type callers are expected to handle.
            raise ExtractorError(f"Error calling extractor service: {e}") from e

        if resp.status_code != 200:
            raise ExtractorError(
                f"Error calling extractor service: {resp.status_code} {resp.text}"
            )

        try:
            payload = resp.json()
            extracted = payload["extracted"]
        except (ValueError, KeyError, TypeError) as e:
            raise ExtractorError(
                "Unexpected response from extractor service"
            ) from e

        log.info("Extracted details: %s", payload)
        return extracted

    @staticmethod
    def _parse_date(value):
        """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``.

        Raises ValueError (or TypeError for non-string input) when malformed.
        """
        return datetime.strptime(value, "%Y-%m-%d").date()

    def extract_judgment_from_file(self, jurisdiction, file):
        """Create and return a saved Judgment built from the uploaded ``file``.

        The extracted language, court and date are mandatory; the case name,
        hearing date and case numbers are applied when present. The uploaded
        file is attached to the new judgment as its source file.

        Raises:
            ExtractorError: if extraction fails, or the language, court or
                date is missing or cannot be resolved.
        """
        details = self.extract_judgment_details(jurisdiction, file)

        if not details.get("language"):
            raise ExtractorError("No language detected")
        # Fall back to the site default, then to English, when the detected
        # language code is not known locally.
        language = (
            Language.objects.filter(iso_639_3=details["language"].lower()).first()
            or pj_settings().default_document_language
            or Language.objects.get(pk="en")
        )

        if not details.get("court"):
            raise ExtractorError("No court detected")
        # .first() returns None rather than raising DoesNotExist, so the
        # missing-court case has to be checked explicitly.
        court = Court.objects.filter(name=details["court"]).first()
        if court is None:
            raise ExtractorError(f"Could not find court: {details['court']}")

        if not details.get("date"):
            raise ExtractorError("No date detected")
        try:
            date = self._parse_date(details["date"])
        except (TypeError, ValueError) as e:
            raise ExtractorError(f"Invalid date: {details['date']}") from e

        log.info("Creating new judgment")
        doc = Judgment()
        doc.jurisdiction = jurisdiction
        doc.language = language
        doc.court = court
        doc.date = date
        doc.case_name = details.get("case_name") or ""

        if details.get("hearing_date"):
            try:
                doc.hearing_date = self._parse_date(details["hearing_date"])
            except (TypeError, ValueError):
                # The hearing date is optional: keep going, but leave a trace.
                log.warning(
                    "Ignoring invalid hearing date: %s", details["hearing_date"]
                )

        doc.save()

        # Attach the source file; rewind first because the upload was already
        # read when it was sent to the extractor.
        file.seek(0)
        SourceFile(
            document=doc,
            file=File(file, name=clean_filename(file.name)),
            filename=file.name,
            mimetype=file.content_type,
        ).save()

        if doc.extract_content_from_source_file():
            doc.save()

        if doc.extract_citations():
            doc.save()

        # Case numbers must be saved explicitly; constructing the model
        # instance alone does not persist it.
        for case_number in details.get("case_numbers") or []:
            # TODO: matter type
            CaseNumber(
                document=doc,
                number=case_number["number"],
                year=case_number["year"],
                string_override=case_number["case_number_string"],
            ).save()

        return doc
13 changes: 13 additions & 0 deletions peachjam/forms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import copy

from allauth.account.forms import LoginForm, SignupForm
from countries_plus.models import Country
from django import forms
from django.conf import settings
from django.contrib.auth import get_user_model
Expand All @@ -18,6 +19,7 @@
AttachedFiles,
CoreDocument,
Folder,
PeachJamSettings,
PublicationFile,
SavedDocument,
SourceFile,
Expand Down Expand Up @@ -380,3 +382,14 @@ class PeachjamSignupForm(SignupForm):

class PeachjamLoginForm(LoginForm):
captcha = ReCaptchaField(widget=ReCaptchaV2Invisible)


class JudgmentUploadForm(forms.Form):
    """Form for uploading a judgment file together with its jurisdiction."""

    jurisdiction = forms.ModelChoiceField(Country.objects)
    file = forms.FileField()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Restrict the choices to the jurisdictions configured in the site
        # settings; evaluated per form instance rather than at import time.
        site_settings = PeachJamSettings.load()
        jurisdiction_field = self.fields["jurisdiction"]
        jurisdiction_field.queryset = site_settings.document_jurisdictions.all()
7 changes: 6 additions & 1 deletion peachjam/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,15 @@
"SUPPORT_EMAIL": os.environ.get("SUPPORT_EMAIL"),
"SENTRY_DSN_KEY": os.environ.get("SENTRY_DSN_KEY"),
"SENTRY_ENVIRONMENT": os.environ.get("SENTRY_ENVIRONMENT", "staging"),
"LAWSAFRICA_API_KEY": os.environ.get(
"LAWSAFRICA_API_KEY", os.environ.get("CITATOR_API_KEY")
),
"CITATOR_API": os.environ.get(
"CITATOR_API", "https://services.lawsafrica.com/citator/v1/extract-citations"
),
"CITATOR_API_KEY": os.environ.get("CITATOR_API_KEY"),
"EXTRACTOR_API": os.environ.get(
"EXTRACTOR_API", "https://services.lawsafrica.com/extractor/v1/extract/"
),
"EXTRA_SEARCH_INDEXES": [],
"SEARCH_JURISDICTION_FILTER": False,
"MULTIPLE_JURISDICTIONS": False,
Expand Down
3 changes: 2 additions & 1 deletion peachjam/templates/admin/change_form.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
{% load i18n comments humanize static %}
{% block form_top %}
{{ block.super }}
{% if adminform.form.edit_activity_start %}{{ adminform.form.edit_activity_start.as_hidden }}{% endif %}
{% if adminform.form.edit_activity_start %}{{ adminform.form.edit_activity_start }}{% endif %}
{% if adminform.form.edit_activity_stage %}{{ adminform.form.edit_activity_stage }}{% endif %}
{% endblock %}
{% block extra_actions %}
{{ block.super }}
Expand Down
6 changes: 6 additions & 0 deletions peachjam/templates/admin/change_list.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
{% load i18n %}
{% block object-tools-items %}
{{ block.super }}
{% if can_upload_document %}
<a class="btn btn-success" href="{{ upload_url }}">
<i class="fa fa-upload"></i>
{% trans 'Upload' %}
</a>
{% endif %}
{% if PEACHJAM_SETTINGS.editor_help_link and help_topic %}
<a class="btn btn-info"
target="_blank"
Expand Down
Loading

0 comments on commit 661d4b1

Please sign in to comment.