Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: nationalarchives/ds-caselaw-marklogic
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 40afe46ce8eebb58caedcee4ed1c068ad901ab17
Choose a base ref
..
head repository: nationalarchives/ds-caselaw-marklogic
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 6f7185a4af32a704e1142dfb61a6d13ae1d2c7ec
Choose a head ref
Showing with 38 additions and 55 deletions.
  1. +1 −0 .gitignore
  2. +21 −26 .pre-commit-config.yaml
  3. +9 −15 development_scripts/populate_top_judgments_and_neighbours.py
  4. +7 −14 pyproject.toml
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -4,3 +4,4 @@ build
gradle-*.properties
!gradle-development.properties
__pycache__
node_modules/
47 changes: 21 additions & 26 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
exclude: "^docs/|/migrations/"
default_install_hook_types: [pre-commit, pre-push]
exclude: src/main/.*\.json # These are templates, not real JSON. They won't parse cleanly.

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- id: check-case-conflict
- id: check-json
- id: check-merge-conflict
- id: check-xml
- id: check-yaml
- id: end-of-file-fixer
- id: forbid-submodules
- id: mixed-line-ending
- id: no-commit-to-branch
- id: trailing-whitespace

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.0
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format

- repo: local
hooks:
@@ -20,26 +33,8 @@ repos:
entry: python scripts/validate_schemas
files: .xsd$

- repo: https://github.com/psf/black
rev: 23.11.0
hooks:
- id: black

- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.1.0
rev: v4.0.0-alpha.8
hooks:
- id: prettier
types_or: [scss, yaml, markdown, javascript, xml]

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.1.6
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]

# sets up .pre-commit-ci.yaml to ensure pre-commit dependencies stay up to date
ci:
autoupdate_schedule: weekly
skip: []
submodules: false
types_or: [yaml, json, xml, markdown, scss, javascript]
24 changes: 9 additions & 15 deletions development_scripts/populate_top_judgments_and_neighbours.py
Original file line number Diff line number Diff line change
@@ -33,14 +33,14 @@


def get_judgment_xml(url):
print("Getting judgment: %s" % url)
print(f"Getting judgment: {url}")
response = requests.get(f"https://caselaw.nationalarchives.gov.uk/{url}/data.xml")
response.raise_for_status()
return response.content


def save_judgment_xml(url, xml):
print("Saving judgment: %s" % url)
print(f"Saving judgment: {url}")
ml_url = f"/{url}.xml"
response = requests.put(
f"http://admin:admin@localhost:8011/LATEST/documents?uri={ml_url}&collection=judgment",
@@ -49,7 +49,7 @@ def save_judgment_xml(url, xml):
try:
response.raise_for_status()
except requests.exceptions.RequestException as e:
print("Something went wrong saving the judgment: %s" % e)
print(f"Something went wrong saving the judgment: {e}")


def get_neighbours_for_judgment(xml):
@@ -59,18 +59,12 @@ def get_neighbours_for_judgment(xml):
"./akn:judgment/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname",
ns,
).attrib["value"]
print("Getting neighbours for judgment title: %s" % title)
search_url = (
"https://caselaw.nationalarchives.gov.uk/judgments/results?query="
+ quote(title)
)
print(f"Getting neighbours for judgment title: {title}")
search_url = "https://caselaw.nationalarchives.gov.uk/judgments/results?query=" + quote(title)
search_results = requests.get(search_url)
search_soup = BeautifulSoup(search_results.content, "html.parser")
neighbours = list(
re.sub(r"^\/", "", a["href"])
for a in search_soup.select(".judgment-listing__title a")
)
print("... found %s" % len(neighbours))
neighbours = list(re.sub(r"^\/", "", a["href"]) for a in search_soup.select(".judgment-listing__title a"))
print(f"... found {len(neighbours)}")
return neighbours


@@ -85,6 +79,6 @@ def get_neighbours_for_judgment(xml):
save_judgment_xml(url2, xml2)
found.add(url2)
else:
print("Skipping already imported judgment %s" % url2)
print(f"Skipping already imported judgment {url2}")
print(f"**** {url} and close title matches added to local Marklogic db ****")
print("DONE. Imported %s judgments." % len(found))
print(f"DONE. Imported {len(found)} judgments.")
21 changes: 7 additions & 14 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -15,17 +15,19 @@ pytest = "^7.4.3"
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"


[tool.ruff]
line-length = 120

[tool.ruff.lint]
ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # long lines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
extend-select = ["W", "B", "Q", "C90", "I", "UP", "YTT", "ASYNC", "S", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
"ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLF", "SLOT", "SIM", "TID", "TCH", "INT", "PTH",
"FIX", "PGH", "PL", "TRY", "FLY", "PERF", "RUF"]
unfixable = ["ERA"]

# things skipped:
# N: naming, possibly good
# D: docstrings missing throughout
# N: naming, possibly good
# D: docstrings missing throughout
# ANN: annotations missing throughout
# FBT: not convinced boolean trap worth auto-banning.
# CPY: copyright at top of each file
@@ -37,19 +39,10 @@ unfixable = ["ERA"]
# PD, NPY, AIR: ignored, panda / numpy / airflow specific
# FURB: not yet out of preview



[tool.ruff.extend-per-file-ignores]
[tool.ruff.lint.extend-per-file-ignores]
"*" = ["RET505", # disagree with if X: return Y else: return Z being wrong
"T201", # print
"S113", # requests no timeout : TODO
]
"tests/*" = ["S101"] # assert fine in tests
"development_scripts/populate_top_judgments_and_neighbours.py" = ["S314", "C400"] # TODO



[tool.ruff.isort]
known-first-party = ["ds-caselaw-editor-ui", "config"]

[tool.ruff.pycodestyle]
"development_scripts/populate_top_judgments_and_neighbours.py" = ["S314", "C400"] # TODO