Skip to content

Commit

Permalink
Merge pull request #157 from openstates/trim-note-strings-at-import
Browse files Browse the repository at this point in the history
Avoid DB import bugs by trimming strings at import transform
  • Loading branch information
jessemortenson authored Nov 22, 2024
2 parents 41e214d + 80d9da3 commit 1bf7802
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 12 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## 6.20.11 - Nov 22, 2024

* Use import transformers to truncate incoming strings that are too long for their DB columns:
  * Bill: document note, version note
  * Event: media note

## 6.20.10 - Nov 7, 2024

* Add additional log info re: archiving scrape files to cloud storage
Expand Down
24 changes: 14 additions & 10 deletions openstates/importers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,16 +532,20 @@ def apply_transformers(
if transformers is None:
transformers = self.cached_transformers

for key, key_transformers in transformers.items():
if key not in data:
continue
if isinstance(key_transformers, list):
for transformer in key_transformers:
data[key] = transformer(data[key])
elif isinstance(key_transformers, dict):
self.apply_transformers(data[key], key_transformers)
else:
data[key] = key_transformers(data[key])
if isinstance(data, list):
for data_item in data:
self.apply_transformers(data_item, transformers)
else:
for key, key_transformers in transformers.items():
if key not in data:
continue
if isinstance(key_transformers, list):
for transformer in key_transformers:
data[key] = transformer(data[key])
elif isinstance(key_transformers, dict):
self.apply_transformers(data[key], key_transformers)
else:
data[key] = key_transformers(data[key])

return data

Expand Down
11 changes: 10 additions & 1 deletion openstates/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,16 @@
CACHE_DIR = os.path.join(os.getcwd(), "_cache")
SCRAPED_DATA_DIR = os.path.join(os.getcwd(), "_data")

IMPORT_TRANSFORMERS = {"bill": {"identifier": transformers.fix_bill_id}}
IMPORT_TRANSFORMERS = {
"bill": {
"identifier": transformers.fix_bill_id,
"documents": {"note": transformers.truncate_300}, # TODO remove when db migration done
"versions": {"note": transformers.truncate_300}, # TODO remove when db migration done
},
"event": {
"media": {"note": transformers.truncate_300}, # TODO remove when db migration done
}
}

# Django settings
LOGGING = {
Expand Down
4 changes: 4 additions & 0 deletions openstates/utils/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ def fix_bill_id(bill_id: str) -> str:

def collapse_whitespace(value: str) -> str:
    """Return ``value`` with each match of ``_whitespace_re`` replaced by one space.

    The pattern itself is the module-level ``_whitespace_re``; presumably it
    matches runs of whitespace -- confirm against its definition.
    """
    collapsed = _whitespace_re.sub(" ", value)
    return collapsed


def truncate_300(value: str) -> str:
    """Return at most the first 300 characters of ``value``.

    Values that are already 300 characters or shorter come back with the
    same content; longer values are cut off after the 300th character.
    """
    max_length = 300
    return value[0:max_length]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "openstates"
version = "6.20.10"
version = "6.20.11"
description = "core infrastructure for the openstates project"
authors = ["James Turk <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 1bf7802

Please sign in to comment.