-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #40 from chnm/gazetteer
feat: Implementation of the gazetteer index
- Loading branch information
Showing
35 changed files
with
1,242 additions
and
396 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
# The following script reads a sheet of toponym variants (e.g. "Toponym Variants") from the Excel file | ||
# and creates a LocationAlias object for each row. Each alias is associated with every | ||
# Location whose placename_id matches the row's place_id. The field we write the variant to | ||
# is "placename_from_mss" on the LocationAlias object. | ||
|
||
# The script is similar to the load_toponyms.py script, but with some modifications | ||
# to accommodate the different fields and relationships. | ||
|
||
import logging | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from django.core.management.base import BaseCommand | ||
from django.db import IntegrityError, transaction | ||
|
||
from manuscript.models import Location, LocationAlias | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Command(BaseCommand):
    """Import toponym variants from an Excel workbook as ``LocationAlias`` rows.

    After header normalisation each sheet is expected to expose a ``place_id``
    and a ``toponym`` column. For every ``Location`` whose ``placename_id``
    equals the row's ``place_id``, a ``LocationAlias`` with
    ``placename_from_mss`` set to the toponym is created unless it already
    exists.
    """

    # Django's BaseCommand reads ``help`` (not ``help_text``) when rendering
    # ``manage.py help <command>``; ``help_text`` is kept as an alias so any
    # existing reference to it keeps working.
    help = "Load data from an Excel file. This reads information about the toponym variants and imports them."
    help_text = help

    def add_arguments(self, parser):
        """Register the ``--filepath`` and ``--sheetname`` command-line options."""
        parser.add_argument(
            "--filepath", type=str, help="filepath of excel file to load"
        )
        parser.add_argument("--sheetname", type=str, help="name of sheet to load")

    def handle_error(self, index, e, row, column_name, column_value):
        """Log a per-cell loading failure.

        Args:
            index: Zero-based row index; reported as ``index + 1``.
            e: The exception that was raised.
            row: The full row, dumped at DEBUG level for diagnosis.
            column_name: Name of the column being read.
            column_value: Value found in that column.
        """
        logger.error(
            "Error loading data at row %s, column '%s' with value '%s': %s - %s",
            index + 1,
            column_name,
            column_value,
            type(e),
            e,
        )
        logger.debug("Row data: \n%s", row)

    def process_field(self, row, field_name, index, is_bool=False):
        """Return ``row[field_name]``, stripped of surrounding whitespace if a string.

        Args:
            row: Mapping-like row (anything exposing ``.get``).
            field_name: Normalised column name to read.
            index: Zero-based row index, used only for error reporting.
            is_bool: Accepted for signature compatibility; currently unused.

        Returns:
            The cell value, or ``None`` when the column is absent or empty.

        Raises:
            Exception: Any error raised while reading the cell is logged via
                ``handle_error`` and re-raised unchanged.
        """
        try:
            field_value = row.get(field_name)
            if isinstance(field_value, str):
                field_value = field_value.strip()
            return field_value
        except Exception as e:
            self.handle_error(index, e, row, field_name, row.get(field_name))
            raise

    def create_location_alias(self, place_id, toponym):
        """Create (or fetch) a ``LocationAlias`` for each matching ``Location``.

        Errors are logged and swallowed so that one bad row does not abort the
        whole import. Each database step runs inside its own savepoint: a
        database error caught inside an enclosing ``transaction.atomic()``
        block would otherwise leave that transaction unusable for later rows.

        Args:
            place_id: Value matched against ``Location.placename_id``.
            toponym: Variant spelling stored in ``placename_from_mss``.
        """
        try:
            # Materialise once: avoids the separate ``exists()`` query.
            with transaction.atomic():
                locations = list(Location.objects.filter(placename_id=place_id))

            if not locations:
                logger.error("Location with placename_id %s does not exist", place_id)
                return

            for location in locations:
                with transaction.atomic():
                    location_alias, created = LocationAlias.objects.get_or_create(
                        location=location,
                        placename_from_mss=toponym,
                    )
                if created:
                    logger.info("Created new LocationAlias: %s", location_alias)
                else:
                    logger.info("LocationAlias already exists: %s", location_alias)
        except IntegrityError as e:
            logger.error("Integrity error creating LocationAlias: %s", e)
        except Exception as e:
            logger.error("Error creating LocationAlias: %s", e)

    def handle(self, *args, **options):
        """Entry point: load the workbook inside a single database transaction.

        A missing file is logged rather than raised; any other exception
        propagates and rolls the transaction back.
        """
        filepath = options.get("filepath")
        sheet_name = options.get("sheetname")

        try:
            with transaction.atomic():
                self.load_data(filepath, sheet_name)
            logger.info("Data loaded successfully")
        except FileNotFoundError as e:
            logger.error("Error loading data: %s.", e)

    @staticmethod
    def _normalize_frame(df):
        """Return ``df`` with NaN replaced by ``None`` and snake_case headers.

        Headers are stripped, lower-cased, purged of punctuation and have
        spaces turned into underscores (e.g. ``"Place ID"`` -> ``"place_id"``).
        """
        df = df.replace({np.nan: None})
        df.columns = (
            df.columns.str.strip()
            .str.lower()
            # The pattern is a regular expression; pandas >= 2.0 treats the
            # pattern as a literal string unless ``regex=True`` is given.
            .str.replace(r"[^\w\s]", "", regex=True)
            .str.replace(" ", "_", regex=False)
        )
        return df

    def load_data(self, filepath: str, sheet_name: str):
        """Read the workbook and create aliases for every usable row.

        Args:
            filepath: Path to the Excel workbook.
            sheet_name: Sheet to load; when falsy, every sheet is loaded.

        Raises:
            Exception: Any failure while reading or normalising the workbook
                is logged and re-raised.
        """
        try:
            logger.info("Loading data from %s sheet %s", filepath, sheet_name)

            # The context manager closes the workbook handle once the sheets
            # have been read into memory.
            with pd.ExcelFile(filepath) as xls:
                if sheet_name:
                    raw_frames = {sheet_name: pd.read_excel(xls, sheet_name)}
                else:
                    raw_frames = pd.read_excel(xls, sheet_name=None)

            # Normalise every sheet up front so a malformed header fails the
            # import before any row has been written.
            dfs = {
                name: self._normalize_frame(df) for name, df in raw_frames.items()
            }

            for name, df in dfs.items():
                for index, row in df.iterrows():
                    place_id = self.process_field(row, "place_id", index)
                    toponym = self.process_field(row, "toponym", index)

                    # Blank cells cannot identify a Location or form a
                    # meaningful alias; skip them instead of hitting the DB.
                    if place_id in (None, "") or toponym in (None, ""):
                        logger.warning(
                            "Skipping row %s in sheet '%s': missing place_id or toponym",
                            index + 1,
                            name,
                        )
                        continue

                    # create_location_alias logs and swallows its own errors.
                    self.create_location_alias(place_id, toponym)

        except Exception as e:
            logger.error("Error loading data: %s", e)
            raise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Generated by Django 5.0.2 on 2024-07-25 12:46 | ||
|
||
from django.db import migrations | ||
|
||
|
||
class Migration(migrations.Migration):
    """Merge migration joining two branches of the ``manuscript`` history.

    It declares both parent migrations and performs no schema changes.
    """

    dependencies = [
        (
            "manuscript",
            "0076_rename_group_editorialstatus_decorative_group_and_more",
        ),
        (
            "manuscript",
            "0085_remove_location_related_folio",
        ),
    ]

    # Intentionally empty: only the dependency edges matter here.
    operations = []
22 changes: 22 additions & 0 deletions
22
manuscript/migrations/0087_singlemanuscript_photographs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Generated by Django 5.0.2 on 2024-07-25 12:46 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add the optional ``photographs`` file field to ``SingleManuscript``."""

    dependencies = [("manuscript", "0086_merge_20240725_0846")]

    operations = [
        migrations.AddField(
            model_name="singlemanuscript",
            name="photographs",
            # Optional upload; an empty ``upload_to`` adds no sub-directory.
            field=models.FileField(
                upload_to="",
                null=True,
                blank=True,
                help_text="Upload photographs of the manuscript.",
            ),
        ),
    ]
19 changes: 19 additions & 0 deletions
19
manuscript/migrations/0088_alter_singlemanuscript_siglum.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Generated by Django 5.0.2 on 2024-07-25 13:24 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Redefine ``SingleManuscript.siglum`` as an optional, unique, indexed field."""

    dependencies = [("manuscript", "0087_singlemanuscript_photographs")]

    operations = [
        migrations.AlterField(
            model_name="singlemanuscript",
            name="siglum",
            field=models.CharField(
                max_length=20,
                unique=True,
                db_index=True,
                null=True,
                blank=True,
            ),
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Generated by Django 5.0.2 on 2024-07-25 15:27 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add the optional ``code_id`` text field to ``Location``."""

    dependencies = [("manuscript", "0088_alter_singlemanuscript_siglum")]

    operations = [
        migrations.AddField(
            model_name="location",
            name="code_id",
            field=models.CharField(
                verbose_name="Code ID",
                max_length=100,
                null=True,
                blank=True,
            ),
        ),
    ]
51 changes: 51 additions & 0 deletions
51
manuscript/migrations/0090_linecode_remove_location_line_code_and_more.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Generated by Django 5.0.2 on 2024-07-25 15:34 | ||
|
||
from django.db import migrations, models | ||
|
||
import manuscript.models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Introduce ``LineCode`` and turn ``Location.line_code`` into a many-to-many.

    The existing ``line_code`` field is removed and then re-added as a relation
    to the new model. No data operation is included, so values stored in the
    old field are not carried over by this migration.
    """

    dependencies = [("manuscript", "0089_location_code_id")]

    operations = [
        # 1. New model holding a single citation line code.
        migrations.CreateModel(
            name="LineCode",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        verbose_name="ID",
                        primary_key=True,
                        auto_created=True,
                        serialize=False,
                    ),
                ),
                (
                    "code",
                    models.CharField(
                        max_length=255,
                        null=True,
                        blank=True,
                        validators=[manuscript.models.validate_line_number_code],
                        help_text="Input the text by book, stanza, and line number. For example: 01.01.01 refers to book 1, stanza 1, line 1.",
                    ),
                ),
            ],
        ),
        # 2. Drop the old field on Location ...
        migrations.RemoveField(model_name="location", name="line_code"),
        # 3. ... and re-create it as a relation to LineCode.
        migrations.AddField(
            model_name="location",
            name="line_code",
            field=models.ManyToManyField(
                to="manuscript.linecode",
                blank=True,
                help_text="Citation line codes where the toponym appears.",
            ),
        ),
    ]
23 changes: 23 additions & 0 deletions
23
manuscript/migrations/0091_linecode_associated_iiif_url.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Generated by Django 5.0.2 on 2024-07-25 15:35 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add the optional ``associated_iiif_url`` URL field to ``LineCode``."""

    dependencies = [
        ("manuscript", "0090_linecode_remove_location_line_code_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="linecode",
            name="associated_iiif_url",
            field=models.URLField(
                verbose_name="Associated IIIF URL",
                max_length=255,
                null=True,
                blank=True,
                help_text="The URL to the IIIF manifest for the manuscript. If there isn't one, leave blank.",
            ),
        ),
    ]
Oops, something went wrong.