Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into atf-import-update
Browse files Browse the repository at this point in the history
  • Loading branch information
khoidt committed Nov 7, 2024
2 parents 1973bef + 60feb88 commit d3ab25b
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 260 deletions.
4 changes: 4 additions & 0 deletions ebl/common/domain/provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class Provenance(ProvenanceEnum):
KISURRA = ("Kisurra", "Ksr", "Babylonia", "KSR")
KIS = ("Kiš", "Kiš", "Babylonia", "KSH")
KUTALLA = ("Kutalla", "Kut", "Babylonia", "SFR")
LAGABA = ("Lagaba", "Lag", "Babylonia", "LGBunk")
LAGAS = ("Lagaš", "Lag", "Babylonia", "LAG")
LARAK = ("Larak", "Lrk", "Babylonia", "LRK")
LARSA = ("Larsa", "Lar", "Babylonia", "LAR")
Expand Down Expand Up @@ -94,6 +95,7 @@ class Provenance(ProvenanceEnum):
BAKR_AWA = ("Bakr Āwā", "Bakr", "Periphery", "BAW")
BARAH = ("Bārah", "Bar", "Periphery", "PRA")
BITWATA = ("Bitwātā", "Bitw", "Periphery", "SNJ")
CHUGHA_MISH = ("Chughā Mīsh", "Chughā", "Periphery", "CGM")
DER = ("Dēr", "Der", "Periphery", "DER")
DUGIRDKHAN = ("Dugirdkhan", "Dgr", "Periphery", "DGK")
DUR_UNTAS = ("Dūr-Untaš", "Dun", "Periphery", "COZ")
Expand All @@ -103,6 +105,7 @@ class Provenance(ProvenanceEnum):
GLAYA = ("Glay‘a", "Gla", "Periphery", "GLA")
HAMATH = ("Hamath", "Ham", "Periphery", "HAM")
HATTUSA = ("Ḫattuša", "Hat", "Periphery", "HAT")
KAYSERI = ("Kayseri", "Kay", "Periphery", "KRI")
KANES = ("Kaneš", "Kan", "Periphery", "KNS")
KARKEMIS = ("Karkemiš", "Kar", "Periphery", "KRK")
KIMUNAH = ("Kimūnah", "Kmn", "Periphery", "KMN")
Expand All @@ -121,6 +124,7 @@ class Provenance(ProvenanceEnum):
SUSARRA = ("Šušarra", "Šuš", "Periphery", "SZU")
TALL_BAZ_MUSIYAN = ("Tall Bāz Musiyān", "Bazm", "Periphery", "BZM")
TALL_AL_FAKHAR = ("Tall al-Fakhar", "Fakh", "Periphery", "FAK")
TALL_AL_FAKHIRIYA = ("Tall al-Fakhīriyah", "Fakhī", "Periphery", "FKR")
TALL_GHADAIYRIFAH = ("Tall Ghaḍaiyrīfah", "Ghad", "Periphery", "GDR")
TALL_AL_HAWA = ("Tall al-Hawa", "Haw", "Periphery", "THW")
TALL_IBRAHIM_BAYIS = ("Tall Ibrāhīm Bayis", "Iba", "Periphery", "IBA")
Expand Down
9 changes: 8 additions & 1 deletion ebl/common/query/query_collation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Dict, Iterable, Literal, Sequence
from urllib.parse import parse_qsl

DataType = Literal["dictionary", "afo-register"]
DataType = Literal["dictionary", "afo-register", "colophons"]


class Fields(Enum):
Expand All @@ -18,13 +18,20 @@ class Fields(Enum):
"WILDCARD_FIELDS": [],
"MARKDOWN_FIELDS": ["text"],
}
COLOPHONS = {
"COLLATED_FIELDS": ["names"],
"WILDCARD_FIELDS": [],
"MARKDOWN_FIELDS": [],
}

@staticmethod
def findByDataType(data_type: DataType) -> Dict[str, Sequence[str]]:
    """Return the field configuration stored on the member for ``data_type``.

    Recognised values are "dictionary", "afo-register" and "colophons";
    each maps to the ``value`` of the corresponding ``Fields`` member.

    Raises:
        ValueError: if ``data_type`` is none of the recognised values.
    """
    # Guard clauses: each recognised type returns immediately.
    if data_type == "dictionary":
        return Fields.DICTIONARY.value
    if data_type == "afo-register":
        return Fields.AFO_REGISTER.value
    if data_type == "colophons":
        return Fields.COLOPHONS.value
    raise ValueError("Invalid data type")

Expand Down
4 changes: 4 additions & 0 deletions ebl/fragmentarium/domain/genres.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
("ARCHIVAL", "Legal"),
("ARCHIVAL", "Legal", "Debt Note, Loan"),
("ARCHIVAL", "Legal", "Decree"),
("ARCHIVAL", "Legal", "Deposit"),
("ARCHIVAL", "Legal", "Guardianship"),
("ARCHIVAL", "Legal", "Herding"),
("ARCHIVAL", "Legal", "Hire"),
Expand Down Expand Up @@ -276,6 +277,7 @@
"Ur-Nanše",
),
("CANONICAL", "Lexicography", "Thematic Word Lists", "Practical Vocabulary"),
("CANONICAL", "Lexicography", "Thematic Word Lists", "Ugumu"),
("CANONICAL", "Lexicography", "Thematic Word Lists", "Ura"),
("CANONICAL", "Lexicography", "Vocabularies"),
("CANONICAL", "Lexicography", "Vocabularies", "Antagal"),
Expand Down Expand Up @@ -416,6 +418,8 @@
("CANONICAL", "Magic", "Varia", "Namerimburuda"),
("CANONICAL", "Magic", "Varia", "Zuburudabeda"),
("CANONICAL", "School Tablet"),
("CANONICAL", "School Tablet", "Model contract"),
("CANONICAL", "School Tablet", "Model letter"),
("CANONICAL", "Technical"),
("CANONICAL", "Technical", "Astronomy"),
("CANONICAL", "Technical", "Astronomy", "I.NAM.GIŠ.HUR.AN.KI.A"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,42 @@
aggregate_random,
)
from ebl.transliteration.infrastructure.queries import query_number_is
from ebl.common.query.query_collation import CollatedFieldQuery


def has_none_values(dictionary: dict) -> bool:
    """Return True when at least one value of ``dictionary`` is falsy.

    Despite the name, the check is truthiness based: ``None``, ``0``,
    ``""`` and empty containers all count. An empty dictionary has no
    falsy value and therefore yields False.
    """
    return any(not value for value in dictionary.values())


def _get_colophon_names_query(name_regex: str) -> Sequence[dict]:
return [
{"$unwind": "$colophon.individuals"},
{
"$project": {
"names": [
"$colophon.individuals.name.value",
"$colophon.individuals.sonOf.value",
"$colophon.individuals.grandsonOf.value",
"$colophon.individuals.family.value",
]
}
},
{"$unwind": "$names"},
{
"$match": {
"names": {
"$regex": rf"{name_regex}",
"$options": "i",
}
}
},
{"$group": {"_id": None, "unique_names": {"$addToSet": "$names"}}},
{"$unwind": "$unique_names"},
{"$sort": {"unique_names": 1}},
{"$project": {"_id": 0, "name": "$unique_names"}},
]


class MongoFragmentRepositoryGetExtended(MongoFragmentRepositoryBase):
def __init__(self, database):
super().__init__(database)
Expand Down Expand Up @@ -162,24 +192,9 @@ def fetch_scopes(self, number: MuseumNumber) -> List[Scope]:
]

def fetch_names(self, name_query: str) -> List[str]:
    """Return the colophon names that match ``name_query``.

    Queries shorter than three characters return an empty list without
    running any aggregation. Otherwise the query is turned into a regex via
    ``CollatedFieldQuery(name_query, "names", "colophons").value``
    (presumably a collation-aware pattern; confirm against
    ``ebl.common.query.query_collation``) and passed to
    ``_get_colophon_names_query`` to build the pipeline run on
    ``self._fragments``.

    Args:
        name_query: the user-supplied search string.

    Returns:
        The ``name`` field of every result document whose name is truthy.
    """
    if len(name_query) < 3:
        return []
    # The pipeline is built only by the shared helper; the inline copy that
    # used to be assigned here was overwritten before use and is removed.
    name_regex = CollatedFieldQuery(name_query, "names", "colophons").value
    pipeline = _get_colophon_names_query(name_regex)
    cursor = self._fragments.aggregate(pipeline)
    return [data["name"] for data in cursor if data["name"]]
3 changes: 2 additions & 1 deletion ebl/tests/fragmentarium/test_fragment_repository_colophon.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


def test_fetch_names(fragment_repository):
names = ["barmarum", "garmarum", "harmarum", "zarmarum"]
names = ["barmarum", "garmarum", "harmarum", "zarmārum"]
[name, second_name, third_name, fourth_name] = [
NameAttestationFactory.build(value=name) for name in names
]
Expand All @@ -32,3 +32,4 @@ def test_fetch_names(fragment_repository):
fragment_repository.create(fragment)
assert names == fragment_repository.fetch_names("mar")
assert ["pallaqum"] == fragment_repository.fetch_names("pal")
assert [] == fragment_repository.fetch_names("ma")
Original file line number Diff line number Diff line change
@@ -1,123 +1,126 @@
PROVENANCE: "Assa"
| "Baba"
| "Abs"
| "Adb"
| "Ajr"
| "Ala"
| "Ama"
| "Anh"
| "Anš"
| "Ašš"
| "Huz"
| "Kal"
| "Kho"
| "Nin"
| "Tar"
| "Baba"
| "Bab"
| "Bakr"
| "Baq"
| "Bar"
| "Bazm"
| "Bitw"
| "Bor"
| "Btb"
| "Chughā"
| "Cut"
| "Der"
| "Dgr"
| "Dil"
| "Isn"
| "Kiš"
| "Lar"
| "MetS"
| "Met"
| "Nēr"
| "Nip"
| "Sipam"
| "Sip"
| "Šad"
| "Šah"
| "Urk"
| "Ala"
| "Ama"
| "Emr"
| "Hat"
| "Mard"
| "Mar"
| "Meg"
| "Sus"
| "Uga"
| "Diqd"
| "Dka"
| "Har"
| "Img"
| "Ktn"
| "Šub"
| "Guz"
| "Adb"
| "Ešn"
| "Gir"
| "Lag"
| "Nig"
| "Šur"
| "Hur"
| "Ham"
| "Ttb"
| "Umm"
| "Zab"
| "Btb"
| "Dku"
| "Eri"
| "Gar"
| "Irs"
| "Ksr"
| "Kut"
| "Mrd"
| "Maš"
| "Puz"
| "Lrk"
| "Pik"
| "Mal"
| "Paš"
| "Ttl"
| "Elam"
| "Anš"
| "Der"
| "Dun"
| "Ebl"
| "Kan"
| "Kar"
| "Per"
| "Ter"
| "Tgo"
| "Qaṭ"
| "Unc"
| "Ur"
| "Kmn"
| "Ylk"
| "Shn"
| "Zaw"
| "Anh"
| "Elam"
| "Emr"
| "Eri"
| "Ešn"
| "Fakhī"
| "Fakh"
| "Gar"
| "Ghad"
| "Gir"
| "Gla"
| "Diqd"
| "Miz"
| "Guz"
| "Hafr"
| "Haf"
| "Nasr"
| "Sul"
| "Abs"
| "Ajr"
| "Ham"
| "Har"
| "Hat"
| "Haw"
| "Hur"
| "Huz"
| "Iba"
| "Img"
| "Iml"
| "Irs"
| "Isn"
| "Jid"
| "Kal"
| "Kan"
| "Kar"
| "Kay"
| "Khat"
| "Kha"
| "Kho"
| "Kiš"
| "Kmn"
| "Ksr"
| "Ktn"
| "Kut"
| "Lag"
| "Lah"
| "Muh"
| "Ubd"
| "Uaaj"
| "Uqa"
| "Baq"
| "Tum"
| "Waw"
| "Zar"
| "Bakr"
| "Bar"
| "Bitw"
| "Dgr"
| "Lar"
| "Lrk"
| "Mal"
| "Mard"
| "Mar"
| "Maš"
| "Meg"
| "MetS"
| "Met"
| "Milm"
| "Miz"
| "Mrd"
| "Muh"
| "Nasr"
| "Nēr"
| "Nig"
| "Nin"
| "Nip"
| "Nuzi"
| "Paš"
| "Per"
| "Pik"
| "Puz"
| "Qat"
| "Qaṭ"
| "Šad"
| "Šah"
| "Shn"
| "Sipam"
| "Sip"
| "Šub"
| "Sul"
| "Šur"
| "Surj"
| "Sus"
| "Šuš"
| "Bazm"
| "Fakh"
| "Ghad"
| "Haw"
| "Iba"
| "Tar"
| "Ter"
| "Tgo"
| "Tikr"
| "Ttb"
| "Ttl"
| "Tum"
| "Uaaj"
| "Ubd"
| "Uga"
| "Umm"
| "Unc"
| "Uqa"
| "Urk"
| "Ur"
| "Waw"
| "Ylk"
| "Zab"
| "Zar"
| "Zaw"
PERIOD: "ED1_2"
| "Fara"
| "Hel"
Expand Down
Loading

0 comments on commit d3ab25b

Please sign in to comment.