From b10c9b62763e5a429ca28c320510261e2192a6f0 Mon Sep 17 00:00:00 2001
From: Myst <1592048+LeMyst@users.noreply.github.com>
Date: Tue, 10 May 2022 21:11:38 +0200
Subject: [PATCH] Implement new version of FastRun
---
.../inspectionProfiles/WikibaseIntegrator.xml | 4 +-
README.md | 10 +-
pyproject.toml | 1 +
test/test_all.py | 56 +-
test/test_entity_item.py | 10 +-
test/test_wbi_core.py | 2 +-
test/test_wbi_fastrun.py | 211 ----
wikibaseintegrator/datatypes/basedatatype.py | 17 +-
wikibaseintegrator/datatypes/commonsmedia.py | 21 +-
wikibaseintegrator/datatypes/externalid.py | 1 +
wikibaseintegrator/datatypes/form.py | 15 +-
wikibaseintegrator/datatypes/geoshape.py | 5 +
.../datatypes/globecoordinate.py | 39 +-
wikibaseintegrator/datatypes/item.py | 40 +-
wikibaseintegrator/datatypes/lexeme.py | 13 +-
wikibaseintegrator/datatypes/math.py | 33 +
.../datatypes/monolingualtext.py | 30 +-
.../datatypes/musicalnotation.py | 1 +
wikibaseintegrator/datatypes/property.py | 13 +-
wikibaseintegrator/datatypes/quantity.py | 32 +-
wikibaseintegrator/datatypes/sense.py | 15 +-
wikibaseintegrator/datatypes/string.py | 27 +-
wikibaseintegrator/datatypes/tabulardata.py | 5 +
wikibaseintegrator/datatypes/time.py | 35 +-
wikibaseintegrator/datatypes/url.py | 27 +-
wikibaseintegrator/entities/baseentity.py | 38 +-
wikibaseintegrator/entities/item.py | 2 +-
wikibaseintegrator/models/claims.py | 20 +-
wikibaseintegrator/models/qualifiers.py | 2 +-
wikibaseintegrator/models/references.py | 3 +
wikibaseintegrator/models/snaks.py | 3 +
wikibaseintegrator/wbi_fastrun.py | 938 +++++++-----------
32 files changed, 782 insertions(+), 887 deletions(-)
delete mode 100644 test/test_wbi_fastrun.py
diff --git a/.idea/inspectionProfiles/WikibaseIntegrator.xml b/.idea/inspectionProfiles/WikibaseIntegrator.xml
index ff9e317a..be1ca2fd 100644
--- a/.idea/inspectionProfiles/WikibaseIntegrator.xml
+++ b/.idea/inspectionProfiles/WikibaseIntegrator.xml
@@ -25,6 +25,8 @@
+
+
@@ -57,4 +59,4 @@
-
+
\ No newline at end of file
diff --git a/README.md b/README.md
index b6e7f3cd..277910ff 100644
--- a/README.md
+++ b/README.md
@@ -567,7 +567,15 @@ for entrez_id, ensembl in raw_data.items():
Note: Fastrun mode checks for equality of property/value pairs, qualifiers (not including qualifier attributes), labels,
aliases and description, but it ignores references by default!
-References can be checked in fast run mode by setting `use_refs` to `True`.
+References can be checked in fast run mode by setting `use_references` to `True`.
+
+# Statistics #
+
+| Dataset | partial fastrun | fastrun without qualifiers/references | fastrun with qualifiers | fastrun with qualifiers/references |
+|:----------------------------|----------------:|--------------------------------------:|------------------------:|-----------------------------------:|
+| Communes (34990 elements) | ? | 7min | 30s | 60s |
+| Cantons (2042 elements) | ? | ? | ? | ? |
+| Départements (100 elements) | 70min | 1s | 30s | 60s |
# Debugging #
diff --git a/pyproject.toml b/pyproject.toml
index 80143fed..192abab4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,3 +34,4 @@ disable = [
[tool.pytest.ini_options]
log_cli = true
+log_cli_level = 'DEBUG'
diff --git a/test/test_all.py b/test/test_all.py
index 22055204..5797b525 100644
--- a/test/test_all.py
+++ b/test/test_all.py
@@ -1,12 +1,12 @@
import copy
+import logging
import unittest
from wikibaseintegrator import WikibaseIntegrator, datatypes, wbi_fastrun
from wikibaseintegrator.datatypes import BaseDataType, Item
-from wikibaseintegrator.entities import ItemEntity
+from wikibaseintegrator.entities import BaseEntity, ItemEntity
from wikibaseintegrator.wbi_config import config as wbi_config
-from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseDatatype
-from wikibaseintegrator.wbi_fastrun import get_fastrun_container
+from wikibaseintegrator.wbi_enums import WikibaseDatatype
wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_all.py)'
@@ -53,66 +53,24 @@ class TestFastRun(unittest.TestCase):
"""
some basic tests for fastrun mode
"""
+ logging.basicConfig(level=logging.DEBUG)
def test_fastrun(self):
statements = [
- datatypes.ExternalID(value='P40095', prop_nr='P352'),
+ datatypes.ExternalID(value='A0A023PZB3', prop_nr='P352'),
datatypes.ExternalID(value='YER158C', prop_nr='P705')
]
frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P352'), datatypes.Item(prop_nr='P703', value='Q27510868')], base_data_type=datatypes.BaseDataType)
- fastrun_result = frc.write_required(data=statements)
+ entity = BaseEntity().add_claims(statements)
- if fastrun_result:
- message = 'fastrun failed'
- else:
- message = 'successful fastrun'
+ fastrun_result = frc.write_required(entity=entity)
# here, fastrun should succeed, if not, test failed
if fastrun_result:
raise ValueError
- def test_fastrun_label(self):
- # tests fastrun label, description and aliases, and label in another language
- frc = get_fastrun_container(base_filter=[datatypes.ExternalID(value='/m/02j71', prop_nr='P646')])
- item = WikibaseIntegrator().item.get('Q2')
-
- assert item.labels.get(language='en') == "Earth"
- descr = item.descriptions.get(language='en')
- assert len(descr) > 3
- assert "the Earth" in item.aliases.get()
-
- assert list(frc.get_language_data("Q2", 'en', 'label'))[0] == item.labels.get(language='en')
- assert frc.check_language_data("Q2", ['not the Earth'], 'en', 'label')
- assert "the Earth" in item.aliases.get()
- assert "planet" in item.descriptions.get()
-
- assert item.labels.get('es') == "Tierra"
-
- item.descriptions.set(value=descr)
- item.descriptions.set(value="fghjkl")
- assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'fghjkl'}
- item.labels.set(value="Earth")
- item.labels.set(value="xfgfdsg")
- assert item.get_json()['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'}
- item.aliases.set(values=["fake alias"], action_if_exists=ActionIfExists.APPEND)
- assert {'language': 'en', 'value': 'fake alias'} in item.get_json()['aliases']['en']
-
- # something that's empty (for now.., can change, so this just makes sure no exception is thrown)
- frc.check_language_data("Q2", ['Ewiase'], 'ak', 'label')
- frc.check_language_data("Q2", ['not Ewiase'], 'ak', 'label')
- frc.check_language_data("Q2", [''], 'ak', 'description')
- frc.check_language_data("Q2", [], 'ak', 'aliases')
- frc.check_language_data("Q2", ['sdf', 'sdd'], 'ak', 'aliases')
-
- item.labels.get(language="ak")
- item.descriptions.get(language='ak')
- item.aliases.get(language="ak")
- item.labels.set(value="label", language="ak")
- item.descriptions.set(value="d", language="ak")
- item.aliases.set(values=["a"], language="ak", action_if_exists=ActionIfExists.APPEND)
-
def test_sitelinks():
item = wbi.item.get('Q622901')
diff --git a/test/test_entity_item.py b/test/test_entity_item.py
index a2dd4588..ce71534f 100644
--- a/test/test_entity_item.py
+++ b/test/test_entity_item.py
@@ -48,18 +48,18 @@ def test_write(self):
def test_write_not_required(self):
assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P1791')])
+ def test_write_not_required_ref(self):
+ assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P1464')], use_references=True)
+
def test_write_required(self):
item = wbi.item.get('Q582')
item.claims.add(Item(prop_nr='P1791', value='Q42'))
assert item.write_required([BaseDataType(prop_nr='P1791')])
- def test_write_not_required_ref(self):
- assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True)
-
def test_write_required_ref(self):
item = wbi.item.get('Q582')
- item.claims.get('P2581')[0].references.references.pop()
- assert item.write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True)
+ item.claims.get('P1464')[0].references.references.pop()
+ assert item.write_required(base_filter=[BaseDataType(prop_nr='P1464')], use_references=True)
def test_long_item_id(self):
assert wbi.item.get('Item:Q582').id == 'Q582'
diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py
index 01b16d8c..07333df7 100644
--- a/test/test_wbi_core.py
+++ b/test/test_wbi_core.py
@@ -232,7 +232,7 @@ def test_new_item_creation(self):
MonolingualText(text='xxx', language='fr', prop_nr='P7'),
Quantity(amount=-5.04, prop_nr='P8'),
Quantity(amount=5.06, upper_bound=9.99, lower_bound=-2.22, unit='Q11573', prop_nr='P8'),
- CommonsMedia(value='xxx', prop_nr='P9'),
+ CommonsMedia(value="Place lazare goujon.jpg", prop_nr='P9'),
GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr='P10'),
GeoShape(value='Data:xxx.map', prop_nr='P11'),
Property(value='P123', prop_nr='P12'),
diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py
deleted file mode 100644
index 4a6eb156..00000000
--- a/test/test_wbi_fastrun.py
+++ /dev/null
@@ -1,211 +0,0 @@
-from collections import defaultdict
-from typing import Any
-
-from wikibaseintegrator import WikibaseIntegrator, wbi_fastrun
-from wikibaseintegrator.datatypes import BaseDataType, ExternalID, Item
-from wikibaseintegrator.wbi_config import config as wbi_config
-from wikibaseintegrator.wbi_enums import ActionIfExists
-
-wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_wbi_fastrun.py)'
-
-wbi = WikibaseIntegrator()
-
-
-def test_query_data():
- """
- test_fastrun.test_query_data
- This hits live wikidata and may change !!
-
- This tests that the fast run container correctly queries data from wikidata and stores it in the appropriate format
- without getting references
- """
- frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P699')], base_data_type=BaseDataType)
- # get a string value
- frc._query_data('P699')
- # wikidata-item value
- frc._query_data('P828')
- # uri value
- frc._query_data('P2888')
-
- # https://www.wikidata.org/wiki/Q10874
- assert 'Q10874' in frc.prop_data
- assert 'P699' in frc.prop_data['Q10874']
- # the ID may change, so retrieve it
- statement_id = list(frc.prop_data['Q10874']['P699'].keys())[0]
- d = frc.prop_data['Q10874']['P699'][statement_id]
- # d looks like: {'qual': set(), 'ref': {}, 'v': 'DOID:1432'}
- assert all(x in d for x in {'qual', 'ref', 'v'})
- assert frc.prop_data['Q10874']['P699'][statement_id]['v'].startswith('"DOID:')
-
- # item
- assert list(frc.prop_data['Q10874']['P828'].values())[0]['v'] == "Q18228398"
-
- # uri
- v = {x['v'] for x in frc.prop_data['Q10874']['P2888'].values()}
- assert all(y.startswith("<http") for y in v)
-
- # ref
- assert len(d['ref']) > 0
- ref_id = list(d['ref'].keys())[0]
- ref = d['ref'][ref_id]
- assert len(ref) > 1
-
-
-class FastRunContainerFakeQueryDataEnsembl(wbi_fastrun.FastRunContainer):
- def __init__(self, *args: Any, **kwargs: Any):
- super().__init__(*args, **kwargs)
- self.prop_dt_map = {'P248': 'wikibase-item', 'P594': 'external-id'}
- self.prop_data['Q14911732'] = {'P594': {
- 'fake statement id': {
- 'qual': set(),
- 'ref': {'fake ref id': {
- ('P248',
- 'Q106833387'),
- ('P594',
- 'ENSG00000123374')}},
- 'unit': '1',
- 'v': '"ENSG00000123374"'}}}
- self.rev_lookup = defaultdict(set)
- self.rev_lookup['"ENSG00000123374"'].add('Q14911732')
-
-
-class FastRunContainerFakeQueryDataEnsemblNoRef(wbi_fastrun.FastRunContainer):
- def __init__(self, *args: Any, **kwargs: Any):
- super().__init__(*args, **kwargs)
- self.prop_dt_map = {'P248': 'wikibase-item', 'P594': 'external-id'}
- self.prop_data['Q14911732'] = {'P594': {
- 'fake statement id': {
- 'qual': set(),
- 'ref': {},
- 'v': 'ENSG00000123374'}}}
- self.rev_lookup = defaultdict(set)
- self.rev_lookup['"ENSG00000123374"'].add('Q14911732')
-
-
-def test_fastrun_ref_ensembl():
- # fastrun checks refs
- frc = FastRunContainerFakeQueryDataEnsembl(base_filter=[BaseDataType(prop_nr='P594'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType, use_refs=True)
-
- # statement has no ref
- statements = [ExternalID(value='ENSG00000123374', prop_nr='P594')]
- assert frc.write_required(data=statements)
-
- # statement has the same ref
- statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[Item("Q106833387", prop_nr="P248"), ExternalID("ENSG00000123374", prop_nr="P594")]])]
- assert not frc.write_required(data=statements)
-
- # new statement has an different stated in
- statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[Item("Q99999999999", prop_nr="P248"), ExternalID("ENSG00000123374", prop_nr="P594", )]])]
- assert frc.write_required(data=statements)
-
- # fastrun don't check references, statement has no reference,
- frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter=[BaseDataType(prop_nr='P594'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType,
- use_refs=False)
- statements = [ExternalID(value='ENSG00000123374', prop_nr='P594')]
- assert not frc.write_required(data=statements)
-
- # fastrun don't check references, statement has reference,
- frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter=[BaseDataType(prop_nr='P594'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType,
- use_refs=False)
- statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[Item("Q123", prop_nr="P31")]])]
- assert not frc.write_required(data=statements)
-
-
-class FakeQueryDataAppendProps(wbi_fastrun.FastRunContainer):
- # an item with three values for the same property
- def __init__(self, *args: Any, **kwargs: Any):
- super().__init__(*args, **kwargs)
- self.prop_dt_map = {'P527': 'wikibase-item', 'P248': 'wikibase-item', 'P594': 'external-id'}
-
- self.rev_lookup = defaultdict(set)
- self.rev_lookup['Q24784025'].add('Q3402672')
- self.rev_lookup['Q24743729'].add('Q3402672')
- self.rev_lookup['Q24782625'].add('Q3402672')
-
- self.prop_data['Q3402672'] = {'P527': {
- 'Q3402672-11BA231B-857B-498B-AC4F-91D71EE007FD': {'qual': set(),
- 'ref': {
- '149c9c7ba4e246d9f09ce3ed0cdf7aa721aad5c8': {
- ('P248', 'Q3047275'),
- }},
- 'v': 'Q24784025'},
- 'Q3402672-15F54AFF-7DCC-4DF6-A32F-73C48619B0B2': {'qual': set(),
- 'ref': {
- '149c9c7ba4e246d9f09ce3ed0cdf7aa721aad5c8': {
- ('P248', 'Q3047275'),
- }},
- 'v': 'Q24743729'},
- 'Q3402672-C8F11D55-1B11-44E5-9EAF-637E062825A4': {'qual': set(),
- 'ref': {
- '149c9c7ba4e246d9f09ce3ed0cdf7aa721aad5c8': {
- ('P248', 'Q3047275')}},
- 'v': 'Q24782625'}}}
-
-
-def test_append_props():
- qid = 'Q3402672'
- # https://www.wikidata.org/wiki/Q3402672#P527
-
- # don't consider refs
- frc = FakeQueryDataAppendProps(base_filter=[BaseDataType(prop_nr='P352'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType)
- # with append
- statements = [Item(value='Q24784025', prop_nr='P527')]
- assert frc.write_required(data=statements, action_if_exists=ActionIfExists.APPEND, cqid=qid) is False
- # with force append
- statements = [Item(value='Q24784025', prop_nr='P527')]
- assert frc.write_required(data=statements, action_if_exists=ActionIfExists.FORCE_APPEND, cqid=qid) is True
- # without append
- statements = [Item(value='Q24784025', prop_nr='P527')]
- assert frc.write_required(data=statements, cqid=qid) is True
-
- # if we are in append mode, and the refs are different, we should write
- frc = FakeQueryDataAppendProps(base_filter=[BaseDataType(prop_nr='P352'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType, use_refs=True)
- # with append
- statements = [Item(value='Q24784025', prop_nr='P527')]
- assert frc.write_required(data=statements, cqid=qid, action_if_exists=ActionIfExists.APPEND) is True
- # without append
- statements = [Item(value='Q24784025', prop_nr='P527')]
- assert frc.write_required(data=statements, cqid=qid) is True
diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py
index 94c62b7b..24438bb7 100644
--- a/wikibaseintegrator/datatypes/basedatatype.py
+++ b/wikibaseintegrator/datatypes/basedatatype.py
@@ -1,7 +1,7 @@
from __future__ import annotations
import re
-from typing import Any, List, Type, Union
+from typing import Any, Dict, List, Optional, Type, Union
from wikibaseintegrator.models import Claim
@@ -11,6 +11,7 @@ class BaseDataType(Claim):
The base class for all Wikibase data types, they inherit from it
"""
DTYPE = 'base-data-type'
+ PTYPE = 'property-data-type'
subclasses: List[Type[BaseDataType]] = []
sparql_query: str = '''
SELECT * WHERE {{
@@ -28,7 +29,14 @@ def __init__(self, prop_nr: Union[int, str] = None, **kwargs: Any):
super().__init__(**kwargs)
- self.mainsnak.property_number = prop_nr or None
+ if isinstance(prop_nr, str):
+ pattern = re.compile(r'^([a-z][a-z\d+.-]*):([^][<>\"\x00-\x20\x7F])+$')
+ matches = pattern.match(str(prop_nr))
+
+ if matches:
+ prop_nr = prop_nr.rsplit('/', 1)[-1]
+
+ self.mainsnak.property_number = prop_nr
# self.subclasses.append(self)
# Allow registration of subclasses of BaseDataType into BaseDataType.subclasses
@@ -39,7 +47,7 @@ def __init_subclass__(cls, **kwargs):
def set_value(self, value: Any = None):
pass
- def get_sparql_value(self) -> str:
+ def get_sparql_value(self, **kwargs) -> Optional[str]:
return '"' + self.mainsnak.datavalue['value'] + '"'
def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
@@ -61,3 +69,6 @@ def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
raise ValueError
return True
+
+ def from_sparql_value(self, sparql_value: Dict) -> BaseDataType:
+ pass
diff --git a/wikibaseintegrator/datatypes/commonsmedia.py b/wikibaseintegrator/datatypes/commonsmedia.py
index c444437d..1bfec524 100644
--- a/wikibaseintegrator/datatypes/commonsmedia.py
+++ b/wikibaseintegrator/datatypes/commonsmedia.py
@@ -1,17 +1,30 @@
import re
import urllib.parse
-from wikibaseintegrator.datatypes.string import String
+from wikibaseintegrator.datatypes.url import URL
-class CommonsMedia(String):
+class CommonsMedia(URL):
"""
Implements the Wikibase data type for Wikimedia commons media files
"""
DTYPE = 'commonsMedia'
+ PTYPE = 'http://wikiba.se/ontology#CommonsMedia'
- def get_sparql_value(self) -> str:
- return '<' + self.mainsnak.datavalue['value'] + '>'
+ def set_value(self, value: str = None):
+ assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"
+
+ if value:
+ pattern = re.compile(r'^.+\..+$')
+ matches = pattern.match(value)
+
+ if not matches:
+ raise ValueError(f"Invalid CommonsMedia {value}")
+
+ self.mainsnak.datavalue = {
+ 'value': value,
+ 'type': 'string'
+ }
def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
pattern = re.compile(r'^.*?/?([^/]*?)>?$')
diff --git a/wikibaseintegrator/datatypes/externalid.py b/wikibaseintegrator/datatypes/externalid.py
index c4838138..6b88ca7e 100644
--- a/wikibaseintegrator/datatypes/externalid.py
+++ b/wikibaseintegrator/datatypes/externalid.py
@@ -6,3 +6,4 @@ class ExternalID(String):
Implements the Wikibase data type 'external-id'
"""
DTYPE = 'external-id'
+ PTYPE = 'http://wikiba.se/ontology#ExternalId'
diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py
index c0237154..ca77fd5a 100644
--- a/wikibaseintegrator/datatypes/form.py
+++ b/wikibaseintegrator/datatypes/form.py
@@ -1,7 +1,9 @@
import re
-from typing import Any
+from typing import Any, Optional
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
+from wikibaseintegrator.wbi_config import config
+from wikibaseintegrator.wbi_enums import WikibaseSnakType
class Form(BaseDataType):
@@ -9,6 +11,7 @@ class Form(BaseDataType):
Implements the Wikibase data type 'wikibase-form'
"""
DTYPE = 'wikibase-form'
+ PTYPE = 'http://wikiba.se/ontology#WikibaseForm'
sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
@@ -55,5 +58,11 @@ def set_value(self, value: str = None):
'type': 'wikibase-entityid'
}
- def get_sparql_value(self) -> str:
- return self.mainsnak.datavalue['value']['id']
+ # TODO: add from_sparql_value()
+
+ def get_sparql_value(self, **kwargs) -> Optional[str]:
+ if self.mainsnak.snaktype == WikibaseSnakType.KNOWN_VALUE:
+ wikibase_url = str(kwargs['wikibase_url'] if 'wikibase_url' in kwargs else config['WIKIBASE_URL'])
+ return f'<{wikibase_url}/entity/' + self.mainsnak.datavalue['value']['id'] + '>'
+
+ return None
diff --git a/wikibaseintegrator/datatypes/geoshape.py b/wikibaseintegrator/datatypes/geoshape.py
index 7a7210bc..88139372 100644
--- a/wikibaseintegrator/datatypes/geoshape.py
+++ b/wikibaseintegrator/datatypes/geoshape.py
@@ -9,6 +9,7 @@ class GeoShape(BaseDataType):
Implements the Wikibase data type 'geo-shape'
"""
DTYPE = 'geo-shape'
+ PTYPE = 'http://wikiba.se/ontology#GeoShape'
sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
@@ -53,3 +54,7 @@ def set_value(self, value: str = None):
'value': value,
'type': 'string'
}
+
+ # TODO: Does GeoShape need a full URL to wikimedia commons?
+ def get_sparql_value(self, **kwargs: Any) -> str:
+ return '<' + self.mainsnak.datavalue['value'] + '>'
diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py
index 30e206ec..468e0caa 100644
--- a/wikibaseintegrator/datatypes/globecoordinate.py
+++ b/wikibaseintegrator/datatypes/globecoordinate.py
@@ -1,9 +1,12 @@
+from __future__ import annotations
+
import re
-from typing import Any
+from typing import Any, Dict
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
from wikibaseintegrator.models import Claim
from wikibaseintegrator.wbi_config import config
+from wikibaseintegrator.wbi_enums import WikibaseSnakType
class GlobeCoordinate(BaseDataType):
@@ -11,6 +14,7 @@ class GlobeCoordinate(BaseDataType):
Implements the Wikibase data type for globe coordinates
"""
DTYPE = 'globe-coordinate'
+ PTYPE = 'http://wikiba.se/ontology#GlobeCoordinate'
sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
@@ -77,8 +81,37 @@ def __eq__(self, other):
return super().__eq__(other)
- def get_sparql_value(self) -> str:
- return '"Point(' + str(self.mainsnak.datavalue['value']['longitude']) + ' ' + str(self.mainsnak.datavalue['value']['latitude']) + ')"'
+ def from_sparql_value(self, sparql_value: Dict) -> GlobeCoordinate:
+ """
+ Parse data returned by a SPARQL endpoint and set the value to the object
+
+ :param sparql_value: A SPARQL value composed of datatype, type and value
+ :return: True if the parsing is successful
+ """
+ datatype = sparql_value['datatype']
+ type = sparql_value['type']
+ value = sparql_value['value']
+
+ if datatype != 'http://www.opengis.net/ont/geosparql#wktLiteral':
+ raise ValueError('Wrong SPARQL datatype')
+
+ if type != 'literal':
+ raise ValueError('Wrong SPARQL type')
+
+ if value.startswith('http://www.wikidata.org/.well-known/genid/'):
+ self.mainsnak.snaktype = WikibaseSnakType.UNKNOWN_VALUE
+ else:
+ pattern = re.compile(r'^Point\((.*) (.*)\)$')
+ matches = pattern.match(value)
+ if not matches:
+ raise ValueError('Invalid SPARQL value')
+
+ self.set_value(longitude=float(matches.group(1)), latitude=float(matches.group(2)))
+
+ return self
+
+ def get_sparql_value(self, **kwargs: Any) -> str:
+ return '"Point(' + str(self.mainsnak.datavalue['value']['longitude']) + ' ' + str(self.mainsnak.datavalue['value']['latitude']) + ')"^^geo:wktLiteral'
def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
pattern = re.compile(r'^"?Point\((.*) (.*)\)"?(?:\^\^geo:wktLiteral)?$')
diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py
index 1e8e3e52..19eec434 100644
--- a/wikibaseintegrator/datatypes/item.py
+++ b/wikibaseintegrator/datatypes/item.py
@@ -1,7 +1,11 @@
+from __future__ import annotations
+
import re
-from typing import Any, Union
+from typing import Any, Dict, Optional, Union
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
+from wikibaseintegrator.wbi_config import config
+from wikibaseintegrator.wbi_enums import WikibaseSnakType
class Item(BaseDataType):
@@ -9,6 +13,7 @@ class Item(BaseDataType):
Implements the Wikibase data type 'wikibase-item' with a value being another item ID
"""
DTYPE = 'wikibase-item'
+ PTYPE = 'http://wikiba.se/ontology#WikibaseItem'
sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
@@ -48,5 +53,34 @@ def set_value(self, value: Union[str, int] = None):
'type': 'wikibase-entityid'
}
- def get_sparql_value(self) -> str:
- return '<{wb_url}/entity/' + self.mainsnak.datavalue['value']['id'] + '>'
+ def from_sparql_value(self, sparql_value: Dict) -> Item:
+ """
+ Parse data returned by a SPARQL endpoint and set the value to the object
+
+ :param sparql_value: A SPARQL value composed of type and value
+ :return: True if the parsing is successful
+ """
+ type = sparql_value['type']
+ value = sparql_value['value']
+
+ if type != 'uri':
+ raise ValueError('Wrong SPARQL type')
+
+ if value.startswith('http://www.wikidata.org/.well-known/genid/'):
+ self.mainsnak.snaktype = WikibaseSnakType.UNKNOWN_VALUE
+ else:
+ pattern = re.compile(r'^.+/([PQLM]\d+)$')
+ matches = pattern.match(value)
+ if not matches:
+ raise ValueError(f"Invalid SPARQL value {value}")
+
+ self.set_value(value=str(matches.group(1)))
+
+ return self
+
+ def get_sparql_value(self, **kwargs) -> Optional[str]:
+ if self.mainsnak.snaktype == WikibaseSnakType.KNOWN_VALUE:
+ wikibase_url = str(kwargs['wikibase_url'] if 'wikibase_url' in kwargs else config['WIKIBASE_URL'])
+ return f'<{wikibase_url}/entity/' + self.mainsnak.datavalue['value']['id'] + '>'
+
+ return None
diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py
index 109da96f..c7b4d501 100644
--- a/wikibaseintegrator/datatypes/lexeme.py
+++ b/wikibaseintegrator/datatypes/lexeme.py
@@ -1,7 +1,9 @@
import re
-from typing import Any, Union
+from typing import Any, Optional, Union
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
+from wikibaseintegrator.wbi_config import config
+from wikibaseintegrator.wbi_enums import WikibaseSnakType
class Lexeme(BaseDataType):
@@ -9,6 +11,7 @@ class Lexeme(BaseDataType):
Implements the Wikibase data type 'wikibase-lexeme'
"""
DTYPE = 'wikibase-lexeme'
+ PTYPE = 'http://wikiba.se/ontology#WikibaseLexeme'
sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
@@ -48,5 +51,9 @@ def set_value(self, value: Union[str, int] = None):
'type': 'wikibase-entityid'
}
- def get_sparql_value(self) -> str:
- return self.mainsnak.datavalue['value']['id']
+ def get_sparql_value(self, **kwargs) -> Optional[str]:
+ if self.mainsnak.snaktype == WikibaseSnakType.KNOWN_VALUE:
+ wikibase_url = str(kwargs['wikibase_url'] if 'wikibase_url' in kwargs else config['WIKIBASE_URL'])
+ return f'<{wikibase_url}/entity/' + self.mainsnak.datavalue['value']['id'] + '>'
+
+ return None
diff --git a/wikibaseintegrator/datatypes/math.py b/wikibaseintegrator/datatypes/math.py
index 7ad3f3cc..f7060d0c 100644
--- a/wikibaseintegrator/datatypes/math.py
+++ b/wikibaseintegrator/datatypes/math.py
@@ -1,4 +1,9 @@
+from __future__ import annotations
+
+from typing import Any, Dict
+
from wikibaseintegrator.datatypes.string import String
+from wikibaseintegrator.wbi_enums import WikibaseSnakType
class Math(String):
@@ -6,3 +11,31 @@ class Math(String):
Implements the Wikibase data type 'math' for mathematical formula in TEX format
"""
DTYPE = 'math'
+ PTYPE = 'http://wikiba.se/ontology#Math'
+
+ def from_sparql_value(self, sparql_value: Dict) -> Math:
+ """
+ Parse data returned by a SPARQL endpoint and set the value to the object
+
+ :param sparql_value: A SPARQL value composed of type and value
+ :return:
+ """
+ datatype = sparql_value['datatype']
+ type = sparql_value['type']
+ value = sparql_value['value']
+
+ if datatype != 'http://www.w3.org/2001/XMLSchema#dateTime':
+ raise ValueError('Wrong SPARQL datatype')
+
+ if type != 'literal':
+ raise ValueError('Wrong SPARQL type')
+
+ if value.startswith('http://www.wikidata.org/.well-known/genid/'):
+ self.mainsnak.snaktype = WikibaseSnakType.UNKNOWN_VALUE
+ else:
+ self.set_value(value=value)
+
+ return self
+
+ def get_sparql_value(self, **kwargs: Any) -> str:
+ return '"' + self.mainsnak.datavalue['value'] + '"^^'
diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py
index c7044e4e..97a82a71 100644
--- a/wikibaseintegrator/datatypes/monolingualtext.py
+++ b/wikibaseintegrator/datatypes/monolingualtext.py
@@ -1,8 +1,11 @@
+from __future__ import annotations
+
import re
-from typing import Any
+from typing import Any, Dict
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
from wikibaseintegrator.wbi_config import config
+from wikibaseintegrator.wbi_enums import WikibaseSnakType
class MonolingualText(BaseDataType):
@@ -10,6 +13,8 @@ class MonolingualText(BaseDataType):
Implements the Wikibase data type for Monolingual Text strings
"""
DTYPE = 'monolingualtext'
+ PTYPE = 'http://wikiba.se/ontology#Monolingualtext'
+
sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
@@ -43,7 +48,28 @@ def set_value(self, text: str = None, language: str = None):
'type': 'monolingualtext'
}
- def get_sparql_value(self) -> str:
+ def from_sparql_value(self, sparql_value: Dict) -> MonolingualText:
+ """
+ Parse data returned by a SPARQL endpoint and set the value to the object
+
+ :param sparql_value: A SPARQL value composed of datatype, type and value
+ :return: True if the parsing is successful
+ """
+ xml_lang = sparql_value['xml:lang']
+ type = sparql_value['type']
+ value = sparql_value['value']
+
+ if type != 'literal':
+ raise ValueError(f"Wrong SPARQL type {type}")
+
+ if value.startswith('http://www.wikidata.org/.well-known/genid/'):
+ self.mainsnak.snaktype = WikibaseSnakType.UNKNOWN_VALUE
+ else:
+ self.set_value(text=value, language=xml_lang)
+
+ return self
+
+ def get_sparql_value(self, **kwargs: Any) -> str:
return '"' + self.mainsnak.datavalue['value']['text'].replace('"', r'\"') + '"@' + self.mainsnak.datavalue['value']['language']
def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
diff --git a/wikibaseintegrator/datatypes/musicalnotation.py b/wikibaseintegrator/datatypes/musicalnotation.py
index 25d1de76..083a9698 100644
--- a/wikibaseintegrator/datatypes/musicalnotation.py
+++ b/wikibaseintegrator/datatypes/musicalnotation.py
@@ -6,3 +6,4 @@ class MusicalNotation(String):
Implements the Wikibase data type 'musical-notation'
"""
DTYPE = 'musical-notation'
+ PTYPE = 'http://wikiba.se/ontology#MusicalNotation'
diff --git a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py
index a76e4263..1cb3ef47 100644
--- a/wikibaseintegrator/datatypes/property.py
+++ b/wikibaseintegrator/datatypes/property.py
@@ -1,7 +1,9 @@
import re
-from typing import Any, Union
+from typing import Any, Optional, Union
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
+from wikibaseintegrator.wbi_config import config
+from wikibaseintegrator.wbi_enums import WikibaseSnakType
class Property(BaseDataType):
@@ -9,6 +11,7 @@ class Property(BaseDataType):
Implements the Wikibase data type 'property'
"""
DTYPE = 'wikibase-property'
+ PTYPE = 'http://wikiba.se/ontology#Property'
sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
@@ -49,5 +52,9 @@ def set_value(self, value: Union[str, int] = None):
'type': 'wikibase-entityid'
}
- def get_sparql_value(self) -> str:
- return self.mainsnak.datavalue['value']['id']
+ def get_sparql_value(self, **kwargs) -> Optional[str]:
+ if self.mainsnak.snaktype == WikibaseSnakType.KNOWN_VALUE:
+ wikibase_url = str(kwargs['wikibase_url'] if 'wikibase_url' in kwargs else config['WIKIBASE_URL'])
+ return f'<{wikibase_url}/entity/' + self.mainsnak.datavalue['value']['id'] + '>'
+
+ return None
diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py
index a9a95209..73fcc6a5 100644
--- a/wikibaseintegrator/datatypes/quantity.py
+++ b/wikibaseintegrator/datatypes/quantity.py
@@ -1,7 +1,10 @@
-from typing import Any, Union
+from __future__ import annotations
+
+from typing import Any, Dict, Union
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
from wikibaseintegrator.wbi_config import config
+from wikibaseintegrator.wbi_enums import WikibaseSnakType
from wikibaseintegrator.wbi_helpers import format_amount
@@ -10,6 +13,7 @@ class Quantity(BaseDataType):
Implements the Wikibase data type for quantities
"""
DTYPE = 'quantity'
+ PTYPE = 'http://wikiba.se/ontology#Quantity'
sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
@@ -81,7 +85,31 @@ def set_value(self, amount: Union[str, int, float] = None, upper_bound: Union[st
if not lower_bound:
del self.mainsnak.datavalue['value']['lowerBound']
- def get_sparql_value(self) -> str:
+ def from_sparql_value(self, sparql_value: Dict) -> Quantity:
+ """
+ Parse data returned by a SPARQL endpoint and set the value to the object
+
+ :param sparql_value: A SPARQL value composed of datatype, type and value
+ :return: The object itself, allowing chained calls
+ """
+ datatype = sparql_value['datatype']
+ type = sparql_value['type']
+ value = sparql_value['value']
+
+ if datatype != 'http://www.w3.org/2001/XMLSchema#decimal':
+ raise ValueError(f"Wrong SPARQL datatype {datatype}")
+
+ if type != 'literal':
+ raise ValueError(f"Wrong SPARQL type {type}")
+
+ if value.startswith('http://www.wikidata.org/.well-known/genid/'):
+ self.mainsnak.snaktype = WikibaseSnakType.UNKNOWN_VALUE
+ else:
+ self.set_value(amount=value)
+
+ return self
+
+ def get_sparql_value(self, **kwargs: Any) -> str:
return '"' + format_amount(self.mainsnak.datavalue['value']['amount']) + '"^^xsd:decimal'
def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
diff --git a/wikibaseintegrator/datatypes/sense.py b/wikibaseintegrator/datatypes/sense.py
index 6dcbd826..ab30d9f3 100644
--- a/wikibaseintegrator/datatypes/sense.py
+++ b/wikibaseintegrator/datatypes/sense.py
@@ -1,7 +1,9 @@
import re
-from typing import Any
+from typing import Any, Optional
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
+from wikibaseintegrator.wbi_config import config
+from wikibaseintegrator.wbi_enums import WikibaseSnakType
class Sense(BaseDataType):
@@ -9,6 +11,7 @@ class Sense(BaseDataType):
Implements the Wikibase data type 'wikibase-sense'
"""
DTYPE = 'wikibase-sense'
+ PTYPE = 'http://wikiba.se/ontology#WikibaseSense'
sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
@@ -44,5 +47,11 @@ def set_value(self, value: str = None):
'type': 'wikibase-entityid'
}
- def get_sparql_value(self) -> str:
- return self.mainsnak.datavalue['value']['id']
+ # TODO: add from_sparql_value()
+
+ def get_sparql_value(self, **kwargs) -> Optional[str]:
+ if self.mainsnak.snaktype == WikibaseSnakType.KNOWN_VALUE:
+ wikibase_url = str(kwargs['wikibase_url'] if 'wikibase_url' in kwargs else config['WIKIBASE_URL'])
+ return f'<{wikibase_url}/entity/' + self.mainsnak.datavalue['value']['id'] + '>'
+
+ return None
diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py
index a6afd761..15b98959 100644
--- a/wikibaseintegrator/datatypes/string.py
+++ b/wikibaseintegrator/datatypes/string.py
@@ -1,14 +1,17 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import Any, Dict
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
+from wikibaseintegrator.wbi_enums import WikibaseSnakType
class String(BaseDataType):
"""
Implements the Wikibase data type 'string'
"""
-
DTYPE = 'string'
+ PTYPE = 'http://wikiba.se/ontology#String'
def __init__(self, value: str = None, **kwargs: Any):
"""
@@ -28,3 +31,23 @@ def set_value(self, value: str = None):
'value': value,
'type': 'string'
}
+
+ def from_sparql_value(self, sparql_value: Dict) -> String:
+ """
+ Parse data returned by a SPARQL endpoint and set the value to the object
+
+ :param sparql_value: A SPARQL value composed of type and value
+ :return: The object itself, allowing chained calls
+ """
+ type = sparql_value['type']
+ value = sparql_value['value']
+
+ if type != 'literal':
+ raise ValueError(f"Wrong SPARQL type {type}")
+
+ if value.startswith('http://www.wikidata.org/.well-known/genid/'):
+ self.mainsnak.snaktype = WikibaseSnakType.UNKNOWN_VALUE
+ else:
+ self.set_value(value=value)
+
+ return self
diff --git a/wikibaseintegrator/datatypes/tabulardata.py b/wikibaseintegrator/datatypes/tabulardata.py
index 118e75ca..9693d932 100644
--- a/wikibaseintegrator/datatypes/tabulardata.py
+++ b/wikibaseintegrator/datatypes/tabulardata.py
@@ -9,6 +9,7 @@ class TabularData(BaseDataType):
Implements the Wikibase data type 'tabular-data'
"""
DTYPE = 'tabular-data'
+ PTYPE = 'http://wikiba.se/ontology#TabularData'
def __init__(self, value: str = None, **kwargs: Any):
"""
@@ -34,3 +35,7 @@ def set_value(self, value: str = None):
'value': value,
'type': 'string'
}
+
+ # TODO: Does TabularData need a full URL to wikimedia commons?
+ def get_sparql_value(self, **kwargs: Any) -> str:
+ return '<' + self.mainsnak.datavalue['value'] + '>'
diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py
index 6c9a480d..f26db300 100644
--- a/wikibaseintegrator/datatypes/time.py
+++ b/wikibaseintegrator/datatypes/time.py
@@ -1,10 +1,12 @@
+from __future__ import annotations
+
import datetime
import re
-from typing import Any, Union
+from typing import Any, Dict, Union
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
from wikibaseintegrator.wbi_config import config
-from wikibaseintegrator.wbi_enums import WikibaseDatePrecision
+from wikibaseintegrator.wbi_enums import WikibaseDatePrecision, WikibaseSnakType
class Time(BaseDataType):
@@ -12,6 +14,7 @@ class Time(BaseDataType):
Implements the Wikibase data type with date and time values
"""
DTYPE = 'time'
+ PTYPE = 'http://wikiba.se/ontology#Time'
sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
@@ -80,5 +83,29 @@ def set_value(self, time: str = None, before: int = 0, after: int = 0, precision
'type': 'time'
}
- def get_sparql_value(self) -> str:
- return self.mainsnak.datavalue['value']['time']
+ def from_sparql_value(self, sparql_value: Dict) -> Time:
+ """
+ Parse data returned by a SPARQL endpoint and set the value to the object
+
+ :param sparql_value: A SPARQL value composed of type and value
+ :return: The object itself, allowing chained calls
+ """
+ datatype = sparql_value['datatype']
+ type = sparql_value['type']
+ value = sparql_value['value']
+
+ if datatype != 'http://www.w3.org/2001/XMLSchema#dateTime':
+ raise ValueError('Wrong SPARQL datatype')
+
+ if type != 'literal':
+ raise ValueError('Wrong SPARQL type')
+
+ if value.startswith('http://www.wikidata.org/.well-known/genid/'):
+ self.mainsnak.snaktype = WikibaseSnakType.UNKNOWN_VALUE
+ else:
+ self.set_value(time=value)
+
+ return self
+
+ def get_sparql_value(self, **kwargs: Any) -> str:
+ return '"' + self.mainsnak.datavalue['value']['time'] + '"^^xsd:dateTime'
diff --git a/wikibaseintegrator/datatypes/url.py b/wikibaseintegrator/datatypes/url.py
index fc0184dc..9f017541 100644
--- a/wikibaseintegrator/datatypes/url.py
+++ b/wikibaseintegrator/datatypes/url.py
@@ -1,7 +1,10 @@
+from __future__ import annotations
+
import re
-from typing import Any
+from typing import Any, Dict
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
+from wikibaseintegrator.wbi_enums import WikibaseSnakType
class URL(BaseDataType):
@@ -9,6 +12,7 @@ class URL(BaseDataType):
Implements the Wikibase data type for URL strings
"""
DTYPE = 'url'
+ PTYPE = 'http://wikiba.se/ontology#Url'
sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
@@ -41,7 +45,26 @@ def set_value(self, value: str = None):
'type': 'string'
}
- def get_sparql_value(self) -> str:
+ def from_sparql_value(self, sparql_value: Dict) -> URL:
+ """
+ Parse data returned by a SPARQL endpoint and set the value to the object
+
+ :param sparql_value: A SPARQL value composed of type and value
+ :return: The object itself, allowing chained calls
+ """
+ type = sparql_value['type']
+ value = sparql_value['value']
+
+ if type != 'uri':
+ raise ValueError(f"Wrong SPARQL type {type}")
+
+ if value.startswith('http://www.wikidata.org/.well-known/genid/'):
+ self.mainsnak.snaktype = WikibaseSnakType.UNKNOWN_VALUE
+ else:
+ self.set_value(value=value)
+ return self
+
+ def get_sparql_value(self, **kwargs: Any) -> str:
return '<' + self.mainsnak.datavalue['value'] + '>'
def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py
index b85cc2f3..0a15e4ef 100644
--- a/wikibaseintegrator/entities/baseentity.py
+++ b/wikibaseintegrator/entities/baseentity.py
@@ -2,7 +2,7 @@
import logging
from copy import copy
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Union
import ujson
@@ -100,9 +100,13 @@ def claims(self) -> Claims:
return self.__claims
@claims.setter
- def claims(self, value: Claims):
- if not isinstance(value, Claims):
+ def claims(self, value: Union[Claim, Claims]):
+ if not isinstance(value, Claims) and not isinstance(value, Claim):
raise TypeError
+
+ if isinstance(value, Claim):
+ value = Claims().add(claims=value)
+
self.__claims = value
def add_claims(self, claims: Union[Claim, List], action_if_exists: ActionIfExists = ActionIfExists.APPEND) -> BaseEntity:
@@ -186,6 +190,21 @@ def clear(self, **kwargs: Any) -> Dict[str, Any]:
"""
return self._write(data={}, clear=True, **kwargs)
+ def get_claims(self, property: str, login: _Login = None, allow_anonymous: bool = True, is_bot: bool = None, **kwargs: Any):
+ params = {
+ 'action': 'wbgetclaims',
+ 'entity': self.id,
+ 'property': property,
+ 'format': 'json'
+ }
+
+ login = login or self.api.login
+ is_bot = is_bot if is_bot is not None else self.api.is_bot
+
+ json_data = mediawiki_api_call_helper(data=params, login=login, allow_anonymous=allow_anonymous, is_bot=is_bot, **kwargs)
+ self.claims.from_json(json_data['claims'])
+ return self
+
def _write(self, data: Dict = None, summary: str = None, login: _Login = None, allow_anonymous: bool = False, clear: bool = False, is_bot: bool = None, **kwargs: Any) -> Dict[
str, Any]:
"""
@@ -288,20 +307,19 @@ def delete(self, login: _Login = None, allow_anonymous: bool = False, is_bot: bo
return delete_page(title=None, pageid=self.pageid, login=login, allow_anonymous=allow_anonymous, is_bot=is_bot, **kwargs)
- def write_required(self, base_filter: List[BaseDataType | List[BaseDataType]] = None, action_if_exists: ActionIfExists = ActionIfExists.REPLACE, **kwargs: Any) -> bool:
+ def write_required(self, base_filter: List[BaseDataType | List[BaseDataType]], **kwargs: Any) -> bool:
fastrun_container = wbi_fastrun.get_fastrun_container(base_filter=base_filter, **kwargs)
- if base_filter is None:
- base_filter = []
-
- claims_to_check = []
+ pfilter: Set = set()
for claim in self.claims:
if claim.mainsnak.property_number in base_filter:
- claims_to_check.append(claim)
+ pfilter.add(claim.mainsnak.property_number)
+
+ property_filter: List[str] = list(pfilter)
# TODO: Add check_language_data
- return fastrun_container.write_required(data=claims_to_check, cqid=self.id, action_if_exists=action_if_exists)
+ return fastrun_container.write_required(entity=self, property_filter=property_filter)
def __repr__(self):
"""A mixin implementing a simple __repr__."""
diff --git a/wikibaseintegrator/entities/item.py b/wikibaseintegrator/entities/item.py
index 227fe2cf..e6a08f2a 100644
--- a/wikibaseintegrator/entities/item.py
+++ b/wikibaseintegrator/entities/item.py
@@ -133,7 +133,7 @@ def from_json(self, json_data: Dict[str, Any]) -> ItemEntity:
def write(self, **kwargs: Any) -> ItemEntity:
"""
Write the ItemEntity data to the Wikibase instance and return the ItemEntity object returned by the instance.
- extend :func:`~wikibaseintegrator.entities.BaseEntity._write`
+ This function extends :func:`~wikibaseintegrator.entities.baseentity.BaseEntity._write`
:param data: The serialized object that is used as the data source. A newly created entity will be assigned an 'id'.
:param summary: A summary of the edit
diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py
index 6c2b9548..287b3752 100644
--- a/wikibaseintegrator/models/claims.py
+++ b/wikibaseintegrator/models/claims.py
@@ -124,20 +124,22 @@ def __iter__(self):
class Claim(BaseModel):
+ """
+ Extends :class:`wikibaseintegrator.models.basemodel.BaseModel`
+
+ :param qualifiers:
+ :param id:
+ :param rank:
+ :param references: A References object, a list of Claim object or a list of list of Claim object
+ """
DTYPE = 'claim'
- def __init__(self, qualifiers: Qualifiers = None, rank: WikibaseRank = None, references: Union[References, List[Union[Claim, List[Claim]]]] = None) -> None:
- """
-
- :param qualifiers:
- :param rank:
- :param references: A References object, a list of Claim object or a list of list of Claim object
- """
+ def __init__(self, qualifiers: Qualifiers = None, id: str = None, rank: WikibaseRank = None, references: Union[References, List[Union[Claim, List[Claim]]]] = None) -> None:
self.mainsnak = Snak(datatype=self.DTYPE)
self.type = 'statement'
self.qualifiers = qualifiers or Qualifiers()
self.qualifiers_order = []
- self.id = None
+ self.id = id
self.rank = rank or WikibaseRank.NORMAL
self.removed = False
@@ -358,5 +360,5 @@ def ref_equal(oldref: References, newref: References) -> bool:
return len(oldrefs) == len(newrefs) and all(any(ref_equal(oldref, newref) for oldref in oldrefs) for newref in newrefs)
- def get_sparql_value(self) -> str:
+ def get_sparql_value(self) -> Optional[str]:
pass
diff --git a/wikibaseintegrator/models/qualifiers.py b/wikibaseintegrator/models/qualifiers.py
index 06986ba7..3b74d2d6 100644
--- a/wikibaseintegrator/models/qualifiers.py
+++ b/wikibaseintegrator/models/qualifiers.py
@@ -12,7 +12,7 @@
class Qualifiers(BaseModel):
def __init__(self):
- self.qualifiers: Dict[str, List[Snak]] = {}
+ self.qualifiers: Dict[str, List[Union[Snak, Claim]]] = {}
@property
def qualifiers(self):
diff --git a/wikibaseintegrator/models/references.py b/wikibaseintegrator/models/references.py
index 77a392bb..f0d3686f 100644
--- a/wikibaseintegrator/models/references.py
+++ b/wikibaseintegrator/models/references.py
@@ -136,6 +136,9 @@ def get_json(self) -> Dict[str, Union[Dict, List]]:
}
return json_data
+ def __eq__(self, other):
+ return self.snaks == other.snaks
+
def __iter__(self):
return iter(self.snaks)
diff --git a/wikibaseintegrator/models/snaks.py b/wikibaseintegrator/models/snaks.py
index eceaaef8..f9ebc06c 100644
--- a/wikibaseintegrator/models/snaks.py
+++ b/wikibaseintegrator/models/snaks.py
@@ -40,6 +40,9 @@ def get_json(self) -> Dict[str, List]:
json_data[property].append(snak.get_json())
return json_data
+ def __eq__(self, other):
+ return self.snaks == other.snaks
+
def __iter__(self):
iterate = []
for snak in self.snaks.values():
diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py
index f03c0ede..9a253d2d 100644
--- a/wikibaseintegrator/wbi_fastrun.py
+++ b/wikibaseintegrator/wbi_fastrun.py
@@ -1,21 +1,16 @@
from __future__ import annotations
-import collections
-import copy
import logging
-from collections import defaultdict
-from functools import lru_cache
-from itertools import chain
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Type, Union
+import re
+from typing import TYPE_CHECKING, Dict, List, Type, Union
from wikibaseintegrator.datatypes import BaseDataType
-from wikibaseintegrator.models import Claim
+from wikibaseintegrator.models import Claim, Claims, Qualifiers, Reference, References
from wikibaseintegrator.wbi_config import config
-from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseDatatype
-from wikibaseintegrator.wbi_helpers import execute_sparql_query, format_amount
+from wikibaseintegrator.wbi_helpers import execute_sparql_query
if TYPE_CHECKING:
- from wikibaseintegrator.models import Claims
+ from wikibaseintegrator.entities import BaseEntity
log = logging.getLogger(__name__)
@@ -23,641 +18,462 @@
class FastRunContainer:
- def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = None, sparql_endpoint_url: str = None, wikibase_url: str = None,
- base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False):
- self.reconstructed_statements: List[BaseDataType] = []
- self.rev_lookup: defaultdict[str, Set[str]] = defaultdict(set)
- self.rev_lookup_ci: defaultdict[str, Set[str]] = defaultdict(set)
- self.prop_data: Dict[str, Dict] = {}
- self.loaded_langs: Dict[str, Dict] = {}
- self.base_filter: List[BaseDataType | List[BaseDataType]] = []
- self.base_filter_string = ''
- self.prop_dt_map: Dict[str, str] = {}
-
- self.base_data_type: Type[BaseDataType] = base_data_type
- self.mediawiki_api_url: str = str(mediawiki_api_url or config['MEDIAWIKI_API_URL'])
- self.sparql_endpoint_url: str = str(sparql_endpoint_url or config['SPARQL_ENDPOINT_URL'])
- self.wikibase_url: str = str(wikibase_url or config['WIKIBASE_URL'])
- self.use_refs: bool = use_refs
- self.case_insensitive: bool = case_insensitive
-
- if base_filter and any(base_filter):
- self.base_filter = base_filter
- for k in self.base_filter:
- if isinstance(k, BaseDataType):
- if k.mainsnak.datavalue:
- self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> {entity} .\n'.format(
- wb_url=self.wikibase_url, prop_nr=k.mainsnak.property_number, entity=k.get_sparql_value().format(wb_url=self.wikibase_url))
- else:
- self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> ?zz{prop_nr} .\n'.format(
- wb_url=self.wikibase_url, prop_nr=k.mainsnak.property_number)
- elif isinstance(k, list) and len(k) == 2 and isinstance(k[0], BaseDataType) and isinstance(k[1], BaseDataType):
- if k[0].mainsnak.datavalue:
- self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}>/<{wb_url}/prop/direct/{prop_nr2}>* {entity} .\n'.format(
- wb_url=self.wikibase_url, prop_nr=k[0].mainsnak.property_number, prop_nr2=k[1].mainsnak.property_number,
- entity=k[0].get_sparql_value().format(wb_url=self.wikibase_url))
- else:
- self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr1}>/<{wb_url}/prop/direct/{prop_nr2}>* ?zz{prop_nr1}{prop_nr2} .\n'.format(
- wb_url=self.wikibase_url, prop_nr1=k[0].mainsnak.property_number, prop_nr2=k[1].mainsnak.property_number)
- else:
- raise ValueError("base_filter must be an instance of BaseDataType or a list of instances of BaseDataType")
+ """
- def reconstruct_statements(self, qid: str) -> List[BaseDataType]:
- reconstructed_statements: List[BaseDataType] = []
-
- if qid not in self.prop_data:
- self.reconstructed_statements = reconstructed_statements
- return reconstructed_statements
-
- for prop_nr, dt in self.prop_data[qid].items():
- # get datatypes for qualifier props
- q_props = set(chain(*([x[0] for x in d['qual']] for d in dt.values())))
- r_props = set(chain(*(set(chain(*([y[0] for y in x] for x in d['ref'].values()))) for d in dt.values())))
- props = q_props | r_props
- for prop in props:
- if prop not in self.prop_dt_map:
- self.prop_dt_map.update({prop: self.get_prop_datatype(prop)})
- # reconstruct statements from frc (including unit, qualifiers, and refs)
- for _, d in dt.items():
- qualifiers = []
- for q in d['qual']:
- f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[q[0]]][0]
- # TODO: Add support for more data type (Time, MonolingualText, GlobeCoordinate)
- if self.prop_dt_map[q[0]] == 'quantity':
- qualifiers.append(f(value=q[1], prop_nr=q[0], unit=q[2]))
- else:
- qualifiers.append(f(value=q[1], prop_nr=q[0]))
-
- references = []
- for _, refs in d['ref'].items():
- this_ref = []
- for ref in refs:
- f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[ref[0]]][0]
- this_ref.append(f(value=ref[1], prop_nr=ref[0]))
- references.append(this_ref)
-
- f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[prop_nr]][0]
- # TODO: Add support for more data type
- if self.prop_dt_map[prop_nr] == 'quantity':
- datatype = f(prop_nr=prop_nr, qualifiers=qualifiers, references=references, unit=d['unit'])
- datatype.parse_sparql_value(value=d['v'], unit=d['unit'])
- else:
- datatype = f(prop_nr=prop_nr, qualifiers=qualifiers, references=references)
- datatype.parse_sparql_value(value=d['v'])
- reconstructed_statements.append(datatype)
+ :param base_filter: The default filter to initialize the dataset. A list made of BaseDataType or list of BaseDataType.
+ :param base_data_type: The default data type to create objects.
+ :param use_qualifiers: Use qualifiers during fastrun. Enabled by default.
+ :param use_references: Use references during fastrun. Disabled by default.
+ :param use_cache: Put data returned by WDQS in cache. Enabled by default.
+ :param case_insensitive: Enable case-insensitive value matching (currently unsupported; enabling it raises ValueError).
+ :param sparql_endpoint_url: SPARQL endpoint URL.
+ :param wikibase_url: Wikibase URL used for the concept URI.
+ """
- # this isn't used. done for debugging purposes
- self.reconstructed_statements = reconstructed_statements
- return reconstructed_statements
+ # TODO: Add support for case_insensitive
- def get_items(self, claims: Union[List[Claim], Claims, Claim], cqid: str = None) -> Optional[Set[str]]:
- """
- Get items ID from a SPARQL endpoint
+ data: Dict[str, Dict[str, List[Dict[str, str]]]]
+
+ def __init__(self, base_filter: List[BaseDataType | List[BaseDataType]], base_data_type: Type[BaseDataType] = None, use_qualifiers: bool = True, use_references: bool = False,
+ use_cache: bool = True, case_insensitive: bool = False, sparql_endpoint_url: str = None, wikibase_url: str = None):
- :param claims: A list of claims the entities should have
- :param cqid:
- :return: a list of entity ID or None
- :exception: if there is more than one claim
+ for k in base_filter:
+ if not isinstance(k, BaseDataType) and not (isinstance(k, list) and len(k) == 2 and isinstance(k[0], BaseDataType) and isinstance(k[1], BaseDataType)):
+ raise ValueError("base_filter must be an instance of BaseDataType or a list of instances of BaseDataType")
+
+ self.data: Dict[str, Dict[str, List[Dict[str, str]]]] = {}
+
+ self.base_filter = base_filter
+ self.base_data_type = base_data_type or BaseDataType
+ self.sparql_endpoint_url = str(sparql_endpoint_url or config['SPARQL_ENDPOINT_URL'])
+ self.wikibase_url = str(wikibase_url or config['WIKIBASE_URL'])
+ self.use_qualifiers = use_qualifiers
+ self.use_references = use_references
+ self.use_cache = use_cache
+ self.case_insensitive = case_insensitive
+ self.properties_type: Dict[str, str] = {}
+
+ if self.case_insensitive:
+ raise ValueError("Case insensitive does not work for the moment.")
+
+ def load_statements(self, claims: Union[List[Claim], Claims, Claim], use_cache: bool = None, wb_url: str = None, limit: int = 10000) -> None:
"""
- match_sets = []
+ Load the statements related to the given claims into the internal cache of the current object.
+ :param claims: A Claim, Claims or list of Claim
+ :param wb_url: The first part of the concept URI of entities.
+ :param limit: The limit to request at one time.
+ :param use_cache: Put data returned by WDQS in cache. Enabled by default.
+ :return:
+ """
if isinstance(claims, Claim):
claims = [claims]
elif (not isinstance(claims, list) or not all(isinstance(n, Claim) for n in claims)) and not isinstance(claims, Claims):
raise ValueError("claims must be an instance of Claim or Claims or a list of Claim")
- for claim in claims:
- # skip to next if statement has no value or no data type defined, e.g. for deletion objects
- if not claim.mainsnak.datavalue and not claim.mainsnak.datatype:
- continue
+ use_cache = bool(use_cache or self.use_cache)
+
+ wb_url = wb_url or self.wikibase_url
+ for claim in claims:
prop_nr = claim.mainsnak.property_number
- if prop_nr not in self.prop_dt_map:
- log.debug("%s not found in fastrun", prop_nr)
+ # Load each property from the Wikibase instance or the cache
+ if use_cache and prop_nr in self.data:
+ continue
+
+ offset = 0
- if isinstance(claim, BaseDataType) and type(claim) != BaseDataType: # pylint: disable=unidiomatic-typecheck
- self.prop_dt_map.update({prop_nr: claim.DTYPE})
+ # Generate base filter
+ base_filter_string = ''
+ for k in self.base_filter:
+ if isinstance(k, BaseDataType):
+ if k.mainsnak.datavalue:
+ base_filter_string += '?entity <{wb_url}/prop/direct/{prop_nr}> {entity} .\n'.format(
+ wb_url=wb_url, prop_nr=k.mainsnak.property_number, entity=k.get_sparql_value(wikibase_url=wb_url))
+ elif sum(map(lambda x, other=k: x.mainsnak.property_number == other.mainsnak.property_number, self.base_filter)) == 1: # type: ignore
+ base_filter_string += '?entity <{wb_url}/prop/direct/{prop_nr}> ?zz{prop_nr} .\n'.format(
+ wb_url=wb_url, prop_nr=k.mainsnak.property_number)
+ elif isinstance(k, list) and len(k) == 2 and isinstance(k[0], BaseDataType) and isinstance(k[1], BaseDataType):
+ if k[0].mainsnak.datavalue:
+ base_filter_string += '?entity <{wb_url}/prop/direct/{prop_nr}>/<{wb_url}/prop/direct/{prop_nr2}>* {entity} .\n'.format(
+ wb_url=wb_url, prop_nr=k[0].mainsnak.property_number, prop_nr2=k[1].mainsnak.property_number,
+ entity=k[0].get_sparql_value(wikibase_url=wb_url))
+ # TODO: Remove ?zzPYY if another filter have the same property number, the same as above
+ else:
+ base_filter_string += '?entity <{wb_url}/prop/direct/{prop_nr1}>/<{wb_url}/prop/direct/{prop_nr2}>* ?zz{prop_nr1}{prop_nr2} .\n'.format(
+ wb_url=wb_url, prop_nr1=k[0].mainsnak.property_number, prop_nr2=k[1].mainsnak.property_number)
else:
- self.prop_dt_map.update({prop_nr: self.get_prop_datatype(prop_nr)})
- self._query_data(prop_nr=prop_nr, use_units=self.prop_dt_map[prop_nr] == 'quantity')
+ raise ValueError("base_filter must be an instance of BaseDataType or a list of instances of BaseDataType")
- # noinspection PyProtectedMember
- current_value = claim.get_sparql_value()
+ qualifiers_filter_string = ''
+ if self.use_qualifiers:
+ for qualifier in claim.qualifiers:
+ fake_json = {
+ 'mainsnak': qualifier.get_json(),
+ 'type': qualifier.datatype,
+ 'id': 'Q0',
+ 'rank': 'normal'
+ }
+ f = [x for x in self.base_data_type.subclasses if x.DTYPE == qualifier.datatype][0]().from_json(json_data=fake_json)
+ qualifiers_filter_string += f'?sid pq:{qualifier.property_number} {f.get_sparql_value()}.\n'
+
+ # We force a refresh of the data, remove the previous results
+ self.data[prop_nr] = {}
+
+ while True:
+ query = '''
+ #Tool: WikibaseIntegrator wbi_fastrun.load_statements
+ SELECT ?entity ?sid ?value ?property_type WHERE {{
+ # Base filter string
+ {base_filter_string}
+ ?entity <{wb_url}/prop/{prop_nr}> ?sid.
+ <{wb_url}/entity/{prop_nr}> wikibase:propertyType ?property_type.
+ ?sid <{wb_url}/prop/statement/{prop_nr}> ?value.
+ {qualifiers_filter_string}
+ }}
+ ORDER BY ?sid
+ OFFSET {offset}
+ LIMIT {limit}
+ '''
- if self.prop_dt_map[prop_nr] == 'wikibase-item':
- current_value = claim.mainsnak.datavalue['value']['id']
+ # Format the query
+ query = query.format(base_filter_string=base_filter_string, wb_url=wb_url, prop_nr=prop_nr, offset=str(offset), limit=str(limit),
+ qualifiers_filter_string=qualifiers_filter_string)
+ offset += limit # We increase the offset for the next iteration
+ results = execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings']
- log.debug(current_value)
- # if self.case_insensitive:
- # log.debug("case insensitive enabled")
- # log.debug(self.rev_lookup_ci)
- # else:
- # log.debug(self.rev_lookup)
+ for result in results:
+ entity = result['entity']['value']
+ sid = result['sid']['value']
+ # value = result['value']['value']
+ property_type = result['property_type']['value']
- if current_value in self.rev_lookup:
- # quick check for if the value has ever been seen before, if not, write required
- match_sets.append(set(self.rev_lookup[current_value]))
- elif self.case_insensitive and current_value.casefold() in self.rev_lookup_ci:
- match_sets.append(set(self.rev_lookup_ci[current_value.casefold()]))
- else:
- log.debug("no matches for rev lookup for %s", current_value)
+ # Use casefold for lower case
+ if self.case_insensitive:
+ result['value']['value'] = result['value']['value'].casefold()
- if not match_sets:
- return None
+ f = [x for x in self.base_data_type.subclasses if x.PTYPE == property_type][0]().from_sparql_value(sparql_value=result['value'])
- if cqid:
- matching_qids = {cqid}
- else:
- matching_qids = match_sets[0].intersection(*match_sets[1:])
+ sparql_value = f.get_sparql_value()
+ if sparql_value is not None:
+ if sparql_value not in self.data[prop_nr]:
+ self.data[prop_nr][sparql_value] = []
- return matching_qids
+ if prop_nr not in self.properties_type:
+ self.properties_type[prop_nr] = property_type
- def get_item(self, claims: Union[List[Claim], Claims, Claim], cqid: str = None) -> Optional[str]:
- """
+ self.data[prop_nr][sparql_value].append({'entity': entity, 'sid': sid})
- :param claims: A list of claims the entity should have
- :param cqid:
- :return: An entity ID, None if there is more than one.
- """
+ if len(results) == 0 or len(results) < limit:
+ break
- matching_qids: Optional[Set[str]] = self.get_items(claims=claims, cqid=cqid)
+ def _load_qualifiers(self, sid: str, limit: int = 10000) -> Qualifiers:
+ """
+ Load the qualifiers of a statement.
- if matching_qids is None:
- return None
+ :param sid: A statement ID.
+ :param limit: The limit to request at one time.
+ :return: A Qualifiers object.
+ """
+ offset = 0
- # check if there are any items that have all of these values
- # if not, a write is required no matter what
- if not len(matching_qids) == 1:
- log.debug("no matches (%s)", len(matching_qids))
- return None
+ # We force a refresh of the data, remove the previous results
+ qualifiers: Qualifiers = Qualifiers()
+ while True:
+ query = f'''
+ #Tool: WikibaseIntegrator wbi_fastrun._load_qualifiers
+ SELECT ?property ?value ?property_type WHERE {{
+ VALUES ?sid {{ <{sid}> }}
+ ?sid ?predicate ?value.
+ ?property wikibase:qualifier ?predicate.
+ ?property wikibase:propertyType ?property_type.
+ }}
+ ORDER BY ?sid
+ OFFSET {offset}
+ LIMIT {limit}
+ '''
- return matching_qids.pop()
+ # Format the query
+ # query = query.format(wb_url=wb_url, sid=sid, offset=str(offset), limit=str(limit))
+ offset += limit # We increase the offset for the next iteration
+ results = execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings']
- def write_required(self, data: List[Claim], action_if_exists: ActionIfExists = ActionIfExists.REPLACE, cqid: str = None) -> bool:
- """
- Check if a write is required
+ for result in results:
+ property = result['property']['value']
+ property_type = result['property_type']['value']
- :param data:
- :param action_if_exists:
- :param cqid:
- :return: Return True if the write is required
- """
- del_props = set()
- data_props = set()
- append_props = []
- if action_if_exists == ActionIfExists.APPEND:
- append_props = [x.mainsnak.property_number for x in data]
-
- for x in data:
- if x.mainsnak.datavalue and x.mainsnak.datatype:
- data_props.add(x.mainsnak.property_number)
- qid = self.get_item(data, cqid)
-
- if not qid:
- return True
-
- reconstructed_statements = self.reconstruct_statements(qid)
- tmp_rs = copy.deepcopy(reconstructed_statements)
-
- # handle append properties
- for p in append_props:
- app_data = [x for x in data if x.mainsnak.property_number == p] # new statements
- rec_app_data = [x for x in tmp_rs if x.mainsnak.property_number == p] # orig statements
- comp = []
- for x in app_data:
- for y in rec_app_data:
- if x.mainsnak.datavalue == y.mainsnak.datavalue:
- if y.equals(x, include_ref=self.use_refs) and action_if_exists != ActionIfExists.FORCE_APPEND:
- comp.append(True)
-
- # comp = [True for x in app_data for y in rec_app_data if x.equals(y, include_ref=self.use_refs)]
- if len(comp) != len(app_data):
- log.debug("failed append: %s", p)
- return True
-
- tmp_rs = [x for x in tmp_rs if x.mainsnak.property_number not in append_props and x.mainsnak.property_number in data_props]
-
- for date in data:
- # ensure that statements meant for deletion get handled properly
- reconst_props = {x.mainsnak.property_number for x in tmp_rs}
- if not date.mainsnak.datatype and date.mainsnak.property_number in reconst_props:
- log.debug("returned from delete prop handling")
- return True
-
- if not date.mainsnak.datavalue or not date.mainsnak.datatype:
- # Ignore the deletion statements which are not in the reconstructed statements.
- continue
+ if property not in self.properties_type:
+ self.properties_type[property] = property_type
- if date.mainsnak.property_number in append_props:
- # TODO: check if value already exist and already have the same value
- continue
+ # Use casefold for lower case
+ if self.case_insensitive:
+ result['value']['value'] = result['value']['value'].casefold()
- if not date.mainsnak.datavalue and not date.mainsnak.datatype:
- del_props.add(date.mainsnak.property_number)
+ f = [x for x in self.base_data_type.subclasses if x.PTYPE == property_type][0](prop_nr=property).from_sparql_value(sparql_value=result['value'])
+ qualifiers.add(f)
- # this is where the magic happens
- # date is a new statement, proposed to be written
- # tmp_rs are the reconstructed statements == current state of the item
- bool_vec = []
- for x in tmp_rs:
- if (x == date or (self.case_insensitive and x.mainsnak.datavalue.casefold() == date.mainsnak.datavalue.casefold())) and x.mainsnak.property_number not in del_props:
- bool_vec.append(x.equals(date, include_ref=self.use_refs))
- else:
- bool_vec.append(False)
- # bool_vec = [x.equals(date, include_ref=self.use_refs, fref=self.ref_comparison_f) and
- # x.mainsnak.property_number not in del_props for x in tmp_rs]
-
- log.debug("bool_vec: %s", bool_vec)
- log.debug("-----------------------------------")
- for x in tmp_rs:
- if x == date and x.mainsnak.property_number not in del_props:
- log.debug([x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]])
- log.debug([date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]])
- elif x.mainsnak.property_number == date.mainsnak.property_number:
- log.debug([x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]])
- log.debug([date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]])
-
- if not any(bool_vec):
- log.debug(len(bool_vec))
- log.debug("fast run failed at %s", date.mainsnak.property_number)
- return True
-
- log.debug("fast run success")
- tmp_rs.pop(bool_vec.index(True))
-
- if len(tmp_rs) > 0:
- log.debug("failed because not zero")
- for x in tmp_rs:
- log.debug([x.mainsnak.property_number, x.mainsnak.datavalue, [z.mainsnak.datavalue for z in x.qualifiers]])
- log.debug("failed because not zero--END")
- return True
+ if len(results) == 0 or len(results) < limit:
+ break
- return False
+ return qualifiers
- def init_language_data(self, lang: str, lang_data_type: str) -> None:
+ def _load_references(self, sid: str, limit: int = 10000) -> References:
"""
- Initialize language data store
+ Load the references of a statement.
- :param lang: language code
- :param lang_data_type: 'label', 'description' or 'aliases'
- :return: None
+ :param sid: A statement ID.
+ :param limit: The limit to request at one time.
+ :return: A References object.
"""
- if lang not in self.loaded_langs:
- self.loaded_langs[lang] = {}
+ offset = 0
- if lang_data_type not in self.loaded_langs[lang]:
- result = self._query_lang(lang=lang, lang_data_type=lang_data_type)
- if result is not None:
- data = self._process_lang(result=result)
- self.loaded_langs[lang].update({lang_data_type: data})
+ if not isinstance(sid, str):
+ raise ValueError('sid must be a string')
- def get_language_data(self, qid: str, lang: str, lang_data_type: str) -> List[str]:
- """
- get language data for specified qid
-
- :param qid: Wikibase item id
- :param lang: language code
- :param lang_data_type: 'label', 'description' or 'aliases'
- :return: list of strings
- If nothing is found:
- If lang_data_type == label: returns ['']
- If lang_data_type == description: returns ['']
- If lang_data_type == aliases: returns []
- """
- self.init_language_data(lang, lang_data_type)
+ # We force a refresh of the data, remove the previous results
+ references: References = References()
+ while True:
+ query = f'''
+ #Tool: WikibaseIntegrator wbi_fastrun._load_references
+ SELECT ?srid ?ref_property ?ref_value ?property_type WHERE {{
+ VALUES ?sid {{ <{sid}> }}
+
+ ?sid prov:wasDerivedFrom ?srid.
+ ?srid ?ref_predicate ?ref_value.
+ ?ref_property wikibase:reference ?ref_predicate.
+ ?ref_property wikibase:propertyType ?property_type.
+ }}
+ ORDER BY ?srid
+ OFFSET {offset}
+ LIMIT {limit}
+ '''
- current_lang_data = self.loaded_langs[lang][lang_data_type]
- all_lang_strings = current_lang_data.get(qid, [])
- if not all_lang_strings and lang_data_type in {'label', 'description'}:
- all_lang_strings = ['']
- return all_lang_strings
+ # Format the query
+ # query = query.format(wb_url=wb_url, sid=sid, offset=str(offset), limit=str(limit))
+ offset += limit # We increase the offset for the next iteration
+ results = execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings']
- def check_language_data(self, qid: str, lang_data: List, lang: str, lang_data_type: str, action_if_exists: ActionIfExists = ActionIfExists.APPEND) -> bool:
- """
- Method to check if certain language data exists as a label, description or aliases
- :param qid: Wikibase item id
- :param lang_data: list of string values to check
- :param lang: language code
- :param lang_data_type: What kind of data is it? 'label', 'description' or 'aliases'?
- :param action_if_exists: If aliases already exist, APPEND or REPLACE
- :return: boolean
- """
- all_lang_strings = {x.strip().casefold() for x in self.get_language_data(qid, lang, lang_data_type)}
+ reference = {}
- if action_if_exists == ActionIfExists.REPLACE:
- return collections.Counter(all_lang_strings) != collections.Counter(map(lambda x: x.casefold(), lang_data))
+ for result in results:
+ ref_property = result['ref_property']['value']
+ srid = result['srid']['value']
+ property_type = result['property_type']['value']
- for s in lang_data:
- if s.strip().casefold() not in all_lang_strings:
- log.debug("fastrun failed at: %s, string: %s", lang_data_type, s)
- return True
+ if ref_property not in self.properties_type:
+ self.properties_type[ref_property] = property_type
- return False
+ # Use casefold for lower case
+ if self.case_insensitive:
+ result['ref_value']['value'] = result['ref_value']['value'].casefold()
- def get_all_data(self) -> Dict[str, Dict]:
- return self.prop_data
+ f = [x for x in self.base_data_type.subclasses if x.PTYPE == property_type][0](prop_nr=ref_property).from_sparql_value(sparql_value=result['ref_value'])
- def format_query_results(self, r: List, prop_nr: str) -> None:
- """
- `r` is the results of the sparql query in _query_data and is modified in place
- `prop_nr` is needed to get the property datatype to determine how to format the value
-
- `r` is a list of dicts. The keys are:
- sid: statement ID
- item: the subject. the item this statement is on
- v: the object. The value for this statement
- unit: property unit
- pq: qualifier property
- qval: qualifier value
- qunit: qualifier unit
- ref: reference ID
- pr: reference property
- rval: reference value
- """
- prop_dt = self.get_prop_datatype(prop_nr)
- for i in r:
- for value in ['item', 'sid', 'pq', 'pr', 'ref', 'unit', 'qunit']:
- if value in i:
- if i[value]['value'].startswith(self.wikibase_url):
- i[value] = i[value]['value'].split('/')[-1]
- else:
- # TODO: Dirty fix. If we are not on wikidata, we force unitless (Q199) to '1'
- if i[value]['value'] == 'http://www.wikidata.org/entity/Q199':
- i[value] = '1'
- else:
- i[value] = i[value]['value']
-
- # make sure datetimes are formatted correctly.
- # the correct format is '+%Y-%m-%dT%H:%M:%SZ', but is sometimes missing the plus??
- # some difference between RDF and xsd:dateTime that I don't understand
- for value in ['v', 'qval', 'rval']:
- if value in i:
- if i[value].get("datatype") == 'http://www.w3.org/2001/XMLSchema#dateTime' and not i[value]['value'][0] in '+-':
- # if it is a dateTime and doesn't start with plus or minus, add a plus
- i[value]['value'] = '+' + i[value]['value']
-
- # these three ({'v', 'qval', 'rval'}) are values that can be any data type
- # strip off the URI if they are wikibase-items
- if 'v' in i:
- if i['v']['type'] == 'uri' and prop_dt == 'wikibase-item':
- i['v'] = i['v']['value'].split('/')[-1]
- elif i['v']['type'] == 'literal' and prop_dt == 'quantity':
- i['v'] = format_amount(i['v']['value'])
- elif i['v']['type'] == 'literal' and prop_dt == 'monolingualtext':
- f = [x for x in self.base_data_type.subclasses if x.DTYPE == prop_dt][0](prop_nr=prop_nr, text=i['v']['value'], language=i['v']['xml:lang'])
- i['v'] = f.get_sparql_value()
- else:
- f = [x for x in self.base_data_type.subclasses if x.DTYPE == prop_dt][0](prop_nr=prop_nr)
- if not f.parse_sparql_value(value=i['v']['value'], type=i['v']['type']):
- raise ValueError("Can't parse the value with parse_sparql_value()")
- i['v'] = f.get_sparql_value()
-
- # Note: no-value and some-value don't actually show up in the results here
- # see for example: select * where { wd:Q7207 p:P40 ?c . ?c ?d ?e }
- if not isinstance(i['v'], dict):
- self.rev_lookup[i['v']].add(i['item'])
- if self.case_insensitive:
- self.rev_lookup_ci[i['v'].casefold()].add(i['item'])
-
- # handle qualifier value
- if 'qval' in i:
- qual_prop_dt = self.get_prop_datatype(prop_nr=i['pq'])
- if i['qval']['type'] == 'uri' and qual_prop_dt == 'wikibase-item':
- i['qval'] = i['qval']['value'].split('/')[-1]
- elif i['qval']['type'] == 'literal' and qual_prop_dt == 'quantity':
- i['qval'] = format_amount(i['qval']['value'])
- else:
- i['qval'] = i['qval']['value']
-
- # handle reference value
- if 'rval' in i:
- ref_prop_dt = self.get_prop_datatype(prop_nr=i['pr'])
- if i['rval']['type'] == 'uri' and ref_prop_dt == 'wikibase-item':
- i['rval'] = i['rval']['value'].split('/')[-1]
- elif i['rval']['type'] == 'literal' and ref_prop_dt == 'quantity':
- i['rval'] = format_amount(i['rval']['value'])
- else:
- i['rval'] = i['rval']['value']
-
- def update_frc_from_query(self, r: List, prop_nr: str) -> None:
- # r is the output of format_query_results
- # this updates the frc from the query (result of _query_data)
- for i in r:
- qid = i['item']
- if qid not in self.prop_data:
- self.prop_data[qid] = {prop_nr: {}}
- if prop_nr not in self.prop_data[qid]:
- self.prop_data[qid].update({prop_nr: {}})
- if i['sid'] not in self.prop_data[qid][prop_nr]:
- self.prop_data[qid][prop_nr].update({i['sid']: {}})
- # update values for this statement (not including ref)
- d = {'v': i['v']}
- self.prop_data[qid][prop_nr][i['sid']].update(d)
-
- if 'qual' not in self.prop_data[qid][prop_nr][i['sid']]:
- self.prop_data[qid][prop_nr][i['sid']]['qual'] = set()
- if 'pq' in i and 'qval' in i:
- if 'qunit' in i:
- self.prop_data[qid][prop_nr][i['sid']]['qual'].add((i['pq'], i['qval'], i['qunit']))
- else:
- self.prop_data[qid][prop_nr][i['sid']]['qual'].add((i['pq'], i['qval'], '1'))
-
- if 'ref' not in self.prop_data[qid][prop_nr][i['sid']]:
- self.prop_data[qid][prop_nr][i['sid']]['ref'] = {}
- if 'ref' in i:
- if i['ref'] not in self.prop_data[qid][prop_nr][i['sid']]['ref']:
- self.prop_data[qid][prop_nr][i['sid']]['ref'][i['ref']] = set()
- self.prop_data[qid][prop_nr][i['sid']]['ref'][i['ref']].add((i['pr'], i['rval']))
+ if srid not in reference:
+ reference[srid] = Reference()
- if 'unit' not in self.prop_data[qid][prop_nr][i['sid']]:
- self.prop_data[qid][prop_nr][i['sid']]['unit'] = '1'
- if 'unit' in i:
- self.prop_data[qid][prop_nr][i['sid']]['unit'] = i['unit']
+ reference[srid].add(f)
- def _query_data(self, prop_nr: str, use_units: bool = False, page_size: int = 10000) -> None:
- page_count = 0
+ # Add each Reference to the References
+ for _, ref in reference.items():
+ references.add(ref)
- while True:
- # Query header
- query = '''
- #Tool: WikibaseIntegrator wbi_fastrun._query_data
- SELECT ?sid ?item ?v ?unit ?pq ?qval ?qunit ?ref ?pr ?rval
- WHERE
- {{
- '''
+ if len(results) == 0 or len(results) < limit:
+ break
- # Base filter
- query += '''
- {base_filter}
+ return references
- ?item <{wb_url}/prop/{prop_nr}> ?sid .
- '''
+ def _get_property_type(self, prop_nr: Union[str, int]) -> str:
+ """
+ Obtain the property type of the given property by looking at the SPARQL endpoint.
- # Amount and unit
- if use_units:
- query += '''
- {{
- <{wb_url}/entity/{prop_nr}> wikibase:propertyType ?property_type .
- FILTER (?property_type != wikibase:Quantity)
- ?sid <{wb_url}/prop/statement/{prop_nr}> ?v .
- }}
- # Get amount and unit for the statement
- UNION
- {{
- ?sid <{wb_url}/prop/statement/value/{prop_nr}> [wikibase:quantityAmount ?v; wikibase:quantityUnit ?unit] .
- }}
- '''
- else:
- query += '''
- <{wb_url}/entity/{prop_nr}> wikibase:propertyType ?property_type .
- ?sid <{wb_url}/prop/statement/{prop_nr}> ?v .
- '''
+ :param prop_nr: The property number.
+ :return: The SPARQL version of the property type.
+ """
+ if isinstance(prop_nr, int):
+ prop_nr = 'P' + str(prop_nr)
+ elif prop_nr is not None:
+ pattern = re.compile(r'^P?([0-9]+)$')
+ matches = pattern.match(prop_nr)
- # Qualifiers
- # Amount and unit
- if use_units:
- query += '''
- # Get qualifiers
- OPTIONAL
- {{
- {{
- # Get simple values for qualifiers which are not of type quantity
- ?sid ?propQualifier ?qval .
- ?pq wikibase:qualifier ?propQualifier .
- ?pq wikibase:propertyType ?qualifer_property_type .
- FILTER (?qualifer_property_type != wikibase:Quantity)
- }}
- UNION
- {{
- # Get amount and unit for qualifiers of type quantity
- ?sid ?pqv [wikibase:quantityAmount ?qval; wikibase:quantityUnit ?qunit] .
- ?pq wikibase:qualifierValue ?pqv .
- }}
- }}
- '''
- else:
- query += '''
- # Get qualifiers
- OPTIONAL
- {{
- # Get simple values for qualifiers
- ?sid ?propQualifier ?qval .
- ?pq wikibase:qualifier ?propQualifier .
- ?pq wikibase:propertyType ?qualifer_property_type .
- }}
- '''
+ if not matches:
+ raise ValueError('Invalid prop_nr, format must be "P[0-9]+"')
- # References
- if self.use_refs:
- query += '''
- # get references
- OPTIONAL {{
- ?sid prov:wasDerivedFrom ?ref .
- ?ref ?pr ?rval .
- [] wikibase:reference ?pr
- }}
- '''
- # Query footer
- query += '''
- }} ORDER BY ?sid OFFSET {offset} LIMIT {page_size}
- '''
+ prop_nr = 'P' + str(matches.group(1))
- # Format the query
- query = query.format(wb_url=self.wikibase_url, base_filter=self.base_filter_string, prop_nr=prop_nr, offset=str(page_count * page_size), page_size=str(page_size))
+ query = f'''#Tool: WikibaseIntegrator wbi_fastrun._get_property_type
+ SELECT ?property_type WHERE {{ wd:{prop_nr} wikibase:propertyType ?property_type. }}'''
- results = execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings']
- self.format_query_results(results, prop_nr)
- self.update_frc_from_query(results, prop_nr)
- page_count += 1
+ results = execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'][0]['property_type']['value']
- if len(results) == 0 or len(results) < page_size:
- break
+ return results
- def _query_lang(self, lang: str, lang_data_type: str) -> Optional[List[Dict[str, Dict]]]:
+ def get_entities(self, claims: Union[List[Claim], Claims, Claim], use_cache: bool = None) -> List[str]:
"""
+ Return a list of entities who correspond to the specified claims.
- :param lang:
- :param lang_data_type:
+ :param claims: A list of claims to query the SPARQL endpoint.
+ :param use_cache: Put data returned by WDQS in cache. Enabled by default.
+ :return: A list of entity ID.
"""
+ if isinstance(claims, Claim):
+ claims = [claims]
+ elif (not isinstance(claims, list) or not all(isinstance(n, Claim) for n in claims)) and not isinstance(claims, Claims):
+ raise ValueError("claims must be an instance of Claim or Claims or a list of Claim")
+
+ self.load_statements(claims=claims, use_cache=use_cache)
- lang_data_type_dict = {
- 'label': 'rdfs:label',
- 'description': 'schema:description',
- 'aliases': 'skos:altLabel'
- }
+ result = set()
+ for claim in claims:
+ # Add the returned entities to the result list
+ for dat in self.data[claim.mainsnak.property_number]:
+ for rez in self.data[claim.mainsnak.property_number][dat]:
+ result.add(rez['entity'].rsplit('/', 1)[-1])
- query = f'''
- #Tool: WikibaseIntegrator wbi_fastrun._query_lang
- SELECT ?item ?label WHERE {{
- {self.base_filter_string}
+ return list(result)
- OPTIONAL {{
- ?item {lang_data_type_dict[lang_data_type]} ?label FILTER (lang(?label) = "{lang}") .
- }}
- }}
- '''
-
- log.debug(query)
-
- return execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings']
-
- @staticmethod
- def _process_lang(result: List) -> defaultdict[str, set]:
- data = defaultdict(set)
- for r in result:
- qid = r['item']['value'].split("/")[-1]
- if 'label' in r:
- data[qid].add(r['label']['value'])
- return data
-
- @lru_cache(maxsize=100000)
- def get_prop_datatype(self, prop_nr: str) -> Optional[str]: # pylint: disable=no-self-use
- from wikibaseintegrator import WikibaseIntegrator
- wbi = WikibaseIntegrator()
- property = wbi.property.get(prop_nr)
- datatype = property.datatype
- if isinstance(datatype, WikibaseDatatype):
- return datatype.value
- return datatype
-
- def clear(self) -> None:
+ def write_required(self, entity: BaseEntity, property_filter: Union[List[str], str, None] = None, use_qualifiers: bool = None, use_references: bool = None,
+ use_cache: bool = None) -> bool:
"""
- convenience function to empty this fastrun container
+ Check if a write is required for the given entity.
+ :param entity: The entity whose claims are compared against the fastrun data.
+ :param property_filter: Limit the comparison to the given property numbers.
+ :param use_qualifiers: Use qualifiers during fastrun. Enabled by default.
+ :param use_references: Use references during fastrun. Disabled by default.
+ :param use_cache: Put data returned by WDQS in cache. Enabled by default.
+ :return: a boolean True if a write is required. False otherwise.
"""
- self.prop_dt_map = {}
- self.prop_data = {}
- self.rev_lookup = defaultdict(set)
- self.rev_lookup_ci = defaultdict(set)
+ from wikibaseintegrator.entities import BaseEntity
+
+ if not isinstance(entity, BaseEntity):
+ raise ValueError("entity must be an instance of BaseEntity")
+
+ if len(entity.claims) == 0:
+ raise ValueError("entity must have at least one claim")
+
+ if property_filter is not None and isinstance(property_filter, str):
+ property_filter = [property_filter]
+
+ # Generate a property_filter if None is given
+ if property_filter is None:
+ property_filter = [claim.mainsnak.property_number for claim in entity.claims]
+
+ use_qualifiers = self.use_qualifiers if use_qualifiers is None else bool(use_qualifiers)
+ use_references = self.use_references if use_references is None else bool(use_references)
+
+ def contains(in_list, lambda_filter):
+ for x in in_list:
+ if lambda_filter(x):
+ return True
+ return False
+
+ # Get all the potential statements
+ statements_to_check: Dict[str, List[str]] = {}
+ for claim in entity.claims:
+ if claim.mainsnak.property_number in property_filter:
+ self.load_statements(claims=claim, use_cache=use_cache)
+ if claim.mainsnak.property_number in self.data:
+ if not contains(self.data[claim.mainsnak.property_number], (lambda x, c=claim: x == c.get_sparql_value())):
+ # No statement with this value exists for this property; a write is required
+ log.debug("Value '%s' does not exist for property '%s'", claim.get_sparql_value(), claim.mainsnak.property_number)
+ return True
+
+ for statement in self.data[claim.mainsnak.property_number][claim.get_sparql_value()]:
+ if claim.mainsnak.property_number not in statements_to_check:
+ statements_to_check[claim.mainsnak.property_number] = []
+ statements_to_check[claim.mainsnak.property_number].append(statement['entity'])
+
+ # Generate an intersection between all the statements by property, based on the entity
+ # Generate only the list of entities
+ list_entities: List[List[str]] = []
+ for _, statements in statements_to_check.items():
+ # entities = [statement['entity'] for statement in statements_to_check[property]]
+ list_entities.append(list(set(statements)))
+
+ # Return the intersection between all the list
+ common_entities: List = list_entities.pop()
+ for entities in list_entities:
+ common_entities = list(set(common_entities).intersection(entities))
+
+ # If the property is already found, load it completely to compare deeply
+ for claim in entity.claims:
+ if claim.mainsnak.property_number in property_filter:
+ sparql_value: str = claim.get_sparql_value()
+ if claim.mainsnak.property_number in self.data and sparql_value in self.data[claim.mainsnak.property_number]:
+ for statement in self.data[claim.mainsnak.property_number][sparql_value]:
+ if statement['entity'] in common_entities:
+ if use_qualifiers:
+ qualifiers = self._load_qualifiers(statement['sid'], limit=100)
+
+ if len(qualifiers) != len(claim.qualifiers):
+ log.debug("Difference in number of qualifiers, '%i' != '%i'", len(qualifiers), len(claim.qualifiers))
+ return True
+
+ for qualifier in qualifiers:
+ if qualifier not in claim.qualifiers:
+ log.debug("Difference between two qualifiers")
+ return True
+
+ if use_references:
+ references = self._load_references(statement['sid'], limit=100)
+
+ if len(references) != len(claim.references):
+ log.debug("Difference in number of references, '%i' != '%i'", len(references), len(claim.references))
+ return True
+
+ for reference in references:
+ if reference not in claim.references:
+ log.debug("Difference between two references")
+ return True
+
+ return False
- def __repr__(self) -> str:
- """A mixin implementing a simple __repr__."""
- return "<{klass} @{id:x} {attrs}>".format( # pylint: disable=consider-using-f-string
- klass=self.__class__.__name__,
- id=id(self) & 0xFFFFFF,
- attrs="\r\n\t ".join(f"{k}={v!r}" for k, v in self.__dict__.items()),
- )
+def get_fastrun_container(base_filter: List[BaseDataType | List[BaseDataType]], use_qualifiers: bool = True, use_references: bool = False, use_cache: bool = True,
+ case_insensitive: bool = False) -> FastRunContainer:
+ """
+ Return a FastRunContainer object, create a new one if it doesn't already exist.
-def get_fastrun_container(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer:
+ :param base_filter: The default filter to initialize the dataset. A list made of BaseDataType or list of BaseDataType.
+ :param use_qualifiers: Use qualifiers during fastrun. Enabled by default.
+ :param use_references: Use references during fastrun. Disabled by default.
+ :param use_cache: Put data returned by WDQS in cache. Enabled by default.
+ :param case_insensitive: Perform case-insensitive matching of string values.
+ :return: a FastRunContainer object
+ """
if base_filter is None:
base_filter = []
# We search if we already have a FastRunContainer with the same parameters to re-use it
- fastrun_container = _search_fastrun_store(base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive)
+ fastrun_container = _search_fastrun_store(base_filter=base_filter, use_qualifiers=use_qualifiers, use_references=use_references, case_insensitive=case_insensitive,
+ use_cache=use_cache)
return fastrun_container
-def _search_fastrun_store(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer:
+def _search_fastrun_store(base_filter: List[BaseDataType | List[BaseDataType]], use_qualifiers: bool = True, use_references: bool = False, use_cache: bool = True,
+ case_insensitive: bool = False) -> FastRunContainer:
+ """
+ Search for an existing FastRunContainer with the same parameters or create a new one if it doesn't exist.
+
+ :param base_filter: The default filter to initialize the dataset. A list made of BaseDataType or list of BaseDataType.
+ :param use_qualifiers: Use qualifiers during fastrun. Enabled by default.
+ :param use_references: Use references during fastrun. Disabled by default.
+ :param use_cache: Put data returned by WDQS in cache. Enabled by default.
+ :param case_insensitive: Perform case-insensitive matching of string values.
+ :return: a FastRunContainer object
+ """
for fastrun in fastrun_store:
- if (fastrun.base_filter == base_filter) and (fastrun.use_refs == use_refs) and (fastrun.case_insensitive == case_insensitive) and (
- fastrun.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']):
+ if (fastrun.base_filter == base_filter) and (fastrun.use_qualifiers == use_qualifiers) and (fastrun.use_references == use_references) and (
+ fastrun.case_insensitive == case_insensitive) and (fastrun.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']):
+ fastrun.use_cache = use_cache
return fastrun
# In case nothing was found in the fastrun_store
log.info("Create a new FastRunContainer")
- fastrun_container = FastRunContainer(base_data_type=BaseDataType, base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive)
+ fastrun_container = FastRunContainer(base_data_type=BaseDataType, base_filter=base_filter, use_qualifiers=use_qualifiers, use_references=use_references, use_cache=use_cache,
+ case_insensitive=case_insensitive)
fastrun_store.append(fastrun_container)
return fastrun_container