Skip to content

Commit

Permalink
Add query_limit parameter
Browse files: browse the repository at this point in the history
  • Loading branch information
LeMyst committed May 7, 2023
1 parent 25ddf63 commit 2f17582
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 7 deletions.
3 changes: 2 additions & 1 deletion wikibaseintegrator/wbi_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@
'SPARQL_ENDPOINT_URL': 'https://query.wikidata.org/sparql',
'WIKIBASE_URL': 'http://www.wikidata.org',
'DEFAULT_LANGUAGE': 'en',
- 'DEFAULT_LEXEME_LANGUAGE': 'Q1860'
+ 'DEFAULT_LEXEME_LANGUAGE': 'Q1860',
+ 'SPARQL_QUERY_LIMIT': 10000
}
18 changes: 12 additions & 6 deletions wikibaseintegrator/wbi_fastrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def __init__(self, base_filter: List[BaseDataType | List[BaseDataType]], base_da
if self.case_insensitive:
raise ValueError("Case insensitive does not work for the moment.")

- def load_statements(self, claims: Union[List[Claim], Claims, Claim], use_cache: Optional[bool] = None, wb_url: Optional[str] = None, limit: int = 10000) -> None:
+ def load_statements(self, claims: Union[List[Claim], Claims, Claim], use_cache: Optional[bool] = None, wb_url: Optional[str] = None, limit: Optional[int] = None) -> None:
"""
Load the statements related to the given claims into the internal cache of the current object.
Expand All @@ -75,6 +75,8 @@ def load_statements(self, claims: Union[List[Claim], Claims, Claim], use_cache:

wb_url = wb_url or self.wikibase_url

+ limit = limit or int(config['SPARQL_QUERY_LIMIT']) # type: ignore

for claim in claims:
prop_nr = claim.mainsnak.property_number

Expand Down Expand Up @@ -168,7 +170,7 @@ def load_statements(self, claims: Union[List[Claim], Claims, Claim], use_cache:
if len(results) == 0 or len(results) < limit:
break

- def _load_qualifiers(self, sid: str, limit: int = 10000) -> Qualifiers:
+ def _load_qualifiers(self, sid: str, limit: Optional[int] = None) -> Qualifiers:
"""
Load the qualifiers of a statement.
Expand All @@ -178,6 +180,8 @@ def _load_qualifiers(self, sid: str, limit: int = 10000) -> Qualifiers:
"""
offset = 0

+ limit = limit or int(config['SPARQL_QUERY_LIMIT']) # type: ignore

# We force a refresh of the data, remove the previous results
qualifiers: Qualifiers = Qualifiers()
while True:
Expand Down Expand Up @@ -309,20 +313,21 @@ def _get_property_type(self, prop_nr: Union[str, int]) -> str:

return results

- def get_entities(self, claims: Union[List[Claim], Claims, Claim], use_cache: Optional[bool] = None) -> List[str]:
+ def get_entities(self, claims: Union[List[Claim], Claims, Claim], use_cache: Optional[bool] = None, query_limit: Optional[int] = None) -> List[str]:
"""
Return a list of entities who correspond to the specified claims.
:param claims: A list of claims to query the SPARQL endpoint.
:param use_cache: Put data returned by WDQS in cache. Enabled by default.
+ :param query_limit: Limit the amount of results from the SPARQL server
:return: A list of entity ID.
"""
if isinstance(claims, Claim):
claims = [claims]
elif (not isinstance(claims, list) or not all(isinstance(n, Claim) for n in claims)) and not isinstance(claims, Claims):
raise ValueError("claims must be an instance of Claim or Claims or a list of Claim")

- self.load_statements(claims=claims, use_cache=use_cache)
+ self.load_statements(claims=claims, use_cache=use_cache, limit=query_limit)

result = set()
for claim in claims:
Expand All @@ -334,14 +339,15 @@ def get_entities(self, claims: Union[List[Claim], Claims, Claim], use_cache: Opt
return list(result)

def write_required(self, entity: BaseEntity, property_filter: Union[List[str], str, None] = None, use_qualifiers: Optional[bool] = None, use_references: Optional[bool] = None,
- use_cache: Optional[bool] = None) -> bool:
+ use_cache: Optional[bool] = None, query_limit: Optional[int] = None) -> bool:
"""
:param entity:
:param property_filter:
:param use_qualifiers: Use qualifiers during fastrun. Enabled by default.
:param use_references: Use references during fastrun. Disabled by default.
:param use_cache: Put data returned by WDQS in cache. Enabled by default.
+ :param query_limit: Limit the amount of results from the SPARQL server
:return: a boolean True if a write is required. False otherwise.
"""
from wikibaseintegrator.entities import BaseEntity
Expand Down Expand Up @@ -372,7 +378,7 @@ def contains(in_list, lambda_filter):
statements_to_check: Dict[str, List[str]] = {}
for claim in entity.claims:
if claim.mainsnak.property_number in property_filter:
- self.load_statements(claims=claim, use_cache=use_cache)
+ self.load_statements(claims=claim, use_cache=use_cache, limit=query_limit)
if claim.mainsnak.property_number in self.data:
if not contains(self.data[claim.mainsnak.property_number], (lambda x, c=claim: x == c.get_sparql_value())):
# Found if a property with this value does not exist, return True if none exist
Expand Down

0 comments on commit 2f17582

Please sign in to comment.