forked from gijswobben/pymed
-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
141 additions
and
100 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,7 +35,7 @@ jobs: | |
- "3.9" | ||
- "3.10" | ||
- "3.11" | ||
- '3.12' | ||
- "3.12" | ||
os: | ||
- "ubuntu" | ||
# - 'macos' | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
"""API module for PubMed.""" | ||
import datetime | ||
import itertools | ||
import xml.etree.ElementTree as xml | ||
|
@@ -15,26 +16,30 @@ | |
|
||
|
||
class PubMed: | ||
"""Wrapper around the PubMed API.""" | ||
"""Wrap around the PubMed API.""" | ||
|
||
def __init__( | ||
self, | ||
tool: str = "my_tool", | ||
email: str = "[email protected]", | ||
) -> None: | ||
"""Initialization of the object. | ||
Parameters: | ||
- tool String, name of the tool that is executing the query. | ||
This parameter is not required but kindly requested by | ||
PMC (PubMed Central). | ||
- email String, email of the user of the tool. This parameter | ||
is not required but kindly requested by PMC (PubMed Central). | ||
Returns: | ||
- None | ||
""" | ||
|
||
Initialize the PubMed object. | ||
Parameters | ||
---------- | ||
tool: String | ||
name of the tool that is executing the query. | ||
This parameter is not required but kindly requested by | ||
PMC (PubMed Central). | ||
email: String | ||
email of the user of the tool. This parameter | ||
is not required but kindly requested by PMC (PubMed Central). | ||
Returns | ||
------- | ||
None | ||
""" | ||
# Store the input parameters | ||
self.tool = tool | ||
self.email = email | ||
|
@@ -53,17 +58,21 @@ def query( | |
max_date: str, | ||
max_results: int = 100, | ||
) -> Iterable[Union[PubMedArticle, PubMedBookArticle]]: | ||
"""Method that executes a query agains the GraphQL schema, automatically | ||
inserting the PubMed data loader. | ||
""" | ||
Execute a query against the GraphQL schema. | ||
Parameters: | ||
- query String, the GraphQL query to execute against the schema. | ||
Automatically inserting the PubMed data loader. | ||
Returns: | ||
- result ExecutionResult, GraphQL object that contains the result | ||
in the "data" attribute. | ||
""" | ||
Parameters | ||
---------- | ||
query: String | ||
the GraphQL query to execute against the schema. | ||
Returns | ||
------- | ||
result: ExecutionResult | ||
GraphQL object that contains the result in the "data" attribute. | ||
""" | ||
# Retrieve the article IDs for the query | ||
article_ids = self._getArticleIds( | ||
query=query, | ||
|
@@ -84,15 +93,19 @@ def query( | |
return itertools.chain.from_iterable(articles) | ||
|
||
def getTotalResultsCount(self, query: str) -> int: | ||
"""Helper method that returns the total number of results that match the query. | ||
""" | ||
Return the total number of results that match the query. | ||
Parameters: | ||
- query String, the query to send to PubMed | ||
Parameters | ||
---------- | ||
query: String | ||
the query to send to PubMed | ||
Returns: | ||
- total_results_count Int, total number of results for the query in PubMed | ||
Returns | ||
------- | ||
total_results_count: Int | ||
total number of results for the query in PubMed | ||
""" | ||
|
||
# Get the default parameters | ||
parameters = self.parameters.copy() | ||
|
||
|
@@ -105,7 +118,8 @@ def getTotalResultsCount(self, query: str) -> int: | |
url="/entrez/eutils/esearch.fcgi", parameters=parameters | ||
) | ||
|
||
# Get from the returned meta data the total number of available results for the query | ||
# Get from the returned meta data the total number of available | ||
# results for the query | ||
total_results_count = int( | ||
response.get("esearchresult", {}).get("count") | ||
) | ||
|
@@ -114,12 +128,14 @@ def getTotalResultsCount(self, query: str) -> int: | |
return total_results_count | ||
|
||
def _exceededRateLimit(self) -> bool: | ||
"""Helper method to check if we've exceeded the rate limit. | ||
Returns: | ||
- exceeded Bool, Whether or not the rate limit is exceeded. | ||
""" | ||
Check if we've exceeded the rate limit. | ||
Returns | ||
------- | ||
exceeded: Bool | ||
Whether or not the rate limit is exceeded. | ||
""" | ||
# Remove requests from the list that were made more than 1 second ago | ||
self._requestsMade = [ | ||
requestTime | ||
|
@@ -128,7 +144,8 @@ def _exceededRateLimit(self) -> bool: | |
> datetime.datetime.now() - datetime.timedelta(seconds=1) | ||
] | ||
|
||
# Return whether we've made more requests in the last second, than the rate limit | ||
# Return whether we've made more requests in the last second, | ||
# than the rate limit | ||
return len(self._requestsMade) > self._rateLimit | ||
|
||
def _get( | ||
|
@@ -137,21 +154,26 @@ def _get( | |
parameters: Dict[Any, Any] = dict(), | ||
output: str = "json", | ||
) -> Union[str, requests.models.Response]: | ||
"""Generic helper method that makes a request to PubMed. | ||
Parameters: | ||
- url Str, last part of the URL that is requested (will | ||
be combined with the base url) | ||
- parameters Dict, parameters to use for the request | ||
- output Str, type of output that is requested (defaults to | ||
JSON but can be used to retrieve XML) | ||
Returns: | ||
""" | ||
Make a request to PubMed. | ||
Parameters | ||
---------- | ||
url: Str | ||
last part of the URL that is requested (will | ||
be combined with the base url) | ||
parameters: Dict | ||
parameters to use for the request | ||
output: Str | ||
type of output that is requested (defaults to | ||
JSON but can be used to retrieve XML) | ||
Returns | ||
------- | ||
- response Dict / str, if the response is valid JSON it will | ||
be parsed before returning, otherwise a string is | ||
returned | ||
""" | ||
|
||
# Make sure the rate limit is not exceeded | ||
while self._exceededRateLimit(): | ||
pass | ||
|
@@ -178,15 +200,16 @@ def _get( | |
def _getArticles( | ||
self, article_ids: List[str] | ||
) -> Iterable[Union[PubMedArticle, PubMedBookArticle]]: | ||
"""Helper method that batches a list of article IDs and retrieves the content. | ||
"""Batch a list of article IDs and retrieve the content. | ||
Parameters: | ||
Parameters | ||
---------- | ||
- article_ids List, article IDs. | ||
Returns: | ||
Returns | ||
------- | ||
- articles List, article objects. | ||
""" | ||
|
||
# Get the default parameters | ||
parameters = self.parameters.copy() | ||
parameters["id"] = article_ids | ||
|
@@ -214,16 +237,20 @@ def _getArticleIds( | |
max_date: str, | ||
max_results: int, | ||
) -> List[str]: | ||
"""Helper method to retrieve the article IDs for a query. | ||
Parameters: | ||
- query Str, query to be executed against the PubMed database. | ||
- max_results Int, the maximum number of results to retrieve. | ||
Returns: | ||
- article_ids List, article IDs as a list. | ||
"""Retrieve the article IDs for a query. | ||
Parameters | ||
---------- | ||
query: Str | ||
query to be executed against the PubMed database. | ||
max_results: Int | ||
the maximum number of results to retrieve. | ||
Returns | ||
------- | ||
article_ids: List | ||
article IDs as a list. | ||
""" | ||
|
||
# Create a placeholder for the retrieved IDs | ||
article_ids = [] | ||
|
||
|
@@ -269,7 +296,8 @@ def _getArticleIds( | |
if max_results == -1: | ||
max_results = total_result_count | ||
|
||
# If not all articles are retrieved, continue to make requests untill we have everything | ||
# If not all articles are retrieved, continue to make requests until | ||
# we have everything | ||
while ( | ||
retrieved_count < total_result_count | ||
and retrieved_count < max_results | ||
|
@@ -280,7 +308,8 @@ def _getArticleIds( | |
): | ||
parameters["retmax"] = max_results - retrieved_count | ||
|
||
# Start the collection from the number of already retrieved articles | ||
# Start the collection from the number of already retrieved | ||
# articles | ||
parameters["retstart"] = retrieved_count | ||
|
||
# Make a new request | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.