Skip to content

Commit 92ff512

Browse files
author
vlad-outscraper
committed
Add async request option
1 parent b3e02b6 commit 92ff512

File tree

3 files changed

+98
-32
lines changed

3 files changed

+98
-32
lines changed

examples/Run Async Requests.md

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Run Async Requests to Outscraper API
2+
3+
This example shows how to send async requests to the Outscraper API and retrieve the results later using request IDs (the requests are processed in parallel).
4+
5+
## Installation
6+
7+
Python 3+
8+
```bash
9+
pip install outscraper
10+
```
11+
[Link to the Python package page](https://pypi.org/project/outscraper/)
12+
13+
## Initialization
14+
```python
15+
from time import sleep
16+
from outscraper import ApiClient
17+
18+
19+
client = ApiClient(api_key='SECRET_API_KEY')
20+
```
21+
[Link to the profile page to create the API key](https://app.outscraper.com/profile)
22+
23+
## Usage
24+
25+
```python
26+
place_ids = [
    'ChIJNw4_-cWXyFYRF_4GTtujVsw',
    'ChIJ39fGAcGXyFYRNdHIXy-W5BA',
    'ChIJVVVl-cWXyFYRQYBCEkX0W5Y',
    'ChIJScUP1R6XyFYR0sY1UwNzq-c',
    'ChIJmeiNBMeXyFYRzQrnMMDV8Jc',
    'ChIJifOTBMeXyFYRmu3EGp_QBuY',
    'ChIJ1fwt-cWXyFYR2cjoDAGs9UI',
    'ChIJ5zQrTzSXyFYRuiY31iE7M1s',
    'ChIJQSyf4huXyFYRpP9W4rtBelA',
    'ChIJRWK5W2-byFYRiaF9vVgzZA4',
]
results = []
running_request_ids = set()

# Submit one async request per place and remember the request ID that comes back.
for place_id in place_ids:
    submitted = client.google_maps_search(place_id, limit=1, async_request=True)
    running_request_ids.add(submitted['id'])

attempts = 5  # poll at most 5 times
while attempts and running_request_ids:  # stop once attempts run out or nothing is pending
    attempts -= 1
    sleep(60)

    # Walk a snapshot of the set, because finished IDs are removed from it inside the loop.
    for request_id in tuple(running_request_ids):
        archive = client.get_request_archive(request_id)
        if archive['status'] != 'Success':
            continue  # not finished yet; check again on the next attempt

        results.append(archive['data'])
        running_request_ids.remove(request_id)

print(results)
58+
```

outscraper/api_client.py

Lines changed: 39 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import requests
22
from time import sleep
3+
from typing import Union
34

45
from .utils import as_list
56

@@ -66,6 +67,20 @@ def get_request_archive(self, request_id: str) -> dict:
6667

6768
raise Exception(f'Response status code: {response.status_code}')
6869

70+
def _handle_response(self, response: requests.models.Response, wait_async: bool = False, async_request: bool = False) -> Union[list, dict]:
71+
if 199 < response.status_code < 300:
72+
if wait_async:
73+
response_json = response.json()
74+
75+
if async_request:
76+
return response_json
77+
else:
78+
return self._wait_request_archive(response_json['id']).get('data', [])
79+
else:
80+
return response.json().get('data', [])
81+
82+
raise Exception(f'Response status code: {response.status_code}')
83+
6984
def _wait_request_archive(self, request_id: str) -> dict:
7085
ttl = self._max_ttl / self._requests_pause
7186

@@ -84,7 +99,7 @@ def _wait_request_archive(self, request_id: str) -> dict:
8499

85100
raise Exception('Timeout exceeded')
86101

87-
def google_search(self, query: list, pages_per_query: int = 1, uule: str = None, language: str = 'en', region: str = None, fields: list = None) -> list:
102+
def google_search(self, query: Union[list, str], pages_per_query: int = 1, uule: str = None, language: str = 'en', region: str = None, fields: list = None, async_request: bool = False) -> Union[list, dict]:
88103
'''
89104
Get data from Google search
90105
@@ -95,14 +110,15 @@ def google_search(self, query: list, pages_per_query: int = 1, uule: str = None,
95110
language (str): parameter specifies the language to use for Google. Available values: "en", "de", "es", "es-419", "fr", "hr", "it", "nl", "pl", "pt-BR", "pt-PT", "vi", "tr", "ru", "ar", "th", "ko", "zh-CN", "zh-TW", "ja", "ach", "af", "ak", "ig", "az", "ban", "ceb", "xx-bork", "bs", "br", "ca", "cs", "sn", "co", "cy", "da", "yo", "et", "xx-elmer", "eo", "eu", "ee", "tl", "fil", "fo", "fy", "gaa", "ga", "gd", "gl", "gn", "xx-hacker", "ht", "ha", "haw", "bem", "rn", "id", "ia", "xh", "zu", "is", "jw", "rw", "sw", "tlh", "kg", "mfe", "kri", "la", "lv", "to", "lt", "ln", "loz", "lua", "lg", "hu", "mg", "mt", "mi", "ms", "pcm", "no", "nso", "ny", "nn", "uz", "oc", "om", "xx-pirate", "ro", "rm", "qu", "nyn", "crs", "sq", "sk", "sl", "so", "st", "sr-ME", "sr-Latn", "su", "fi", "sv", "tn", "tum", "tk", "tw", "wo", "el", "be", "bg", "ky", "kk", "mk", "mn", "sr", "tt", "tg", "uk", "ka", "hy", "yi", "iw", "ug", "ur", "ps", "sd", "fa", "ckb", "ti", "am", "ne", "mr", "hi", "bn", "pa", "gu", "or", "ta", "te", "kn", "ml", "si", "lo", "my", "km", "chr".
96111
region (str): parameter specifies the region to use for Google. Available values: "AF", "AL", "DZ", "AS", "AD", "AO", "AI", "AG", "AR", "AM", "AU", "AT", "AZ", "BS", "BH", "BD", "BY", "BE", "BZ", "BJ", "BT", "BO", "BA", "BW", "BR", "VG", "BN", "BG", "BF", "BI", "KH", "CM", "CA", "CV", "CF", "TD", "CL", "CN", "CO", "CG", "CD", "CK", "CR", "CI", "HR", "CU", "CY", "CZ", "DK", "DJ", "DM", "DO", "EC", "EG", "SV", "EE", "ET", "FJ", "FI", "FR", "GA", "GM", "GE", "DE", "GH", "GI", "GR", "GL", "GT", "GG", "GY", "HT", "HN", "HK", "HU", "IS", "IN", "ID", "IQ", "IE", "IM", "IL", "IT", "JM", "JP", "JE", "JO", "KZ", "KE", "KI", "KW", "KG", "LA", "LV", "LB", "LS", "LY", "LI", "LT", "LU", "MG", "MW", "MY", "MV", "ML", "MT", "MU", "MX", "FM", "MD", "MN", "ME", "MS", "MA", "MZ", "MM", "NA", "NR", "NP", "NL", "NZ", "NI", "NE", "NG", "NU", "MK", "NO", "OM", "PK", "PS", "PA", "PG", "PY", "PE", "PH", "PN", "PL", "PT", "PR", "QA", "RO", "RU", "RW", "WS", "SM", "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "SI", "SB", "SO", "ZA", "KR", "ES", "LK", "SH", "VC", "SR", "SE", "CH", "TW", "TJ", "TZ", "TH", "TL", "TG", "TO", "TT", "TN", "TR", "TM", "VI", "UG", "UA", "AE", "GB", "US", "UY", "UZ", "VU", "VE", "VN", "ZM", "ZW".
97112
fields (list): parameter defines which fields you want to include with each item returned in the response. By default, it returns all fields.
113+
async_request (bool): parameter defines the way you want to submit your task to Outscraper. It can be set to `False` (default) to send a task and wait until you get your results, or `True` to submit your task and retrieve the results later using a request ID with `get_request_archive`. Each response is available for `2` hours after a request has been completed.
98114
99115
Returns:
100116
list or dict: json result (a dict containing the request ID when async_request is True)
101117
102118
See: https://app.outscraper.com/api-docs#tag/Google-Search/paths/~1google-search-v2/get
103119
'''
104120
queries = as_list(query)
105-
async_request = len(queries) > 1 or pages_per_query > 1
121+
wait_async = async_request or (len(queries) > 1 or pages_per_query > 1)
106122

107123
response = requests.get(f'{self._api_url}/google-search-v3', params={
108124
'query': queries,
@@ -114,13 +130,7 @@ def google_search(self, query: list, pages_per_query: int = 1, uule: str = None,
114130
'fields': ','.join(fields) if fields else '',
115131
}, headers=self._api_headers)
116132

117-
if 199 < response.status_code < 300:
118-
if async_request:
119-
return self._wait_request_archive(response.json()['id']).get('data', [])
120-
else:
121-
return response.json().get('data', [])
122-
123-
raise Exception(f'Response status code: {response.status_code}')
133+
return self._handle_response(response, wait_async, async_request)
124134

125135
def google_search_news(self, query: list, pages_per_query: int = 1, uule: str = None, tbs: str = None, language: str = 'en', region: str = None, fields: list = None) -> list:
126136
'''
@@ -195,9 +205,9 @@ def google_maps_search_v1(self, query: list, limit: int = 500, extract_contacts:
195205

196206
raise Exception(f'Response status code: {response.status_code}')
197207

198-
def google_maps_search(self, query: list, limit: int = 20, drop_duplicates: bool = False,
199-
language: str = 'en', region: str = None, skip: int = 0, fields: list = None,
200-
) -> list:
208+
def google_maps_search(self, query: Union[list, str], limit: int = 20, drop_duplicates: bool = False,
209+
language: str = 'en', region: str = None, skip: int = 0, fields: list = None, async_request: bool = False
210+
) -> Union[list, dict]:
201211
'''
202212
Get Google Maps Data V2 (speed optimized endpoint for real time data)
203213
@@ -213,27 +223,28 @@ def google_maps_search(self, query: list, limit: int = 20, drop_duplicates: bool
213223
language (str): parameter specifies the language to use for Google. Available values: "en", "de", "es", "es-419", "fr", "hr", "it", "nl", "pl", "pt-BR", "pt-PT", "vi", "tr", "ru", "ar", "th", "ko", "zh-CN", "zh-TW", "ja", "ach", "af", "ak", "ig", "az", "ban", "ceb", "xx-bork", "bs", "br", "ca", "cs", "sn", "co", "cy", "da", "yo", "et", "xx-elmer", "eo", "eu", "ee", "tl", "fil", "fo", "fy", "gaa", "ga", "gd", "gl", "gn", "xx-hacker", "ht", "ha", "haw", "bem", "rn", "id", "ia", "xh", "zu", "is", "jw", "rw", "sw", "tlh", "kg", "mfe", "kri", "la", "lv", "to", "lt", "ln", "loz", "lua", "lg", "hu", "mg", "mt", "mi", "ms", "pcm", "no", "nso", "ny", "nn", "uz", "oc", "om", "xx-pirate", "ro", "rm", "qu", "nyn", "crs", "sq", "sk", "sl", "so", "st", "sr-ME", "sr-Latn", "su", "fi", "sv", "tn", "tum", "tk", "tw", "wo", "el", "be", "bg", "ky", "kk", "mk", "mn", "sr", "tt", "tg", "uk", "ka", "hy", "yi", "iw", "ug", "ur", "ps", "sd", "fa", "ckb", "ti", "am", "ne", "mr", "hi", "bn", "pa", "gu", "or", "ta", "te", "kn", "ml", "si", "lo", "my", "km", "chr".
214224
region (str): parameter specifies the region to use for Google. Available values: "AF", "AL", "DZ", "AS", "AD", "AO", "AI", "AG", "AR", "AM", "AU", "AT", "AZ", "BS", "BH", "BD", "BY", "BE", "BZ", "BJ", "BT", "BO", "BA", "BW", "BR", "VG", "BN", "BG", "BF", "BI", "KH", "CM", "CA", "CV", "CF", "TD", "CL", "CN", "CO", "CG", "CD", "CK", "CR", "CI", "HR", "CU", "CY", "CZ", "DK", "DJ", "DM", "DO", "EC", "EG", "SV", "EE", "ET", "FJ", "FI", "FR", "GA", "GM", "GE", "DE", "GH", "GI", "GR", "GL", "GT", "GG", "GY", "HT", "HN", "HK", "HU", "IS", "IN", "ID", "IQ", "IE", "IM", "IL", "IT", "JM", "JP", "JE", "JO", "KZ", "KE", "KI", "KW", "KG", "LA", "LV", "LB", "LS", "LY", "LI", "LT", "LU", "MG", "MW", "MY", "MV", "ML", "MT", "MU", "MX", "FM", "MD", "MN", "ME", "MS", "MA", "MZ", "MM", "NA", "NR", "NP", "NL", "NZ", "NI", "NE", "NG", "NU", "MK", "NO", "OM", "PK", "PS", "PA", "PG", "PY", "PE", "PH", "PN", "PL", "PT", "PR", "QA", "RO", "RU", "RW", "WS", "SM", "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "SI", "SB", "SO", "ZA", "KR", "ES", "LK", "SH", "VC", "SR", "SE", "CH", "TW", "TJ", "TZ", "TH", "TL", "TG", "TO", "TT", "TN", "TR", "TM", "VI", "UG", "UA", "AE", "GB", "US", "UY", "UZ", "VU", "VE", "VN", "ZM", "ZW".
215225
fields (list): parameter defines which fields you want to include with each item returned in the response. By default, it returns all fields.
226+
async_request (bool): parameter defines the way you want to submit your task to Outscraper. It can be set to `False` (default) to send a task and wait until you get your results, or `True` to submit your task and retrieve the results later using a request ID with `get_request_archive`. Each response is available for `2` hours after a request has been completed.
216227
217228
Returns:
218229
list or dict: json result (a dict containing the request ID when async_request is True)
219230
220231
See: https://app.outscraper.com/api-docs#tag/Google-Maps/paths/~1maps~1search-v2/get
221232
'''
233+
queries = as_list(query)
234+
wait_async = async_request or (len(queries) > 10 and limit > 1)
235+
222236
response = requests.get(f'{self._api_url}/maps/search-v2', params={
223-
'query': as_list(query),
237+
'query': queries,
224238
'language': language,
225239
'region': region,
226240
'organizationsPerQueryLimit': limit,
227241
'skipPlaces': skip,
228242
'dropDuplicates': drop_duplicates,
229-
'async': False,
243+
'async': wait_async,
230244
'fields': ','.join(fields) if fields else '',
231245
}, headers=self._api_headers)
232246

233-
if 199 < response.status_code < 300:
234-
return response.json().get('data', [])
235-
236-
raise Exception(f'Response status code: {response.status_code}')
247+
return self._handle_response(response, wait_async, async_request)
237248

238249
def google_maps_directions(self, query: list, departure_time: int = None, finish_time: int = None, interval: int = 60,
239250
language: str = 'en', region: str = None, async_request: bool = False, fields: list = None
@@ -326,10 +337,10 @@ def google_maps_reviews_v2(self, query: list, reviews_limit: int = 100, limit: i
326337

327338
raise Exception(f'Response status code: {response.status_code}')
328339

329-
def google_maps_reviews(self, query: list, reviews_limit: int = 10, limit: int = 1, sort: str = 'most_relevant',
340+
def google_maps_reviews(self, query: Union[list, str], reviews_limit: int = 10, limit: int = 1, sort: str = 'most_relevant',
330341
skip: int = 0, start: int = None, cutoff: int = None, cutoff_rating: int = None, ignore_empty: bool = False,
331-
language: str = 'en', region: str = None, reviews_query: str = None, fields: list = None
332-
) -> list:
342+
language: str = 'en', region: str = None, reviews_query: str = None, fields: list = None, async_request: bool = False
343+
) -> Union[list, dict]:
333344
'''
334345
Get Google Maps Reviews V3 (speed optimized endpoint for real time data)
335346
@@ -352,15 +363,18 @@ def google_maps_reviews(self, query: list, reviews_limit: int = 10, limit: int =
352363
language (str): parameter specifies the language to use for Google. Available values: "en", "de", "es", "es-419", "fr", "hr", "it", "nl", "pl", "pt-BR", "pt-PT", "vi", "tr", "ru", "ar", "th", "ko", "zh-CN", "zh-TW", "ja", "ach", "af", "ak", "ig", "az", "ban", "ceb", "xx-bork", "bs", "br", "ca", "cs", "sn", "co", "cy", "da", "yo", "et", "xx-elmer", "eo", "eu", "ee", "tl", "fil", "fo", "fy", "gaa", "ga", "gd", "gl", "gn", "xx-hacker", "ht", "ha", "haw", "bem", "rn", "id", "ia", "xh", "zu", "is", "jw", "rw", "sw", "tlh", "kg", "mfe", "kri", "la", "lv", "to", "lt", "ln", "loz", "lua", "lg", "hu", "mg", "mt", "mi", "ms", "pcm", "no", "nso", "ny", "nn", "uz", "oc", "om", "xx-pirate", "ro", "rm", "qu", "nyn", "crs", "sq", "sk", "sl", "so", "st", "sr-ME", "sr-Latn", "su", "fi", "sv", "tn", "tum", "tk", "tw", "wo", "el", "be", "bg", "ky", "kk", "mk", "mn", "sr", "tt", "tg", "uk", "ka", "hy", "yi", "iw", "ug", "ur", "ps", "sd", "fa", "ckb", "ti", "am", "ne", "mr", "hi", "bn", "pa", "gu", "or", "ta", "te", "kn", "ml", "si", "lo", "my", "km", "chr".
353364
region (str): parameter specifies the region to use for Google. Available values: "AF", "AL", "DZ", "AS", "AD", "AO", "AI", "AG", "AR", "AM", "AU", "AT", "AZ", "BS", "BH", "BD", "BY", "BE", "BZ", "BJ", "BT", "BO", "BA", "BW", "BR", "VG", "BN", "BG", "BF", "BI", "KH", "CM", "CA", "CV", "CF", "TD", "CL", "CN", "CO", "CG", "CD", "CK", "CR", "CI", "HR", "CU", "CY", "CZ", "DK", "DJ", "DM", "DO", "EC", "EG", "SV", "EE", "ET", "FJ", "FI", "FR", "GA", "GM", "GE", "DE", "GH", "GI", "GR", "GL", "GT", "GG", "GY", "HT", "HN", "HK", "HU", "IS", "IN", "ID", "IQ", "IE", "IM", "IL", "IT", "JM", "JP", "JE", "JO", "KZ", "KE", "KI", "KW", "KG", "LA", "LV", "LB", "LS", "LY", "LI", "LT", "LU", "MG", "MW", "MY", "MV", "ML", "MT", "MU", "MX", "FM", "MD", "MN", "ME", "MS", "MA", "MZ", "MM", "NA", "NR", "NP", "NL", "NZ", "NI", "NE", "NG", "NU", "MK", "NO", "OM", "PK", "PS", "PA", "PG", "PY", "PE", "PH", "PN", "PL", "PT", "PR", "QA", "RO", "RU", "RW", "WS", "SM", "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "SI", "SB", "SO", "ZA", "KR", "ES", "LK", "SH", "VC", "SR", "SE", "CH", "TW", "TJ", "TZ", "TH", "TL", "TG", "TO", "TT", "TN", "TR", "TM", "VI", "UG", "UA", "AE", "GB", "US", "UY", "UZ", "VU", "VE", "VN", "ZM", "ZW".
354365
fields (list): parameter defines which fields you want to include with each item returned in the response. By default, it returns all fields.
366+
async_request (bool): parameter defines the way you want to submit your task to Outscraper. It can be set to `False` (default) to send a task and wait until you get your results, or `True` to submit your task and retrieve the results later using a request ID with `get_request_archive`. Each response is available for `2` hours after a request has been completed.
355367
356368
Returns:
357369
list or dict: json result (a dict containing the request ID when async_request is True)
358370
359371
See: https://app.outscraper.com/api-docs#tag/Google-Maps/paths/~1maps~1reviews-v3/get
360372
'''
361-
async_request = reviews_limit > 499
373+
queries = as_list(query)
374+
wait_async = async_request or reviews_limit > 499 or len(queries) > 10
375+
362376
response = requests.get(f'{self._api_url}/maps/reviews-v3', params={
363-
'query': as_list(query),
377+
'query': queries,
364378
'reviewsLimit': reviews_limit,
365379
'limit': limit,
366380
'sort': sort,
@@ -372,17 +386,11 @@ def google_maps_reviews(self, query: list, reviews_limit: int = 10, limit: int =
372386
'ignoreEmpty': ignore_empty,
373387
'language': language,
374388
'region': region,
375-
'async': async_request,
389+
'async': wait_async,
376390
'fields': ','.join(fields) if fields else '',
377391
}, headers=self._api_headers)
378392

379-
if 199 < response.status_code < 300:
380-
if async_request:
381-
return self._wait_request_archive(response.json()['id']).get('data', [])
382-
else:
383-
return response.json().get('data', [])
384-
385-
raise Exception(f'Response status code: {response.status_code}')
393+
return self._handle_response(response, wait_async, async_request)
386394

387395
def google_maps_photos(self, query: list, photosLimit: int = 100, limit: int = 1, coordinates: str = None,
388396
language: str = 'en', region: str = None, fields: list = None

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def readme():
88

99
setup(
1010
name='outscraper',
11-
version='2.1.0',
11+
version='3.0.0',
1212
description='Python bindings for the Outscraper API',
1313
long_description=readme(),
1414
classifiers = ['Programming Language :: Python',

0 commit comments

Comments
 (0)