22import json
33from time import sleep
44
5+ from .utils import as_list
6+
7+
8+ VERSION = '0.0.7'
9+
510
611class ApiClient (object ):
712 """OutScraper ApiClient - Python SDK that allows extracting data from Google services via OutScraper API.
@@ -15,28 +20,34 @@ class ApiClient(object):
1520 """
1621
1722 _api_url = 'https://api.app.outscraper.com'
18- _api_key = None
23+ _api_headers = {}
1924
2025 _max_ttl = 60 * 10
2126
def __init__(self, api_key):
    '''
    Create a client bound to one API key.

    Builds the header set stored on the instance; the search methods of
    this class pass it as ``headers=`` on their API calls.

    Parameters:
        api_key: value sent in the ``X-API-KEY`` request header
    '''
    self._api_headers = {
        'X-API-KEY': api_key,
        # Identifies the SDK and its version to the API. No padding inside
        # the literal, so the header value has no stray whitespace.
        'client': f'Python SDK {VERSION}',
    }
32+
def get_request_archive(self, request_id: str):
    '''
    Fetch request data from archive

    Parameters:
        request_id (str): unique id for the request provided by ['id']

    Returns:
        dict: result from the archive (the decoded JSON body of the response)

    Raises:
        Exception: if the response status code is outside the 2xx range
    '''
    # NOTE(review): unlike the search endpoints, this call sends no API
    # headers - confirm the archive endpoint is meant to be unauthenticated.
    response = requests.get(f'{self._api_url}/requests/{request_id}')

    if 199 < response.status_code < 300:
        return response.json()

    raise Exception(f'Response status code: {response.status_code}')
3849
39- def _wait_request_archive (self , request_id , requests_pause ):
50+ def _wait_request_archive (self , request_id : str , requests_pause : int ):
4051 ttl = self ._max_ttl / requests_pause
4152
4253 while ttl > 0 :
@@ -49,47 +60,91 @@ def _wait_request_archive(self, request_id, requests_pause):
4960
5061 raise Exception ('Timeout exceeded' )
5162
def google_search(self, query, language='en', region=None):
    '''
    Get data from Google search

    Parameters:
        query (list | str): parameter defines the query or queries you want to search on Google. Using a list allows multiple queries to be sent in one request and save on network latency time. The value is passed through as_list() before it is sent.
        language (str): parameter specifies the language to use for Google. Available values: "en", "de", "es", "es-419", "fr", "hr", "it", "nl", "pl", "pt-BR", "pt-PT", "vi", "tr", "ru", "ar", "th", "ko", "zh-CN", "zh-TW", "ja", "ach", "af", "ak", "ig", "az", "ban", "ceb", "xx-bork", "bs", "br", "ca", "cs", "sn", "co", "cy", "da", "yo", "et", "xx-elmer", "eo", "eu", "ee", "tl", "fil", "fo", "fy", "gaa", "ga", "gd", "gl", "gn", "xx-hacker", "ht", "ha", "haw", "bem", "rn", "id", "ia", "xh", "zu", "is", "jw", "rw", "sw", "tlh", "kg", "mfe", "kri", "la", "lv", "to", "lt", "ln", "loz", "lua", "lg", "hu", "mg", "mt", "mi", "ms", "pcm", "no", "nso", "ny", "nn", "uz", "oc", "om", "xx-pirate", "ro", "rm", "qu", "nyn", "crs", "sq", "sk", "sl", "so", "st", "sr-ME", "sr-Latn", "su", "fi", "sv", "tn", "tum", "tk", "tw", "wo", "el", "be", "bg", "ky", "kk", "mk", "mn", "sr", "tt", "tg", "uk", "ka", "hy", "yi", "iw", "ug", "ur", "ps", "sd", "fa", "ckb", "ti", "am", "ne", "mr", "hi", "bn", "pa", "gu", "or", "ta", "te", "kn", "ml", "si", "lo", "my", "km", "chr".
        region (str): parameter specifies the region (country code) to use for Google. Available values: "AF", "AL", "DZ", "AS", "AD", "AO", "AI", "AG", "AR", "AM", "AU", "AT", "AZ", "BS", "BH", "BD", "BY", "BE", "BZ", "BJ", "BT", "BO", "BA", "BW", "BR", "VG", "BN", "BG", "BF", "BI", "KH", "CM", "CA", "CV", "CF", "TD", "CL", "CN", "CO", "CG", "CD", "CK", "CR", "CI", "HR", "CU", "CY", "CZ", "DK", "DJ", "DM", "DO", "EC", "EG", "SV", "EE", "ET", "FJ", "FI", "FR", "GA", "GM", "GE", "DE", "GH", "GI", "GR", "GL", "GT", "GG", "GY", "HT", "HN", "HK", "HU", "IS", "IN", "ID", "IQ", "IE", "IM", "IL", "IT", "JM", "JP", "JE", "JO", "KZ", "KE", "KI", "KW", "KG", "LA", "LV", "LB", "LS", "LY", "LI", "LT", "LU", "MG", "MW", "MY", "MV", "ML", "MT", "MU", "MX", "FM", "MD", "MN", "ME", "MS", "MA", "MZ", "MM", "NA", "NR", "NP", "NL", "NZ", "NI", "NE", "NG", "NU", "MK", "NO", "OM", "PK", "PS", "PA", "PG", "PY", "PE", "PH", "PN", "PL", "PT", "PR", "QA", "RO", "RU", "RW", "WS", "SM", "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "SI", "SB", "SO", "ZA", "KR", "ES", "LK", "SH", "VC", "SR", "SE", "CH", "TW", "TJ", "TZ", "TH", "TL", "TG", "TO", "TT", "TN", "TR", "TM", "VI", "UG", "UA", "AE", "GB", "US", "UY", "UZ", "VU", "VE", "VN", "ZM", "ZW".

    Returns:
        dict: json result

    Raises:
        Exception: if the initial response status code is outside the 2xx range
    '''
    response = requests.get(f'{self._api_url}/search', params={
        'query': as_list(query),
        'language': language,
        'region': region,
    }, headers=self._api_headers)

    if 199 < response.status_code < 300:
        # Wait before the first archive lookup (presumably to give the task
        # time to finish), then poll with 2 as the requests_pause argument.
        sleep(10)
        return self._wait_request_archive(response.json()['id'], 2)

    raise Exception(f'Response status code: {response.status_code}')
6486
def google_maps_search(self, query, language='en', region=None, limit=400, extract_contacts=False, coordinates=None, drop_duplicates=False):
    '''
    Get data from Google Maps

    Parameters:
        query (list | str): parameter defines the query or queries you want to search on Google Maps. Using a list allows multiple queries to be sent in one request and save on network latency time. The value is passed through as_list() before it is sent.
        language (str): parameter specifies the language to use for Google. Available values: "en", "de", "es", "es-419", "fr", "hr", "it", "nl", "pl", "pt-BR", "pt-PT", "vi", "tr", "ru", "ar", "th", "ko", "zh-CN", "zh-TW", "ja", "ach", "af", "ak", "ig", "az", "ban", "ceb", "xx-bork", "bs", "br", "ca", "cs", "sn", "co", "cy", "da", "yo", "et", "xx-elmer", "eo", "eu", "ee", "tl", "fil", "fo", "fy", "gaa", "ga", "gd", "gl", "gn", "xx-hacker", "ht", "ha", "haw", "bem", "rn", "id", "ia", "xh", "zu", "is", "jw", "rw", "sw", "tlh", "kg", "mfe", "kri", "la", "lv", "to", "lt", "ln", "loz", "lua", "lg", "hu", "mg", "mt", "mi", "ms", "pcm", "no", "nso", "ny", "nn", "uz", "oc", "om", "xx-pirate", "ro", "rm", "qu", "nyn", "crs", "sq", "sk", "sl", "so", "st", "sr-ME", "sr-Latn", "su", "fi", "sv", "tn", "tum", "tk", "tw", "wo", "el", "be", "bg", "ky", "kk", "mk", "mn", "sr", "tt", "tg", "uk", "ka", "hy", "yi", "iw", "ug", "ur", "ps", "sd", "fa", "ckb", "ti", "am", "ne", "mr", "hi", "bn", "pa", "gu", "or", "ta", "te", "kn", "ml", "si", "lo", "my", "km", "chr".
        region (str): parameter specifies the region (country code) to use for Google. Available values: "AF", "AL", "DZ", "AS", "AD", "AO", "AI", "AG", "AR", "AM", "AU", "AT", "AZ", "BS", "BH", "BD", "BY", "BE", "BZ", "BJ", "BT", "BO", "BA", "BW", "BR", "VG", "BN", "BG", "BF", "BI", "KH", "CM", "CA", "CV", "CF", "TD", "CL", "CN", "CO", "CG", "CD", "CK", "CR", "CI", "HR", "CU", "CY", "CZ", "DK", "DJ", "DM", "DO", "EC", "EG", "SV", "EE", "ET", "FJ", "FI", "FR", "GA", "GM", "GE", "DE", "GH", "GI", "GR", "GL", "GT", "GG", "GY", "HT", "HN", "HK", "HU", "IS", "IN", "ID", "IQ", "IE", "IM", "IL", "IT", "JM", "JP", "JE", "JO", "KZ", "KE", "KI", "KW", "KG", "LA", "LV", "LB", "LS", "LY", "LI", "LT", "LU", "MG", "MW", "MY", "MV", "ML", "MT", "MU", "MX", "FM", "MD", "MN", "ME", "MS", "MA", "MZ", "MM", "NA", "NR", "NP", "NL", "NZ", "NI", "NE", "NG", "NU", "MK", "NO", "OM", "PK", "PS", "PA", "PG", "PY", "PE", "PH", "PN", "PL", "PT", "PR", "QA", "RO", "RU", "RW", "WS", "SM", "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "SI", "SB", "SO", "ZA", "KR", "ES", "LK", "SH", "VC", "SR", "SE", "CH", "TW", "TJ", "TZ", "TH", "TL", "TG", "TO", "TT", "TN", "TR", "TM", "VI", "UG", "UA", "AE", "GB", "US", "UY", "UZ", "VU", "VE", "VN", "ZM", "ZW".
        limit (int): parameter specifies the limit of organizations to take from one query search. Usually, there are no more than 400 organizations per one query search on Google Maps. Use more precise categories (asian restaurant, italian restaurant, etc.) to overcome this limitation.
        extract_contacts (bool): parameter specifies whether the bot will scrape additional data (emails, social links, site keywords…) from companies’ websites. It increases the time of the extraction.
        coordinates (str): parameter defines the coordinates to use along with the query. Example: "@41.3954381,2.1628662,15.1z".
        drop_duplicates (bool): parameter specifies whether the bot will drop the same organizations from different queries. Using the parameter combines results from each query inside one big array.

    Returns:
        dict: json result

    Raises:
        Exception: if the initial response status code is outside the 2xx range
    '''
    response = requests.get(f'{self._api_url}/maps/search', params={
        # Normalised with as_list() like the other search methods of this
        # class, so a single string and a list are handled the same way.
        'query': as_list(query),
        'coordinates': coordinates,
        'language': language,
        'region': region,
        'organizationsPerQueryLimit': limit,
        'extractContacts': extract_contacts,
        'dropDuplicates': drop_duplicates,
    }, headers=self._api_headers)

    if 199 < response.status_code < 300:
        # Wait before the first archive lookup (presumably to give the task
        # time to finish), then poll with 5 as the requests_pause argument.
        sleep(15)
        return self._wait_request_archive(response.json()['id'], 5)

    raise Exception(f'Response status code: {response.status_code}')
80118
81- def google_maps_business_reviews (self , query , language = 'en' , region = 'us' , limit = 100 , cutoff = None , coordinates = None , sort = 'most_relevant' , cutoff_rating = None ):
119+ def google_maps_business_reviews (self , query , language = 'en' , region = None , limit = 100 , cutoff = None , coordinates = None , sort = 'most_relevant' , cutoff_rating = None , organizations_per_query_limit = 1 ):
120+ '''
121+ Get reviews from Google Maps
122+
123+ Parameters:
124+ query (list): parameter defines the query or queries you want to search on Google Maps. Using a lists allows multiple queries to be sent in one request and save on network latency time.
125+ language (str): parameter specifies the language to use for Google. Available values: "en", "de", "es", "es-419", "fr", "hr", "it", "nl", "pl", "pt-BR", "pt-PT", "vi", "tr", "ru", "ar", "th", "ko", "zh-CN", "zh-TW", "ja", "ach", "af", "ak", "ig", "az", "ban", "ceb", "xx-bork", "bs", "br", "ca", "cs", "sn", "co", "cy", "da", "yo", "et", "xx-elmer", "eo", "eu", "ee", "tl", "fil", "fo", "fy", "gaa", "ga", "gd", "gl", "gn", "xx-hacker", "ht", "ha", "haw", "bem", "rn", "id", "ia", "xh", "zu", "is", "jw", "rw", "sw", "tlh", "kg", "mfe", "kri", "la", "lv", "to", "lt", "ln", "loz", "lua", "lg", "hu", "mg", "mt", "mi", "ms", "pcm", "no", "nso", "ny", "nn", "uz", "oc", "om", "xx-pirate", "ro", "rm", "qu", "nyn", "crs", "sq", "sk", "sl", "so", "st", "sr-ME", "sr-Latn", "su", "fi", "sv", "tn", "tum", "tk", "tw", "wo", "el", "be", "bg", "ky", "kk", "mk", "mn", "sr", "tt", "tg", "uk", "ka", "hy", "yi", "iw", "ug", "ur", "ps", "sd", "fa", "ckb", "ti", "am", "ne", "mr", "hi", "bn", "pa", "gu", "or", "ta", "te", "kn", "ml", "si", "lo", "my", "km", "chr".
126+ region (str): parameter specifies the language to use for Google. Available values: "AF", "AL", "DZ", "AS", "AD", "AO", "AI", "AG", "AR", "AM", "AU", "AT", "AZ", "BS", "BH", "BD", "BY", "BE", "BZ", "BJ", "BT", "BO", "BA", "BW", "BR", "VG", "BN", "BG", "BF", "BI", "KH", "CM", "CA", "CV", "CF", "TD", "CL", "CN", "CO", "CG", "CD", "CK", "CR", "CI", "HR", "CU", "CY", "CZ", "DK", "DJ", "DM", "DO", "EC", "EG", "SV", "EE", "ET", "FJ", "FI", "FR", "GA", "GM", "GE", "DE", "GH", "GI", "GR", "GL", "GT", "GG", "GY", "HT", "HN", "HK", "HU", "IS", "IN", "ID", "IQ", "IE", "IM", "IL", "IT", "JM", "JP", "JE", "JO", "KZ", "KE", "KI", "KW", "KG", "LA", "LV", "LB", "LS", "LY", "LI", "LT", "LU", "MG", "MW", "MY", "MV", "ML", "MT", "MU", "MX", "FM", "MD", "MN", "ME", "MS", "MA", "MZ", "MM", "NA", "NR", "NP", "NL", "NZ", "NI", "NE", "NG", "NU", "MK", "NO", "OM", "PK", "PS", "PA", "PG", "PY", "PE", "PH", "PN", "PL", "PT", "PR", "QA", "RO", "RU", "RW", "WS", "SM", "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "SI", "SB", "SO", "ZA", "KR", "ES", "LK", "SH", "VC", "SR", "SE", "CH", "TW", "TJ", "TZ", "TH", "TL", "TG", "TO", "TT", "TN", "TR", "TM", "VI", "UG", "UA", "AE", "GB", "US", "UY", "UZ", "VU", "VE", "VN", "ZM", "ZW".
127+ limit (int): parameter specifies the limit of reviews to extract from one organization.
128+ cutoff (int): parameter specifies the maximum timestamp value for reviews. Using the cutoff parameter overwrites sort parameter to newest.
129+ coordinates (str): parameter defines the coordinates to use along with the query. Example: "@41.3954381,2.1628662,15.1z".
130+ sort (str): parameter specifies one of the sorting types. Available values: "most_relevant", "newest", "highest_rating", "lowest_rating".
131+ cutoff_rating (int): parameter specifies the maximum (for lowest_rating sorting) or minimum (for highest_rating sorting) rating for reviews. Using the cutoffRating requires sorting to be set to "lowest_rating" or "highest_rating".
132+ organizations_per_query_limit (int): parameter specifies the limit of organizations to take from one query search.
133+
134+ Returns:
135+ dict: json result
136+ '''
82137 response = requests .get (f'{ self ._api_url } /maps/reviews' , params = {
83- 'query' : query ,
138+ 'query' : as_list ( query ) ,
84139 'coordinates' : coordinates ,
85140 'language' : language ,
86141 'region' : region ,
87- 'limit' : 1 ,
88142 'cutoff' : cutoff ,
89143 'cutoffRating' : cutoff_rating ,
144+ 'organizationsPerQueryLimit' : organizations_per_query_limit ,
90145 'reviewsPerOrganizationLimit' : limit ,
91146 'sort' : sort ,
92- }, headers = { 'X-API-KEY' : self ._api_key } )
147+ }, headers = self ._api_headers )
93148
94149 if 199 < response .status_code < 300 :
95150 sleep (30 )
0 commit comments