diff --git a/requirements.txt b/requirements.txt index ffe894a..b4cc30e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ requests pandas arrow +retrying diff --git a/setup.py b/setup.py index 2346eb3..ffe851e 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setuptools.setup( name="stweet", - version="1.2.2", + version="1.3.0", author="Marcin Wątroba", author_email="markowanga@gmail.com", description="Package to scrap tweets", diff --git a/stweet/auth/simple_auth_token_provider.py b/stweet/auth/simple_auth_token_provider.py index 03beb6b..955992a 100644 --- a/stweet/auth/simple_auth_token_provider.py +++ b/stweet/auth/simple_auth_token_provider.py @@ -2,6 +2,7 @@ import json from json import JSONDecodeError +from retrying import retry from .auth_token_provider import AuthTokenProvider, AuthTokenProviderFactory from ..exceptions import RefreshTokenException @@ -25,15 +26,20 @@ def __init__(self, web_client: WebClient): self.web_client = web_client return + @staticmethod + def _get_auth_request_details() -> RequestDetails: + return RequestDetails(HttpMethod.POST, _url, {'Authorization': _auth_token}, dict(), _timeout) + def _request_for_response_body(self): """Method from Twint.""" - token_request_details = RequestDetails(HttpMethod.POST, _url, {'Authorization': _auth_token}, dict(), _timeout) + token_request_details = SimpleAuthTokenProvider._get_auth_request_details() token_response = self.web_client.run_request(token_request_details) if token_response.is_success(): return token_response.text else: raise RefreshTokenException('Error during request for token') + @retry(stop_max_attempt_number=8) def get_new_token(self) -> str: """Method to get refreshed token. In case of error raise RefreshTokenException.""" try: diff --git a/stweet/http_request/interceptor/__init__.py b/stweet/http_request/interceptor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/stweet/http_request/interceptor/logging_requests_web_client_interceptor.py b/stweet/http_request/interceptor/logging_requests_web_client_interceptor.py new file mode 100644 index 0000000..c7a0501 --- /dev/null +++ b/stweet/http_request/interceptor/logging_requests_web_client_interceptor.py @@ -0,0 +1,55 @@ +"""Class of LoggingRequestsWebClientInterceptor.""" +import logging +from http.client import HTTPConnection +from typing import List + +from .. import RequestsWebClient, WebClient, RequestDetails, RequestResponse + + +class LoggingRequestsWebClientInterceptor(WebClient.WebClientInterceptor): + """Class of LoggingRequestsWebClientInterceptor.""" + + @staticmethod + def _debug_requests_on(): + """Switches on logging of the requests module.""" + HTTPConnection.debuglevel = 1 + + logging.basicConfig() + logging.getLogger().setLevel(logging.DEBUG) + requests_log = logging.getLogger("requests.packages.urllib3") + requests_log.setLevel(logging.DEBUG) + requests_log.propagate = True + + @staticmethod + def _debug_requests_off(): + """Switches off logging of the requests module, might be some side-effects.""" + HTTPConnection.debuglevel = 0 + + root_logger = logging.getLogger() + root_logger.setLevel(logging.WARNING) + root_logger.handlers = [] + requests_log = logging.getLogger("requests.packages.urllib3") + requests_log.setLevel(logging.NOTSET) + requests_log.propagate = False + + def logs_to_show(self, params: RequestDetails) -> bool: + """Method to decide that show logs of request. + + Method can be overridden and then the logs will be filtered – example by request url. + """ + return True + + def intercept( + self, + requests_details: RequestDetails, + next_interceptors: List[WebClient.WebClientInterceptor], + web_client: RequestsWebClient + ) -> RequestResponse: + """Method show logs when predicate is true. Uses static field so it can be problem with concurrency.""" + is_to_log = self.logs_to_show(requests_details) + if is_to_log: + LoggingRequestsWebClientInterceptor._debug_requests_on() + to_return = self.get_response(requests_details, next_interceptors, web_client) + if is_to_log: + LoggingRequestsWebClientInterceptor._debug_requests_off() + return to_return diff --git a/stweet/http_request/interceptor/params_response_log_web_client_interceptor.py b/stweet/http_request/interceptor/params_response_log_web_client_interceptor.py new file mode 100644 index 0000000..ca47509 --- /dev/null +++ b/stweet/http_request/interceptor/params_response_log_web_client_interceptor.py @@ -0,0 +1,50 @@ +"""Class of ParamsResponseLogWebClientInterceptor.""" +import threading +from typing import List + +from .. import RequestsWebClient, WebClient, RequestDetails, RequestResponse + + +class ParamsResponseLogWebClientInterceptor(WebClient.WebClientInterceptor): + """Class of ParamsResponseLogWebClientInterceptor. + + Interceptor log input params and out response. + """ + + _counter: int + _lock: threading.Lock + + def __init__(self): + """Constructor of ParamsResponseLogWebClientInterceptor.""" + self._value = 0 + self._lock = threading.Lock() + + def increment(self) -> int: + """Thread safe increment. Returns old value.""" + with self._lock: + to_return = self._value + self._value += 1 + return to_return + + def logs_to_show(self, params: RequestDetails) -> bool: + """Method to decide that show logs of request. + + Method can be overridden and then the logs will be filtered – example by request url. + """ + return True + + def intercept( + self, + requests_details: RequestDetails, + next_interceptors: List[WebClient.WebClientInterceptor], + web_client: RequestsWebClient + ) -> RequestResponse: + """Method show logs when predicate is true. Uses static field so it can be problem with concurrency.""" + is_to_log = self.logs_to_show(requests_details) + index = self.increment() + if is_to_log: + print(f'{index} -- {requests_details}') + to_return = self.get_response(requests_details, next_interceptors, web_client) + if is_to_log: + print(f'{index} -- {to_return}') + return to_return diff --git a/stweet/http_request/requests/requests_web_client.py b/stweet/http_request/requests/requests_web_client.py index 286e613..5464fc4 100644 --- a/stweet/http_request/requests/requests_web_client.py +++ b/stweet/http_request/requests/requests_web_client.py @@ -1,5 +1,7 @@ """Request search_runner class.""" -from typing import Optional, Dict +from __future__ import annotations + +from typing import Optional, Dict, List import requests @@ -18,13 +20,15 @@ class RequestsWebClient(WebClient): def __init__( self, proxy: Optional[RequestsWebClientProxyConfig] = None, - verify: bool = True + verify: bool = True, + interceptors: Optional[List[WebClient.WebClientInterceptor]] = None ): """Constructor of RequestsWebClient.""" + super(RequestsWebClient, self).__init__(interceptors) self.proxy = proxy self.verify = verify - def run_request(self, params: RequestDetails) -> RequestResponse: + def run_clear_request(self, params: RequestDetails) -> RequestResponse: """Main method to run request using requests package.""" session = requests.Session() response = session.request( diff --git a/stweet/http_request/web_client.py b/stweet/http_request/web_client.py index c8fcc5f..e39bb47 100644 --- a/stweet/http_request/web_client.py +++ b/stweet/http_request/web_client.py @@ -1,13 +1,60 @@ """Web client abstract class.""" -from abc import abstractmethod +from __future__ import annotations + +from abc import abstractmethod, ABC +from typing import List, Optional from .request_details import RequestDetails from .request_response import RequestResponse +def _run_request_with_interceptors( + requests_details: RequestDetails, + next_interceptors: List[WebClient.WebClientInterceptor], + web_client: WebClient +) -> RequestResponse: + return next_interceptors[0].intercept(requests_details, next_interceptors[1:], web_client) if len( + next_interceptors) > 0 else web_client.run_clear_request(requests_details) + + class WebClient: """Web client abstract class.""" + _interceptors: List[WebClientInterceptor] + + def __init__(self, interceptors: Optional[List[WebClientInterceptor]]): + """Base constructor of class.""" + self._interceptors = [] if interceptors is None else interceptors + + def run_request(self, requests_details: RequestDetails) -> RequestResponse: + """Method process the request. Method wrap request with interceptors.""" + return _run_request_with_interceptors(requests_details, self._interceptors, self) + @abstractmethod - def run_request(self, params: RequestDetails) -> RequestResponse: - """Abstract method to run request.""" + def run_clear_request(self, params: RequestDetails) -> RequestResponse: + """Abstract method to run only the request.""" + + class WebClientInterceptor(ABC): + """Abstract class of web client interceptor.""" + + @staticmethod + def get_response( + requests_details: RequestDetails, + next_interceptors: List[WebClient.WebClientInterceptor], + web_client: WebClient + ) -> RequestResponse: + """Method process request. If any interceptor passes method wrap request with this.""" + return _run_request_with_interceptors(requests_details, next_interceptors, web_client) + + @abstractmethod + def intercept( + self, + requests_details: RequestDetails, + next_interceptors: List[WebClient.WebClientInterceptor], + web_client: WebClient + ) -> RequestResponse: + """Interceptor method of request. + + Method need to call WebClientInterceptor.get_response to process request by next interceptors + and client. + """ diff --git a/tests/integration/exception_test.py b/tests/integration/exception_test.py index c101852..a9c983c 100644 --- a/tests/integration/exception_test.py +++ b/tests/integration/exception_test.py @@ -3,6 +3,7 @@ import stweet as st from stweet.auth import SimpleAuthTokenProvider from stweet.exceptions import RefreshTokenException, ScrapBatchBadResponse +from stweet.http_request import RequestDetails, RequestResponse from tests.mock_web_client import MockWebClient @@ -28,12 +29,13 @@ def test_get_auth_token_with_incorrect_response_3(): def test_runner_exceptions(): class TokenExpiryExceptionWebClient(st.WebClient): + count_dict = dict({ 'https://api.twitter.com/2/search/adaptive.json': 0, 'https://api.twitter.com/1.1/guest/activate.json': 0 }) - def run_request(self, params: st.http_request.RequestDetails) -> st.http_request.RequestResponse: + def run_clear_request(self, params: st.http_request.RequestDetails) -> st.http_request.RequestResponse: self.count_dict[params.url] = self.count_dict[params.url] + 1 if params.url == 'https://api.twitter.com/2/search/adaptive.json': if self.count_dict[params.url] == 1: @@ -50,7 +52,7 @@ def run_request(self, params: st.http_request.RequestDetails) -> st.http_request st.TweetSearchRunner( search_tweets_task=search_tweets_task, tweet_outputs=[], - web_client=TokenExpiryExceptionWebClient(), + web_client=TokenExpiryExceptionWebClient(None), auth_token_provider_factory=st.auth.SimpleAuthTokenProviderFactory() ).run() diff --git a/tests/integration/interceptor_test.py b/tests/integration/interceptor_test.py new file mode 100644 index 0000000..97b546f --- /dev/null +++ b/tests/integration/interceptor_test.py @@ -0,0 +1,50 @@ +import sys +from io import StringIO + +import stweet as st +from stweet.auth import SimpleAuthTokenProvider +from stweet.http_request import HttpMethod +from stweet.http_request.interceptor.logging_requests_web_client_interceptor import LoggingRequestsWebClientInterceptor +from stweet.http_request.interceptor.params_response_log_web_client_interceptor import \ + ParamsResponseLogWebClientInterceptor + + +def get_example_request_details() -> st.http_request.RequestDetails: + return st.http_request.RequestDetails( + http_method=HttpMethod.GET, + url='https://api.github.com/events', + params=dict({}), + headers=dict({}), + timeout=200 + ) + + +def start_redirect_output() -> StringIO: + captured_output = StringIO() + sys.stdout = captured_output + sys.stderr = captured_output + return captured_output + + +def stop_redirect_output(): + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + + +def test_logging_requests_web_client_interceptor(): + captured_output = start_redirect_output() + st.RequestsWebClient(interceptors=[LoggingRequestsWebClientInterceptor()]).run_request( + SimpleAuthTokenProvider._get_auth_request_details()) + stop_redirect_output() + content = captured_output.getvalue() + assert "send: b'POST /1.1/guest/activate.json HTTP/1.1" in content + + +def test_params_response_log_web_client_interceptor(): + captured_output = start_redirect_output() + st.RequestsWebClient(interceptors=[ParamsResponseLogWebClientInterceptor()]).run_request( + SimpleAuthTokenProvider._get_auth_request_details()) + stop_redirect_output() + content = captured_output.getvalue() + assert "RequestDetails(" in content + assert "RequestResponse(" in content