From 51cd0fb9eec86876e5c9d34bbdd9719f6ccf9e8d Mon Sep 17 00:00:00 2001
From: benoitgoujon
Date: Thu, 27 Feb 2020 14:53:07 +0100
Subject: [PATCH 01/66] Feature: add date handler for past week and past month

---
 nck/utils/date_handler.py        | 20 +++++++++++
 tests/utils/test_date_handler.py | 60 ++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 nck/utils/date_handler.py
 create mode 100644 tests/utils/test_date_handler.py

diff --git a/nck/utils/date_handler.py b/nck/utils/date_handler.py
new file mode 100644
index 00000000..e62164e0
--- /dev/null
+++ b/nck/utils/date_handler.py
@@ -0,0 +1,20 @@
+import calendar
+from datetime import date, timedelta
+from typing import Tuple
+
+
+def get_date_start_and_date_stop_from_range(
+    date_range: str
+) -> Tuple[date, date]:
+    today = date.today()
+    if date_range == "PREVIOUS_MONTH":
+        last_day_of_previous_month = \
+            today.replace(day=1) - timedelta(days=1)
+        year = last_day_of_previous_month.year
+        month = last_day_of_previous_month.month
+        return date(year, month, 1), date(year, month, calendar.monthrange(year, month)[1])
+    elif date_range == "PREVIOUS_WEEK":
+        first_day_of_last_week = today - timedelta(days=today.weekday(), weeks=1)
+        return first_day_of_last_week, first_day_of_last_week + timedelta(days=6)
+    else:
+        return None
diff --git a/tests/utils/test_date_handler.py b/tests/utils/test_date_handler.py
new file mode 100644
index 00000000..b26fbdb4
--- /dev/null
+++ b/tests/utils/test_date_handler.py
@@ -0,0 +1,60 @@
+from datetime import date
+import unittest
+from unittest.mock import patch
+
+from parameterized import parameterized
+
+from nck.utils.date_handler import get_date_start_and_date_stop_from_range
+
+
+class TestDateHandler(unittest.TestCase):
+
+    @parameterized.expand([
+        (
+            date(2020, 2, 1),
+            (date(2020, 1, 1), date(2020, 1, 31))
+        ),
+        (
+            date(2020, 1, 1),
+            (date(2019, 12, 1), date(2019, 12, 31))
+        ),
+        (
+            date(2020, 2, 15),
+            (date(2020, 1, 1), date(2020, 1, 31))
+        ),
+        (
+            date(2019, 12, 1),
+            (date(2019, 11, 1), date(2019, 11, 30))
+        )
+    ])
+    def test_get_date_start_and_date_stop_with_previous_month(self, date_of_day, expected):
+        input_range = "PREVIOUS_MONTH"
+        with patch("nck.utils.date_handler.date") as mock_date:
+            mock_date.today.return_value = date_of_day
+            mock_date.side_effect = lambda *args, **kw: date(*args, **kw)
+            self.assertTupleEqual(
+                expected,
+                get_date_start_and_date_stop_from_range(input_range),
+                f"Bad return when frozen date is {date_of_day}"
+            )
+
+    @parameterized.expand([
+        (
+            date(2020, 1, 6),
+            (date(2019, 12, 30), date(2020, 1, 5))
+        ),
+        (
+            date(2020, 1, 13),
+            (date(2020, 1, 6), date(2020, 1, 12))
+        )
+    ])
+    def test_get_date_start_and_date_stop_with_previous_week(self, date_of_day, expected):
+        input_range = "PREVIOUS_WEEK"
+        with patch("nck.utils.date_handler.date") as mock_date:
+            mock_date.today.return_value = date_of_day
+            mock_date.side_effect = lambda *args, **kw: date(*args, **kw)
+            self.assertTupleEqual(
+                expected,
+                get_date_start_and_date_stop_from_range(input_range),
+                f"Bad return when frozen date is {date_of_day}"
+            )

From bdd68c746b4ff052c752165a00db8c10dd3e168a Mon Sep 17 00:00:00 2001
From: benoitgoujon
Date: Thu, 27 Feb 2020 17:07:23 +0100
Subject: [PATCH 02/66] Feature: add date to each line of reach report

---
 nck/readers/dbm_reader.py | 23 ++++++++++++++++++++---
 nck/utils/text.py         | 18 +++++++++++++++---
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/nck/readers/dbm_reader.py b/nck/readers/dbm_reader.py index
02c7ccc8..47802378 100644 --- a/nck/readers/dbm_reader.py +++ b/nck/readers/dbm_reader.py @@ -57,6 +57,15 @@ @click.option("--dbm-query-param-type", default="TYPE_TRUEVIEW") @click.option("--dbm-start-date", type=click.DateTime()) @click.option("--dbm-end-date", type=click.DateTime()) +@click.option( + "--dbm-add-date-to-report", + type=click.BOOL, + default=False, + help=( + "Sometimes the date range on which metrics are computed is missing from the report. " + "If this option is set to True, this range will be added." + ) +) @click.option("--dbm-filter", type=click.Tuple([str, int]), multiple=True) @click.option("--dbm-file-type", multiple=True) @click.option( @@ -69,7 +78,7 @@ "--dbm-day-range", required=True, default="LAST_7_DAYS", - type=click.Choice(["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_90_DAYS", "LAST_7_DAYS"]), + type=click.Choice(["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_90_DAYS", "LAST_7_DAYS", "PREVIOUS_MONTH", "PREVIOUS_WEEK"]), ) @processor("dbm_access_token", "dbm_refresh_token", "dbm_client_secret") def dbm(**kwargs): @@ -180,10 +189,18 @@ def get_query_report_url(self, existing_query=True): return url def get_query_report(self, existing_query=True): - url = self.get_query_report_url(existing_query) report = requests.get(url, stream=True) - return get_generator_dict_from_str_csv(report.iter_lines()) + if self.kwargs["query_param_type"] == "TYPE_REACH_AND_FREQUENCY" \ + and self.kwargs["add_date_to_report"]: + return get_generator_dict_from_str_csv( + report.iter_lines(), + add_date=True, + day_range=self.kwargs["day_range"], + date_format=self.kwargs.get("date_format") + ) + else: + return get_generator_dict_from_str_csv(report.iter_lines()) def list_query_reports(self): reports_infos = self._client.reports().listreports(queryId=self.kwargs.get("query_id")).execute() diff --git a/nck/utils/text.py b/nck/utils/text.py index 14ddfc9d..89dff3aa 100644 --- a/nck/utils/text.py +++ b/nck/utils/text.py @@ -21,6 +21,8 @@ import csv from io import StringIO +from nck.utils.date_handler import get_date_start_and_date_stop_from_range + def add_column_value_to_csv_line_iterator(line_iterator, columname, value): first_line = True @@ -37,7 +39,10 @@ def add_column_value_to_csv_line_iterator(line_iterator, columname, value): def get_generator_dict_from_str_csv( - line_iterator: Generator[Union[bytes, str], None, None] + line_iterator: Generator[Union[bytes, str], None, None], + add_date=False, + day_range=None, + date_format="%Y-%m-%d" ) -> Generator[Dict[str, str], None, None]: first_line = next(line_iterator) headers = ( @@ -45,6 +50,8 @@ def get_generator_dict_from_str_csv( if isinstance(first_line, bytes) else first_line.split(",") ) + if add_date: + headers.extend(["date_start", "date_stop"]) for line in line_iterator: if isinstance(line, bytes): try: @@ -58,10 +65,15 @@ def get_generator_dict_from_str_csv( err.object[err.start : err.end], ) line = line.decode("utf-8", errors="ignore") + if line == "": break - else: - yield dict(zip(headers, parse_decoded_line(line))) + + if add_date: + start, end = get_date_start_and_date_stop_from_range(day_range) + line += f", {start.strftime(date_format)}, {end.strftime(date_format)}" + + yield dict(zip(headers, parse_decoded_line(line))) def parse_decoded_line(line: str, delimiter=",", quotechar='"') -> List[str]: From 4e53cf1434192a56db10bc7700e8386e3e661066 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Thu, 27 Feb 2020 17:17:13 +0100 Subject: [PATCH 03/66] Fix: apply american standard, weeks go from sunday to saturday --- 
 nck/utils/date_handler.py       | 3 ++-
 tests/utils/test_date_handler.py | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/nck/utils/date_handler.py b/nck/utils/date_handler.py
index e62164e0..08260fa2 100644
--- a/nck/utils/date_handler.py
+++ b/nck/utils/date_handler.py
@@ -14,7 +14,8 @@ def get_date_start_and_date_stop_from_range(
        month = last_day_of_previous_month.month
        return date(year, month, 1), date(year, month, calendar.monthrange(year, month)[1])
    elif date_range == "PREVIOUS_WEEK":
-        first_day_of_last_week = today - timedelta(days=today.weekday(), weeks=1)
+        # The API uses American standard, weeks go from sunday to next saturday
+        first_day_of_last_week = today - timedelta(days=today.weekday() + 1, weeks=1)
        return first_day_of_last_week, first_day_of_last_week + timedelta(days=6)
    else:
        return None
diff --git a/tests/utils/test_date_handler.py b/tests/utils/test_date_handler.py
index b26fbdb4..0c50f72b 100644
--- a/tests/utils/test_date_handler.py
+++ b/tests/utils/test_date_handler.py
@@ -41,11 +41,11 @@ def test_get_date_start_and_date_stop_with_previous_month(self, date_of_day, exp
    @parameterized.expand([
        (
            date(2020, 1, 6),
-            (date(2019, 12, 30), date(2020, 1, 5))
+            (date(2019, 12, 29), date(2020, 1, 4))
        ),
        (
            date(2020, 1, 13),
-            (date(2020, 1, 6), date(2020, 1, 12))
+            (date(2020, 1, 5), date(2020, 1, 11))
        )
    ])
    def test_get_date_start_and_date_stop_with_previous_week(self, date_of_day, expected):

From 596c9af3889f3aba9ecd0ddaf97a6ae8b9d2df1a Mon Sep 17 00:00:00 2001
From: benoitgoujon
Date: Thu, 27 Feb 2020 17:23:13 +0100
Subject: [PATCH 04/66] Fix: add vscode config to gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index aa72cf14..57f9190c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -95,6 +95,9 @@ venv.bak/
 .spyderproject
 .spyproject

+# Visual studio code config
+.vscode/
+
 # Rope project settings
 .ropeproject

From e9e1542310532ae14af16e0c63f0a7da4318f386 Mon Sep 17 00:00:00 2001
From: benoitgoujon
Date: Thu, 27 Feb 2020 17:23:38 +0100
Subject: [PATCH 05/66] Fix: add better parsing of header row

---
 nck/utils/text.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nck/utils/text.py b/nck/utils/text.py
index 89dff3aa..0baccfc5 100644
--- a/nck/utils/text.py
+++ b/nck/utils/text.py
@@ -46,9 +46,9 @@ def get_generator_dict_from_str_csv(
 ) -> Generator[Dict[str, str], None, None]:
    first_line = next(line_iterator)
    headers = (
-        first_line.decode("utf-8").split(",")
+        parse_decoded_line(first_line.decode("utf-8"))
        if isinstance(first_line, bytes)
-        else first_line.split(",")
+        else parse_decoded_line(first_line)
    )
    if add_date:
        headers.extend(["date_start", "date_stop"])

From 07007d50fb6d15fb731e1e673795bd5a52d09941 Mon Sep 17 00:00:00 2001
From: benoitgoujon
Date: Thu, 27 Feb 2020 18:38:02 +0100
Subject: [PATCH 06/66] Fix: remove customer-specific code in tests and implement a new test to check dates are added

---
 tests/utils/test_text_utils.py | 176 ++++++++++++++++++---------------
 1 file changed, 97 insertions(+), 79 deletions(-)

diff --git a/tests/utils/test_text_utils.py b/tests/utils/test_text_utils.py
index 45e9783c..db36e2ca 100644
--- a/tests/utils/test_text_utils.py
+++ b/tests/utils/test_text_utils.py
@@ -15,8 +15,10 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program; if not, write to the Free Software Foundation,
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+from datetime import date import logging import unittest +from unittest import mock from nck.utils.text import get_generator_dict_from_str_csv, parse_decoded_line @@ -25,12 +27,9 @@ class TestTextUtilsMethod(unittest.TestCase): def test_multiple_encodings(self): test_string_to_encode = ( - "BR,Sanofi Aventis Brasil,3945535,Active,Allegra,3992233,0,," - "YR_Sanofi_Allegra_201910_Consideration_DV360_Precision_" - "Native-Ads_Cross-Device_BR,11140383,Active,," - "YR_Sanofi_Allegra_201910_Consideration_DV360_Precision_OA" - "_Native-Ads_DV-Affinity-Health_Desktop_BR," - '1130016,0,," ",0.00,4080863' + 'BR,test_partner,123,Active,test_advertiser,123,' + '0,,test_io,123,Active,,test_line_item' + ',123,0,,"",0.00,41' ) lines = [ (b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" @@ -48,30 +47,24 @@ def test_multiple_encodings(self): line_iterator_multiple_encodings = (line for line in lines) expected_dict = { "Country": "BR", - "Partner": "Sanofi Aventis Brasil", - "Partner ID": "3945535", + "Partner": "test_partner", + "Partner ID": "123", "Partner Status": "Active", - "Advertiser": "Allegra", - "Advertiser ID": "3992233", + "Advertiser": "test_advertiser", + "Advertiser ID": "123", "Advertiser Status": "0", "Advertiser Integration Code": "", - "Insertion Order": ( - "YR_Sanofi_Allegra_201910_Consideration_DV360" - "_Precision_Native-Ads_Cross-Device_BR" - ), - "Insertion Order ID": "11140383", + "Insertion Order": "test_io", + "Insertion Order ID": "123", "Insertion Order Status": "Active", "Insertion Order Integration Code": "", - "Line Item": ( - "YR_Sanofi_Allegra_201910_Consideration_DV360_Precision_" - "OA_Native-Ads_DV-Affinity-Health_Desktop_BR" - ), - "Line Item ID": "1130016", + "Line Item": "test_line_item", + "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", - "Targeted Data Providers": ' ', + "Targeted Data Providers": '', "Cookie Reach: Average Impression Frequency": "0.00", - "Cookie Reach: Impression Reach": "4080863", + "Cookie Reach: Impression Reach": "41", } for yielded_dict in get_generator_dict_from_str_csv( line_iterator_multiple_encodings @@ -96,32 +89,24 @@ def test_blank_line(self): lines.insert( 1, - (b'BR,Sanofi Aventis Brasil,3945535,Active,Allegra,3992233,' - b'0,,YR_Sanofi_Awareness_2019_Allegra_Hardsell_Display_DV360' - b'_Cross-Device_BR,8674464,Active,,YR_Sanofi_Allegra_Hardsell' - b'_Display_Datalogix-Health-Beauty-Buyers-Allergy_Desktop_BR' - b',26143278,0,,"",0.00,41')) + (b'BR,test_partner,123,Active,test_advertiser,123,' + b'0,,test_io,123,Active,,test_line_item' + b',123,0,,"",0.00,41')) expected_dict = { "Country": "BR", - "Partner": "Sanofi Aventis Brasil", - "Partner ID": "3945535", + "Partner": "test_partner", + "Partner ID": "123", "Partner Status": "Active", - "Advertiser": "Allegra", - "Advertiser ID": "3992233", + "Advertiser": "test_advertiser", + "Advertiser ID": "123", "Advertiser Status": "0", "Advertiser Integration Code": "", - "Insertion Order": ( - "YR_Sanofi_Awareness_2019_Allegra_Hardsell_Display_DV360" - "_Cross-Device_BR" - ), - "Insertion Order ID": "8674464", + "Insertion Order": "test_io", + "Insertion Order ID": "123", "Insertion Order Status": "Active", "Insertion Order Integration Code": "", - "Line Item": ( - "YR_Sanofi_Allegra_Hardsell_Display_Datalogix-Health" - "-Beauty-Buyers-Allergy_Desktop_BR" - ), - "Line Item ID": "26143278", + "Line Item": "test_line_item", + "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", "Targeted Data Providers": '', 
@@ -152,34 +137,26 @@ def test_invalid_byte(self): b" Status,Line Item Integration Code,Targeted Data Providers," b"Cookie Reach: Average Impression Frequency,Cookie Reach: " b"Impression Reach"), - (b'BR,Sanofi Aventis Brasil,3945535,Active,Allegra,3992233,' - b'0,,YR_Sanofi_Awareness_2019_Allegra_Hardsell_Display_DV360' - b'_Cross-Device_BR,8674464,Active,,YR_Sanofi_Allegra_Hardsell' - b'_Display_Datalogix-Health-Beauty-Buyers-Allergy_Desktop_BR' - b',26143278,0,," \x91\xea\xd0$",0.00,41'), + (b'BR,test_partner,123,Active,test_advertiser,123,' + b'0,,test_io,123,Active,,test_line_item' + b',123,0,," \x91\xea\xd0$",0.00,41'), ] line_iterator_invalid_byte = (line for line in lines) expected_dict = { "Country": "BR", - "Partner": "Sanofi Aventis Brasil", - "Partner ID": "3945535", + "Partner": "test_partner", + "Partner ID": "123", "Partner Status": "Active", - "Advertiser": "Allegra", - "Advertiser ID": "3992233", + "Advertiser": "test_advertiser", + "Advertiser ID": "123", "Advertiser Status": "0", "Advertiser Integration Code": "", - "Insertion Order": ( - "YR_Sanofi_Awareness_2019_Allegra_Hardsell_Display_DV360" - "_Cross-Device_BR" - ), - "Insertion Order ID": "8674464", + "Insertion Order": "test_io", + "Insertion Order ID": "123", "Insertion Order Status": "Active", "Insertion Order Integration Code": "", - "Line Item": ( - "YR_Sanofi_Allegra_Hardsell_Display_Datalogix-Health-Beauty" - "-Buyers-Allergy_Desktop_BR" - ), - "Line Item ID": "26143278", + "Line Item": "test_line_item", + "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", "Targeted Data Providers": ' $', @@ -207,33 +184,25 @@ def test_response_not_binary(self): " Status,Line Item Integration Code,Targeted Data Providers," "Cookie Reach: Average Impression Frequency,Cookie Reach: " "Impression Reach"), - ('BR,Sanofi Aventis Brasil,3945535,Active,Allegra,3992233,' - '0,,YR_Sanofi_Awareness_2019_Allegra_Hardsell_Display_DV360' - '_Cross-Device_BR,8674464,Active,,YR_Sanofi_Allegra_Hardsell' - '_Display_Datalogix-Health-Beauty-Buyers-Allergy_Desktop_BR' - ',26143278,0,,"",0.00,41') + ('BR,test_partner,123,Active,test_advertiser,123,' + '0,,test_io,123,Active,,test_line_item' + ',123,0,,"",0.00,41') ] expected_dict = { "Country": "BR", - "Partner": "Sanofi Aventis Brasil", - "Partner ID": "3945535", + "Partner": "test_partner", + "Partner ID": "123", "Partner Status": "Active", - "Advertiser": "Allegra", - "Advertiser ID": "3992233", + "Advertiser": "test_advertiser", + "Advertiser ID": "123", "Advertiser Status": "0", "Advertiser Integration Code": "", - "Insertion Order": ( - "YR_Sanofi_Awareness_2019_Allegra_Hardsell_Display_DV360" - "_Cross-Device_BR" - ), - "Insertion Order ID": "8674464", + "Insertion Order": "test_io", + "Insertion Order ID": "123", "Insertion Order Status": "Active", "Insertion Order Integration Code": "", - "Line Item": ( - "YR_Sanofi_Allegra_Hardsell_Display_Datalogix-Health" - "-Beauty-Buyers-Allergy_Desktop_BR" - ), - "Line Item ID": "26143278", + "Line Item": "test_line_item", + "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", "Targeted Data Providers": '', @@ -262,3 +231,52 @@ def test_line_parsing(self): parse_decoded_line(input_lines[index]), expected_outputs[index] ) + + @mock.patch( + "nck.utils.date_handler.get_date_start_and_date_stop_from_range", + return_value=(date(2020, 1, 1), date(2020, 1, 31)) + ) + def test_response_not_binary_with_date(self, mock_date_function): + lines = [ + ("Country,Partner,Partner ID,Partner 
Status,Advertiser,Advertiser" + " ID,Advertiser Status,Advertiser Integration Code,Insertion" + " Order,Insertion Order ID,Insertion Order Status,Insertion" + " Order Integration Code,Line Item,Line Item ID,Line Item" + " Status,Line Item Integration Code,Targeted Data Providers," + "Cookie Reach: Average Impression Frequency,Cookie Reach: " + "Impression Reach"), + ('BR,test_partner,123,Active,test_advertiser,123,' + '0,,test_io,123,Active,,test_line_item' + ',123,0,,"",0.00,41') + ] + expected_dict = { + "Country": "BR", + "Partner": "test_partner", + "Partner ID": "123", + "Partner Status": "Active", + "Advertiser": "test_advertiser", + "Advertiser ID": "123", + "Advertiser Status": "0", + "Advertiser Integration Code": "", + "Insertion Order": "test_io", + "Insertion Order ID": "123", + "Insertion Order Status": "Active", + "Insertion Order Integration Code": "", + "Line Item": "test_line_item", + "Line Item ID": "123", + "Line Item Status": "0", + "Line Item Integration Code": "", + "Targeted Data Providers": '', + "Cookie Reach: Average Impression Frequency": "0.00", + "Cookie Reach: Impression Reach": "41", + "date_start": "2020/01/01", + "date_stop": "2020/01/31" + } + line_iterator_with_blank_line = (line for line in lines) + for dic in get_generator_dict_from_str_csv( + line_iterator_with_blank_line, + add_date=True, + day_range="PREVIOUS_MONTH", + date_format="%Y/%m/%d" + ): + self.assertEqual(dic, expected_dict) From ff4b14b1882c53ac31f2c9e1be4e12fddc7cd7e0 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Fri, 28 Feb 2020 20:10:37 +0100 Subject: [PATCH 07/66] functional reader - no tests --- nck/clients/dcm_client.py | 33 ++++++ nck/helpers/dcm_helper.py | 29 +++++ nck/readers/__init__.py | 2 + nck/readers/dcm_reader.py | 222 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 286 insertions(+) create mode 100644 nck/clients/dcm_client.py create mode 100644 nck/helpers/dcm_helper.py create mode 100644 nck/readers/dcm_reader.py diff --git a/nck/clients/dcm_client.py b/nck/clients/dcm_client.py new file mode 100644 index 00000000..65bce27e --- /dev/null +++ b/nck/clients/dcm_client.py @@ -0,0 +1,33 @@ +import logging +import httplib2 + +from oauth2client import client, GOOGLE_TOKEN_URI +from googleapiclient import discovery + +logger = logging.getLogger("CM_client") + + +class DCMClient: + API_NAME = "dfareporting" + API_VERSION = "v3.3" + + def __init__(self, access_token, client_id, client_secret, refresh_token): + # self._access_token = access_token, + # self._client_id = client_id, + # self._client_secret = client_secret, + # self._refresh_token = refresh_token, + self._credentials = client.GoogleCredentials( + access_token=access_token, + client_id=client_id, + client_secret=client_secret, + refresh_token=refresh_token, + token_expiry=None, + token_uri=GOOGLE_TOKEN_URI, + user_agent=None, + ) + http = self._credentials.authorize(httplib2.Http()) + self._credentials.refresh(http) + self.auth = ( + self._credentials.token_response["token_type"] + " " + self._credentials.token_response["access_token"] + ) + self._service = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) diff --git a/nck/helpers/dcm_helper.py b/nck/helpers/dcm_helper.py new file mode 100644 index 00000000..1f45c995 --- /dev/null +++ b/nck/helpers/dcm_helper.py @@ -0,0 +1,29 @@ +CRITERIA_MAPPING = { + "STANDARD": "criteria", + "REACH": "reachCriteria", + "PATH_TO_CONVERSION": "pathToConversionCriteria", + "FLOODLIGHT": "floodlightCriteria", + "CROSS_DIMENSION_REACH": 
"crossDimensionReachCriteria", +} + +REPORT_TYPES = list(CRITERIA_MAPPING.keys()) + +DATE_RANGES = [ + "LAST_14_DAYS", + "LAST_24_MONTHS", + "LAST_30_DAYS", + "LAST_365_DAYS", + "LAST_60_DAYS", + "LAST_7_DAYS", + "LAST_90_DAYS", + "MONTH_TO_DATE", + "PREVIOUS_MONTH", + "PREVIOUS_QUARTER", + "PREVIOUS_WEEK", + "PREVIOUS_YEAR", + "QUARTER_TO_DATE", + "TODAY", + "WEEK_TO_DATE", + "YEAR_TO_DATE", + "YESTERDAY", +] diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py index 5596896c..f2cb21a1 100644 --- a/nck/readers/__init__.py +++ b/nck/readers/__init__.py @@ -26,6 +26,7 @@ from nck.readers.salesforce_reader import salesforce from nck.readers.facebook_reader import facebook_marketing from nck.readers.dbm_reader import dbm +from nck.readers.dcm_reader import dcm from nck.readers.ga_reader import ga from nck.readers.search_console_reader import search_console from nck.readers.adobe_reader import adobe @@ -42,6 +43,7 @@ facebook_marketing, oracle, dbm, + dcm, ga, search_console, adobe, diff --git a/nck/readers/dcm_reader.py b/nck/readers/dcm_reader.py new file mode 100644 index 00000000..9f9bb19c --- /dev/null +++ b/nck/readers/dcm_reader.py @@ -0,0 +1,222 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+import csv
+import re
+from io import StringIO
+
+import click
+import logging
+
+import requests
+
+from click import ClickException
+from tenacity import retry, wait_exponential, stop_after_delay
+
+from nck.commands.command import processor
+from nck.readers.reader import Reader
+from nck.utils.args import extract_args
+from nck.streams.format_date_stream import FormatDateStream
+from nck.clients.dcm_client import DCMClient
+from nck.helpers.dcm_helper import REPORT_TYPES
+
+logger = logging.getLogger("CM_client")
+
+DATEFORMAT = "%Y-%m-%d"
+ENCODING = "utf-8"
+
+
+@click.command(name="read_dcm")
+@click.option("--dcm-access-token", default=None)
+@click.option("--dcm-client-id", required=True)
+@click.option("--dcm-client-secret", required=True)
+@click.option("--dcm-refresh-token", required=True)
+@click.option("--dcm-profile-id", required=True)
+@click.option("--dcm-report-name", default="DCM Report")
+@click.option("--dcm-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0])
+@click.option(
+    "--dcm-metric",
+    "dcm_metrics",
+    multiple=True,
+    help="https://developers.google.com/doubleclick-advertisers/v3.3/dimensions/#standard-metrics",
+)
+@click.option(
+    "--dcm-dimension",
+    "dcm_dimensions",
+    multiple=True,
+    help="https://developers.google.com/doubleclick-advertisers/v3.3/dimensions/#standard-dimensions",
+)
+@click.option("--dcm-start-date", type=click.DateTime(), required=True)
+@click.option("--dcm-end-date", type=click.DateTime(), required=True)
+@click.option(
+    "--dcm-filter",
+    "dcm_filters",
+    type=click.Tuple([str, str]),
+    multiple=True,
+    help="A filter is a tuple following this pattern: (dimensionName, dimensionValue). "
+    "https://developers.google.com/doubleclick-advertisers/v3.3/dimensions/#standard-filters",
+)
+@click.option(
+    "--dcm-date-format",
+    default="%Y-%m-%d",
+    help="An optional date format for the output stream. "
+    "Follow the syntax of https://docs.python.org/3.8/library/datetime.html#strftime-strptime-behavior",
+)
+@processor("dcm_access_token", "dcm_refresh_token", "dcm_client_secret")
+def dcm(**kwargs):
+    return DcmReader(**extract_args("dcm_", kwargs))
+
+
+class DcmReader(Reader):
+    def __init__(
+        self,
+        access_token,
+        client_id,
+        client_secret,
+        refresh_token,
+        profile_id,
+        report_name,
+        report_type,
+        metrics,
+        dimensions,
+        start_date,
+        end_date,
+        filters,
+        date_format,
+    ):
+        self.dcm_client = DCMClient(access_token, client_id, client_secret, refresh_token)
+        self.profile_id = profile_id
+        self.report_name = report_name
+        self.report_type = report_type
+        self.metrics = list(metrics)
+        self.dimensions = dimensions
+        self.start_date = start_date
+        self.end_date = end_date
+        self.filters = list(filters)
+        self.download_format = "CSV"
+        self.date_format = date_format
+
+    def build_report_skeleton(self, report_name, report_type):
+        report = {
+            # Set the required fields "name" and "type".
+ "name": report_name, + "type": report_type, + "format": self.download_format, + } + return report + + @staticmethod + def get_date_range(start_date=None, end_date=None): + if start_date and end_date: + start = start_date.strftime("%Y-%m-%d") + end = end_date.strftime("%Y-%m-%d") + logger.warning("Custom date range selected: " + start + " --> " + end) + return {"startDate": start, "endDate": end} + else: + raise ClickException("Please provide start date and end date in your request") + + def add_report_criteria(self, report, start_date, end_date, metrics, dimensions): + criteria = { + "dateRange": self.get_date_range(start_date, end_date), + "dimensions": [{"name": dim} for dim in dimensions], + "metricNames": metrics, + } + report["criteria"] = criteria + + def add_dimension_filters(self, report, profile_id, filters): + for dimension_name, dimension_value in filters: + request = { + "dimensionName": dimension_name, + "endDate": report["criteria"]["dateRange"]["endDate"], + "startDate": report["criteria"]["dateRange"]["startDate"], + } + values = self.dcm_client._service.dimensionValues().query(profileId=profile_id, body=request).execute() + + report["criteria"]["dimensionFilters"] = report["criteria"].get("dimensionFilters", []) + if values["items"]: + # Add value as a filter to the report criteria. + filter_value = next((val for val in values["items"] if val["value"] == dimension_value), {}) + if filter_value: + report["criteria"]["dimensionFilters"].append(filter_value) + + # @retry(wait=wait_exponential(multiplier=60, min=60, max=240), stop=stop_after_delay(3600)) + @retry(wait=wait_exponential(multiplier=1, min=1, max=4), stop=stop_after_delay(3600)) + def is_report_file_ready(self, report_id, file_id): + """Poke the report file status""" + report_file = self.dcm_client._service.files().get(reportId=report_id, fileId=file_id).execute() + + status = report_file["status"] + if status == "REPORT_AVAILABLE": + logger.info("File status is %s, ready to download." % status) + return True + elif status != "PROCESSING": + raise ClickException("File status is %s, processing failed." % status) + else: + raise ClickException("File status is PROCESSING") + + def direct_download(self, report_id, file_id): + # Retrieve the file metadata. + report_file = self.dcm_client._service.files().get(reportId=report_id, fileId=file_id).execute() + + if report_file["status"] == "REPORT_AVAILABLE": + # Create a get request. 
+ request = self.dcm_client._service.files().get_media(reportId=report_id, fileId=file_id) + headers = request.headers + headers.update({"Authorization": self.dcm_client.auth}) + r = requests.get(request.uri, stream=True, headers=headers) + + yield from r.iter_lines() + + def format_response(self, report_generator): + is_main_data = False + headers = [] + + for row in report_generator: + decoded_row = row.decode(ENCODING) + if re.match("^Report Fields", decoded_row): + decoded_row = next(report_generator).decode(ENCODING) + headers = decoded_row.split(",") + decoded_row = next(report_generator).decode(ENCODING) + is_main_data = True + if re.match("^Grand Total", decoded_row): + is_main_data = False + + if is_main_data: + csv_reader = csv.DictReader(StringIO(decoded_row), headers) + yield next(csv_reader) + + def read(self): + def result_generator(): + report = self.build_report_skeleton(self.report_name, self.report_type) + self.add_report_criteria(report, self.start_date, self.end_date, self.metrics, self.dimensions) + self.add_dimension_filters(report, self.profile_id, self.filters) + + inserted_report = ( + self.dcm_client._service.reports().insert(profileId=self.profile_id, body=report).execute() + ) + + report_id = inserted_report["id"] + + file = self.dcm_client._service.reports().run(profileId=self.profile_id, reportId=report_id).execute() + + file_id = file["id"] + + self.is_report_file_ready(file_id=file_id, report_id=report_id) + yield from self.format_response(self.direct_download(report_id, file_id)) + + # should replace results later by a good identifier + yield FormatDateStream("results", result_generator(), keys=["Date"], date_format=self.date_format) From 60ac24479058464792a337b6289310961f5c3837 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Sun, 1 Mar 2020 16:54:16 +0100 Subject: [PATCH 08/66] split logic - reader/client/helper --- nck/clients/dcm_client.py | 87 ++++++++++++++++++++++++++++++-- nck/readers/dcm_reader.py | 102 ++++---------------------------------- 2 files changed, 92 insertions(+), 97 deletions(-) diff --git a/nck/clients/dcm_client.py b/nck/clients/dcm_client.py index 65bce27e..16ed2a58 100644 --- a/nck/clients/dcm_client.py +++ b/nck/clients/dcm_client.py @@ -1,5 +1,9 @@ import logging import httplib2 +import requests + +from click import ClickException +from tenacity import retry, wait_exponential, stop_after_delay from oauth2client import client, GOOGLE_TOKEN_URI from googleapiclient import discovery @@ -12,10 +16,6 @@ class DCMClient: API_VERSION = "v3.3" def __init__(self, access_token, client_id, client_secret, refresh_token): - # self._access_token = access_token, - # self._client_id = client_id, - # self._client_secret = client_secret, - # self._refresh_token = refresh_token, self._credentials = client.GoogleCredentials( access_token=access_token, client_id=client_id, @@ -31,3 +31,82 @@ def __init__(self, access_token, client_id, client_secret, refresh_token): self._credentials.token_response["token_type"] + " " + self._credentials.token_response["access_token"] ) self._service = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) + self.download_format = "CSV" + + def build_report_skeleton(self, report_name, report_type): + report = { + # Set the required fields "name" and "type". 
+ "name": report_name, + "type": report_type, + "format": self.download_format, + } + return report + + @staticmethod + def get_date_range(start_date=None, end_date=None): + if start_date and end_date: + start = start_date.strftime("%Y-%m-%d") + end = end_date.strftime("%Y-%m-%d") + logger.warning("Custom date range selected: " + start + " --> " + end) + return {"startDate": start, "endDate": end} + else: + raise ClickException("Please provide start date and end date in your request") + + def add_report_criteria(self, report, start_date, end_date, metrics, dimensions): + criteria = { + "dateRange": self.get_date_range(start_date, end_date), + "dimensions": [{"name": dim} for dim in dimensions], + "metricNames": metrics, + } + report["criteria"] = criteria + + def add_dimension_filters(self, report, profile_id, filters): + for dimension_name, dimension_value in filters: + request = { + "dimensionName": dimension_name, + "endDate": report["criteria"]["dateRange"]["endDate"], + "startDate": report["criteria"]["dateRange"]["startDate"], + } + values = self._service.dimensionValues().query(profileId=profile_id, body=request).execute() + + report["criteria"]["dimensionFilters"] = report["criteria"].get("dimensionFilters", []) + if values["items"]: + # Add value as a filter to the report criteria. + filter_value = next((val for val in values["items"] if val["value"] == dimension_value), {}) + if filter_value: + report["criteria"]["dimensionFilters"].append(filter_value) + + def run_report(self, report, profile_id): + inserted_report = self._service.reports().insert(profileId=profile_id, body=report).execute() + report_id = inserted_report["id"] + file = self._service.reports().run(profileId=profile_id, reportId=report_id).execute() + file_id = file["id"] + return report_id, file_id + + # @retry(wait=wait_exponential(multiplier=60, min=60, max=240), stop=stop_after_delay(3600)) + @retry(wait=wait_exponential(multiplier=1, min=1, max=4), stop=stop_after_delay(3600)) + def is_report_file_ready(self, report_id, file_id): + """Poke the report file status""" + report_file = self._service.files().get(reportId=report_id, fileId=file_id).execute() + + status = report_file["status"] + if status == "REPORT_AVAILABLE": + logger.info("File status is %s, ready to download." % status) + return True + elif status != "PROCESSING": + raise ClickException("File status is %s, processing failed." % status) + else: + raise ClickException("File status is PROCESSING") + + def direct_report_download(self, report_id, file_id): + # Retrieve the file metadata. + report_file = self._service.files().get(reportId=report_id, fileId=file_id).execute() + + if report_file["status"] == "REPORT_AVAILABLE": + # Create a get request. 
+ request = self._service.files().get_media(reportId=report_id, fileId=file_id) + headers = request.headers + headers.update({"Authorization": self.auth}) + r = requests.get(request.uri, stream=True, headers=headers) + + yield from r.iter_lines() diff --git a/nck/readers/dcm_reader.py b/nck/readers/dcm_reader.py index 9f9bb19c..0b5962fa 100644 --- a/nck/readers/dcm_reader.py +++ b/nck/readers/dcm_reader.py @@ -22,11 +22,6 @@ import click import logging -import requests - -from click import ClickException -from tenacity import retry, wait_exponential, stop_after_delay - from nck.commands.command import processor from nck.readers.reader import Reader from nck.utils.args import extract_args @@ -107,81 +102,10 @@ def __init__( self.start_date = start_date self.end_date = end_date self.filters = list(filters) - self.download_format = "CSV" self.date_format = date_format - def build_report_skeleton(self, report_name, report_type): - report = { - # Set the required fields "name" and "type". - "name": report_name, - "type": report_type, - "format": self.download_format, - } - return report - @staticmethod - def get_date_range(start_date=None, end_date=None): - if start_date and end_date: - start = start_date.strftime("%Y-%m-%d") - end = end_date.strftime("%Y-%m-%d") - logger.warning("Custom date range selected: " + start + " --> " + end) - return {"startDate": start, "endDate": end} - else: - raise ClickException("Please provide start date and end date in your request") - - def add_report_criteria(self, report, start_date, end_date, metrics, dimensions): - criteria = { - "dateRange": self.get_date_range(start_date, end_date), - "dimensions": [{"name": dim} for dim in dimensions], - "metricNames": metrics, - } - report["criteria"] = criteria - - def add_dimension_filters(self, report, profile_id, filters): - for dimension_name, dimension_value in filters: - request = { - "dimensionName": dimension_name, - "endDate": report["criteria"]["dateRange"]["endDate"], - "startDate": report["criteria"]["dateRange"]["startDate"], - } - values = self.dcm_client._service.dimensionValues().query(profileId=profile_id, body=request).execute() - - report["criteria"]["dimensionFilters"] = report["criteria"].get("dimensionFilters", []) - if values["items"]: - # Add value as a filter to the report criteria. - filter_value = next((val for val in values["items"] if val["value"] == dimension_value), {}) - if filter_value: - report["criteria"]["dimensionFilters"].append(filter_value) - - # @retry(wait=wait_exponential(multiplier=60, min=60, max=240), stop=stop_after_delay(3600)) - @retry(wait=wait_exponential(multiplier=1, min=1, max=4), stop=stop_after_delay(3600)) - def is_report_file_ready(self, report_id, file_id): - """Poke the report file status""" - report_file = self.dcm_client._service.files().get(reportId=report_id, fileId=file_id).execute() - - status = report_file["status"] - if status == "REPORT_AVAILABLE": - logger.info("File status is %s, ready to download." % status) - return True - elif status != "PROCESSING": - raise ClickException("File status is %s, processing failed." % status) - else: - raise ClickException("File status is PROCESSING") - - def direct_download(self, report_id, file_id): - # Retrieve the file metadata. - report_file = self.dcm_client._service.files().get(reportId=report_id, fileId=file_id).execute() - - if report_file["status"] == "REPORT_AVAILABLE": - # Create a get request. 
- request = self.dcm_client._service.files().get_media(reportId=report_id, fileId=file_id) - headers = request.headers - headers.update({"Authorization": self.dcm_client.auth}) - r = requests.get(request.uri, stream=True, headers=headers) - - yield from r.iter_lines() - - def format_response(self, report_generator): + def format_response(report_generator): is_main_data = False headers = [] @@ -201,22 +125,14 @@ def format_response(self, report_generator): def read(self): def result_generator(): - report = self.build_report_skeleton(self.report_name, self.report_type) - self.add_report_criteria(report, self.start_date, self.end_date, self.metrics, self.dimensions) - self.add_dimension_filters(report, self.profile_id, self.filters) - - inserted_report = ( - self.dcm_client._service.reports().insert(profileId=self.profile_id, body=report).execute() - ) - - report_id = inserted_report["id"] - - file = self.dcm_client._service.reports().run(profileId=self.profile_id, reportId=report_id).execute() - - file_id = file["id"] - - self.is_report_file_ready(file_id=file_id, report_id=report_id) - yield from self.format_response(self.direct_download(report_id, file_id)) + report = self.dcm_client.build_report_skeleton(self.report_name, self.report_type) + self.dcm_client.add_report_criteria(report, self.start_date, self.end_date, self.metrics, self.dimensions) + self.dcm_client.add_dimension_filters(report, self.profile_id, self.filters) + report_id, file_id = self.dcm_client.run_report(report, self.profile_id) + self.dcm_client.is_report_file_ready(file_id=file_id, report_id=report_id) + report_generator = self.dcm_client.direct_report_download(report_id, file_id) + + yield from self.format_response(report_generator) # should replace results later by a good identifier yield FormatDateStream("results", result_generator(), keys=["Date"], date_format=self.date_format) From 5c52ea4d3831aec4e90774982d956a4c9203563f Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Sun, 1 Mar 2020 19:49:06 +0100 Subject: [PATCH 09/66] handle several profile IDs --- nck/clients/dcm_client.py | 1 - nck/readers/dcm_reader.py | 38 ++++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/nck/clients/dcm_client.py b/nck/clients/dcm_client.py index 16ed2a58..c0a24cd3 100644 --- a/nck/clients/dcm_client.py +++ b/nck/clients/dcm_client.py @@ -4,7 +4,6 @@ from click import ClickException from tenacity import retry, wait_exponential, stop_after_delay - from oauth2client import client, GOOGLE_TOKEN_URI from googleapiclient import discovery diff --git a/nck/readers/dcm_reader.py b/nck/readers/dcm_reader.py index 0b5962fa..c3bcf28e 100644 --- a/nck/readers/dcm_reader.py +++ b/nck/readers/dcm_reader.py @@ -17,10 +17,9 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
import csv import re -from io import StringIO - import click -import logging + +from io import StringIO from nck.commands.command import processor from nck.readers.reader import Reader @@ -29,8 +28,6 @@ from nck.clients.dcm_client import DCMClient from nck.helpers.dcm_helper import REPORT_TYPES -logger = logging.getLogger("CM_client") - DATEFORMAT = "%Y-%m-%d" ENCODING = "utf-8" @@ -40,7 +37,7 @@ @click.option("--dcm-client-id", required=True) @click.option("--dcm-client-secret", required=True) @click.option("--dcm-refresh-token", required=True) -@click.option("--dcm-profile-id", required=True) +@click.option("--dcm-profile-id", "dcm_profile_ids", required=True, multiple=True) @click.option("--dcm-report-name", default="DCM Report") @click.option("--dcm-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0]) @click.option( @@ -83,7 +80,7 @@ def __init__( client_id, client_secret, refresh_token, - profile_id, + profile_ids, report_name, report_type, metrics, @@ -94,7 +91,7 @@ def __init__( date_format, ): self.dcm_client = DCMClient(access_token, client_id, client_secret, refresh_token) - self.profile_id = profile_id + self.profile_ids = list(profile_ids) self.report_name = report_name self.report_type = report_type self.metrics = list(metrics) @@ -123,16 +120,21 @@ def format_response(report_generator): csv_reader = csv.DictReader(StringIO(decoded_row), headers) yield next(csv_reader) - def read(self): - def result_generator(): - report = self.dcm_client.build_report_skeleton(self.report_name, self.report_type) - self.dcm_client.add_report_criteria(report, self.start_date, self.end_date, self.metrics, self.dimensions) - self.dcm_client.add_dimension_filters(report, self.profile_id, self.filters) - report_id, file_id = self.dcm_client.run_report(report, self.profile_id) - self.dcm_client.is_report_file_ready(file_id=file_id, report_id=report_id) - report_generator = self.dcm_client.direct_report_download(report_id, file_id) + def result_generator(self): + report = self.dcm_client.build_report_skeleton(self.report_name, self.report_type) + self.dcm_client.add_report_criteria(report, self.start_date, self.end_date, self.metrics, self.dimensions) + + for profile_id in self.profile_ids: + self.dcm_client.add_dimension_filters(report, profile_id, self.filters) + + report_id, file_id = self.dcm_client.run_report(report, profile_id) - yield from self.format_response(report_generator) + is_ready = self.dcm_client.is_report_file_ready(file_id=file_id, report_id=report_id) + if is_ready: + report_generator = self.dcm_client.direct_report_download(report_id, file_id) + yield from self.format_response(report_generator) + + def read(self): # should replace results later by a good identifier - yield FormatDateStream("results", result_generator(), keys=["Date"], date_format=self.date_format) + yield FormatDateStream("results", self.result_generator(), keys=["Date"], date_format=self.date_format) From 37e1fa34191b6ebcef2feb52af4879cb5836206c Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Mon, 2 Mar 2020 09:56:54 +0100 Subject: [PATCH 10/66] Quality: remove spacesin csv --- nck/utils/text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nck/utils/text.py b/nck/utils/text.py index 0baccfc5..d80c5b13 100644 --- a/nck/utils/text.py +++ b/nck/utils/text.py @@ -71,7 +71,7 @@ def get_generator_dict_from_str_csv( if add_date: start, end = get_date_start_and_date_stop_from_range(day_range) - line += f", {start.strftime(date_format)}, {end.strftime(date_format)}" + line += 
f",{start.strftime(date_format)},{end.strftime(date_format)}" yield dict(zip(headers, parse_decoded_line(line))) From 3ea60f07660b43454b8a91767288df3fd5b805d1 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Mon, 2 Mar 2020 10:09:57 +0100 Subject: [PATCH 11/66] Fix: tests failed in CI --- tests/utils/test_text_utils.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/tests/utils/test_text_utils.py b/tests/utils/test_text_utils.py index db36e2ca..4b11014b 100644 --- a/tests/utils/test_text_utils.py +++ b/tests/utils/test_text_utils.py @@ -18,7 +18,7 @@ from datetime import date import logging import unittest -from unittest import mock +from unittest.mock import patch from nck.utils.text import get_generator_dict_from_str_csv, parse_decoded_line @@ -232,11 +232,7 @@ def test_line_parsing(self): expected_outputs[index] ) - @mock.patch( - "nck.utils.date_handler.get_date_start_and_date_stop_from_range", - return_value=(date(2020, 1, 1), date(2020, 1, 31)) - ) - def test_response_not_binary_with_date(self, mock_date_function): + def test_response_not_binary_with_date(self): lines = [ ("Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" " ID,Advertiser Status,Advertiser Integration Code,Insertion" @@ -273,10 +269,13 @@ def test_response_not_binary_with_date(self, mock_date_function): "date_stop": "2020/01/31" } line_iterator_with_blank_line = (line for line in lines) - for dic in get_generator_dict_from_str_csv( - line_iterator_with_blank_line, - add_date=True, - day_range="PREVIOUS_MONTH", - date_format="%Y/%m/%d" - ): - self.assertEqual(dic, expected_dict) + with patch("nck.utils.date_handler.date") as mock_date: + mock_date.today.return_value = date(2020, 2, 1) + mock_date.side_effect = lambda *args, **kw: date(*args, **kw) + for dic in get_generator_dict_from_str_csv( + line_iterator_with_blank_line, + add_date=True, + day_range="PREVIOUS_MONTH", + date_format="%Y/%m/%d" + ): + self.assertEqual(dic, expected_dict) From f06f771991303195491bdbba621d6c1d06033bfb Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Mon, 2 Mar 2020 18:44:38 +0100 Subject: [PATCH 12/66] DCM reader V0 - no tests --- nck/clients/dcm_client.py | 5 +++-- nck/readers/dcm_reader.py | 31 ++++++++++++++----------------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/nck/clients/dcm_client.py b/nck/clients/dcm_client.py index c0a24cd3..f9dde254 100644 --- a/nck/clients/dcm_client.py +++ b/nck/clients/dcm_client.py @@ -74,6 +74,8 @@ def add_dimension_filters(self, report, profile_id, filters): filter_value = next((val for val in values["items"] if val["value"] == dimension_value), {}) if filter_value: report["criteria"]["dimensionFilters"].append(filter_value) + else: + logger.info("Filter not found: %s - %s" % (dimension_name, dimension_value)) def run_report(self, report, profile_id): inserted_report = self._service.reports().insert(profileId=profile_id, body=report).execute() @@ -82,8 +84,7 @@ def run_report(self, report, profile_id): file_id = file["id"] return report_id, file_id - # @retry(wait=wait_exponential(multiplier=60, min=60, max=240), stop=stop_after_delay(3600)) - @retry(wait=wait_exponential(multiplier=1, min=1, max=4), stop=stop_after_delay(3600)) + @retry(wait=wait_exponential(multiplier=60, min=60, max=240), stop=stop_after_delay(3600)) def is_report_file_ready(self, report_id, file_id): """Poke the report file status""" report_file = self._service.files().get(reportId=report_id, fileId=file_id).execute() diff --git 
a/nck/readers/dcm_reader.py b/nck/readers/dcm_reader.py index c3bcf28e..7190e23d 100644 --- a/nck/readers/dcm_reader.py +++ b/nck/readers/dcm_reader.py @@ -24,7 +24,7 @@ from nck.commands.command import processor from nck.readers.reader import Reader from nck.utils.args import extract_args -from nck.streams.format_date_stream import FormatDateStream +from nck.streams.normalized_json_stream import NormalizedJSONStream from nck.clients.dcm_client import DCMClient from nck.helpers.dcm_helper import REPORT_TYPES @@ -62,12 +62,6 @@ help="A filter is a tuple following this pattern: (dimensionName, dimensionValue). " "https://developers.google.com/doubleclick-advertisers/v3.3/dimensions/#standard-filters", ) -@click.option( - "--dcm-date-format", - default="%Y-%m-%d", - help="And optional date format for the output stream. " - "Follow the syntax of https://docs.python.org/3.8/library/datetime.html#strftime-strptime-behavior", -) @processor("dcm_access_token", "dcm_refresh_token", "dcm_client_secret") def dcm(**kwargs): return DcmReader(**extract_args("dcm_", kwargs)) @@ -88,36 +82,31 @@ def __init__( start_date, end_date, filters, - date_format, ): self.dcm_client = DCMClient(access_token, client_id, client_secret, refresh_token) self.profile_ids = list(profile_ids) self.report_name = report_name self.report_type = report_type self.metrics = list(metrics) - self.dimensions = dimensions + self.dimensions = list(dimensions) self.start_date = start_date self.end_date = end_date self.filters = list(filters) - self.date_format = date_format - @staticmethod - def format_response(report_generator): + def format_response(self, report_generator): is_main_data = False - headers = [] for row in report_generator: decoded_row = row.decode(ENCODING) if re.match("^Report Fields", decoded_row): - decoded_row = next(report_generator).decode(ENCODING) - headers = decoded_row.split(",") + next(report_generator) decoded_row = next(report_generator).decode(ENCODING) is_main_data = True if re.match("^Grand Total", decoded_row): is_main_data = False if is_main_data: - csv_reader = csv.DictReader(StringIO(decoded_row), headers) + csv_reader = csv.DictReader(StringIO(decoded_row), self.dimensions + self.metrics) yield next(csv_reader) def result_generator(self): @@ -137,4 +126,12 @@ def result_generator(self): def read(self): # should replace results later by a good identifier - yield FormatDateStream("results", self.result_generator(), keys=["Date"], date_format=self.date_format) + yield DCMStream("results" + "_".join(self.profile_ids), self.result_generator()) + + +class DCMStream(NormalizedJSONStream): + DCM_PREFIX = "^dfa:" + + @staticmethod + def _normalize_key(key): + return re.split(DCMStream.DCM_PREFIX, key)[-1].replace(" ", "_").replace("-", "_") From 6c4c59eac856037f82f0f6767a3e96b3e9430562 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Tue, 3 Mar 2020 17:14:50 +0100 Subject: [PATCH 13/66] Fix: remove deprecated version and add most recent one --- nck/readers/dbm_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nck/readers/dbm_reader.py b/nck/readers/dbm_reader.py index 47802378..8fd47359 100644 --- a/nck/readers/dbm_reader.py +++ b/nck/readers/dbm_reader.py @@ -234,7 +234,7 @@ def get_sdf_body(self): filter_ids = [str(filt[1]) for filt in self.kwargs.get("filter")] file_types = self.kwargs.get("file_type") - body_sdf = {"version": "4.2", "filterIds": filter_ids, "filterType": filter_types, "fileTypes": file_types} + body_sdf = {"version": "5.1", "filterIds": filter_ids, 
"filterType": filter_types, "fileTypes": file_types} return body_sdf def get_sdf_objects(self): From 0b7c64ca7be355ea22fd9610fe77091da38bfd1c Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Wed, 4 Mar 2020 11:39:00 +0100 Subject: [PATCH 14/66] Tests + quick fixes --- nck/clients/dcm_client.py | 67 +++++++++++++++++++---------- nck/helpers/dcm_helper.py | 17 ++++++++ nck/readers/dcm_reader.py | 12 +++--- tests/clients/test_dcm_client.py | 72 ++++++++++++++++++++++++++++++++ tests/readers/test_dcm_reader.py | 50 ++++++++++++++++++++++ 5 files changed, 189 insertions(+), 29 deletions(-) create mode 100644 tests/clients/test_dcm_client.py create mode 100644 tests/readers/test_dcm_reader.py diff --git a/nck/clients/dcm_client.py b/nck/clients/dcm_client.py index f9dde254..dfa12c68 100644 --- a/nck/clients/dcm_client.py +++ b/nck/clients/dcm_client.py @@ -1,3 +1,20 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. import logging import httplib2 import requests @@ -7,7 +24,9 @@ from oauth2client import client, GOOGLE_TOKEN_URI from googleapiclient import discovery -logger = logging.getLogger("CM_client") +logger = logging.getLogger("DCM_client") + +DOWNLOAD_FORMAT = "CSV" class DCMClient: @@ -27,30 +46,20 @@ def __init__(self, access_token, client_id, client_secret, refresh_token): http = self._credentials.authorize(httplib2.Http()) self._credentials.refresh(http) self.auth = ( - self._credentials.token_response["token_type"] + " " + self._credentials.token_response["access_token"] + f"{self._credentials.token_response['token_type']} {self._credentials.token_response['access_token']}" ) self._service = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) - self.download_format = "CSV" - def build_report_skeleton(self, report_name, report_type): + @staticmethod + def build_report_skeleton(report_name, report_type): report = { # Set the required fields "name" and "type". 
"name": report_name, "type": report_type, - "format": self.download_format, + "format": DOWNLOAD_FORMAT, } return report - @staticmethod - def get_date_range(start_date=None, end_date=None): - if start_date and end_date: - start = start_date.strftime("%Y-%m-%d") - end = end_date.strftime("%Y-%m-%d") - logger.warning("Custom date range selected: " + start + " --> " + end) - return {"startDate": start, "endDate": end} - else: - raise ClickException("Please provide start date and end date in your request") - def add_report_criteria(self, report, start_date, end_date, metrics, dimensions): criteria = { "dateRange": self.get_date_range(start_date, end_date), @@ -63,19 +72,19 @@ def add_dimension_filters(self, report, profile_id, filters): for dimension_name, dimension_value in filters: request = { "dimensionName": dimension_name, - "endDate": report["criteria"]["dateRange"]["endDate"], "startDate": report["criteria"]["dateRange"]["startDate"], + "endDate": report["criteria"]["dateRange"]["endDate"], } values = self._service.dimensionValues().query(profileId=profile_id, body=request).execute() report["criteria"]["dimensionFilters"] = report["criteria"].get("dimensionFilters", []) if values["items"]: # Add value as a filter to the report criteria. - filter_value = next((val for val in values["items"] if val["value"] == dimension_value), {}) + filter_value = self.get_filter_value(dimension_value, values) if filter_value: report["criteria"]["dimensionFilters"].append(filter_value) else: - logger.info("Filter not found: %s - %s" % (dimension_name, dimension_value)) + logger.info(f"Filter not found: {dimension_name} - {dimension_value}") def run_report(self, report, profile_id): inserted_report = self._service.reports().insert(profileId=profile_id, body=report).execute() @@ -85,16 +94,16 @@ def run_report(self, report, profile_id): return report_id, file_id @retry(wait=wait_exponential(multiplier=60, min=60, max=240), stop=stop_after_delay(3600)) - def is_report_file_ready(self, report_id, file_id): + def assert_report_file_ready(self, report_id, file_id): """Poke the report file status""" report_file = self._service.files().get(reportId=report_id, fileId=file_id).execute() status = report_file["status"] if status == "REPORT_AVAILABLE": - logger.info("File status is %s, ready to download." % status) - return True + logger.info(f"File status is {status}, ready to download.") + pass elif status != "PROCESSING": - raise ClickException("File status is %s, processing failed." 
% status) + raise ClickException(f"File status is {status}, processing failed.") else: raise ClickException("File status is PROCESSING") @@ -110,3 +119,17 @@ def direct_report_download(self, report_id, file_id): r = requests.get(request.uri, stream=True, headers=headers) yield from r.iter_lines() + + @staticmethod + def get_date_range(start_date=None, end_date=None): + if start_date and end_date: + start = start_date.strftime("%Y-%m-%d") + end = end_date.strftime("%Y-%m-%d") + logger.warning(f"Custom date range selected: {start} --> {end}") + return {"startDate": start, "endDate": end} + else: + raise ClickException("Please provide start date and end date in your request") + + @staticmethod + def get_filter_value(dimension_value, values): + return next((val for val in values["items"] if val["value"] == dimension_value), {}) diff --git a/nck/helpers/dcm_helper.py b/nck/helpers/dcm_helper.py index 1f45c995..05f64a7f 100644 --- a/nck/helpers/dcm_helper.py +++ b/nck/helpers/dcm_helper.py @@ -1,3 +1,20 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
CRITERIA_MAPPING = { "STANDARD": "criteria", "REACH": "reachCriteria", diff --git a/nck/readers/dcm_reader.py b/nck/readers/dcm_reader.py index 7190e23d..2b565727 100644 --- a/nck/readers/dcm_reader.py +++ b/nck/readers/dcm_reader.py @@ -98,11 +98,11 @@ def format_response(self, report_generator): for row in report_generator: decoded_row = row.decode(ENCODING) - if re.match("^Report Fields", decoded_row): + if decoded_row.startswith("Report Fields"): next(report_generator) decoded_row = next(report_generator).decode(ENCODING) is_main_data = True - if re.match("^Grand Total", decoded_row): + if decoded_row.startswith("Grand Total"): is_main_data = False if is_main_data: @@ -118,14 +118,12 @@ def result_generator(self): report_id, file_id = self.dcm_client.run_report(report, profile_id) - is_ready = self.dcm_client.is_report_file_ready(file_id=file_id, report_id=report_id) + self.dcm_client.assert_report_file_ready(file_id=file_id, report_id=report_id) - if is_ready: - report_generator = self.dcm_client.direct_report_download(report_id, file_id) - yield from self.format_response(report_generator) + report_generator = self.dcm_client.direct_report_download(report_id, file_id) + yield from self.format_response(report_generator) def read(self): - # should replace results later by a good identifier yield DCMStream("results" + "_".join(self.profile_ids), self.result_generator()) diff --git a/tests/clients/test_dcm_client.py b/tests/clients/test_dcm_client.py new file mode 100644 index 00000000..0b3629ad --- /dev/null +++ b/tests/clients/test_dcm_client.py @@ -0,0 +1,72 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
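# Illustrative sketch (not part of the original patch): the tests in this module bypass
# DCMClient.__init__ by patching it with a stub that copies keyword arguments onto the
# instance, so no credentials or real Google service are needed. A self-contained
# example of that pattern, with made-up class and attribute names:
from unittest import mock as _mock


class _FakeClient:
    def __init__(self, credentials):
        raise RuntimeError("the real constructor would need credentials")


def _stub_init(self, **kwargs):
    for name, value in kwargs.items():
        setattr(self, name, value)


with _mock.patch.object(_FakeClient, "__init__", _stub_init):
    fake = _FakeClient(token="fake")
    assert fake.token == "fake"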
+from unittest import TestCase, mock +from datetime import datetime + +from nck.clients.dcm_client import DCMClient + + +class MockService: + def dimensionValues(self): + return self + + def query(self, *args, **kwargs): + return self + + def execute(self): + return True + + +def mock_service(*args, **kwargs): + return MockService() + + +class DCMClientTest(TestCase): + def mock_dcm_client(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + kwargs = {"_service": mock_service()} + + @mock.patch.object(DCMClient, "__init__", mock_dcm_client) + def test_add_report_criteria(self): + report = {"name": "report"} + start = datetime(year=2020, month=1, day=1) + end = datetime(year=2020, month=2, day=1) + elements = ["a", "b"] + DCMClient(**self.kwargs).add_report_criteria(report, start, end, elements, elements) + expected = { + "name": "report", + "criteria": { + "dateRange": {"startDate": "2020-01-01", "endDate": "2020-02-01"}, + "dimensions": [{"name": "a"}, {"name": "b"}], + "metricNames": ["a", "b"], + }, + } + assert report == expected + + @mock.patch.object(DCMClient, "__init__", mock_dcm_client) + @mock.patch.object(MockService, "execute", lambda *args: {"items": [{"value": "ok"}, {"value": "nok"}]}) + @mock.patch("tests.clients.test_dcm_client.MockService") + def test_add_dimension_filters(self, mock_filter): + report = {"criteria": {"dateRange": {"endDate": "", "startDate": ""}}} + profile_id = "" + filters = [("filter", "ok")] + DCMClient(**self.kwargs).add_dimension_filters(report, profile_id, filters) + expected = {"criteria": {"dateRange": {"endDate": "", "startDate": ""}, "dimensionFilters": [{"value": "ok"}]}} + assert report == expected diff --git a/tests/readers/test_dcm_reader.py b/tests/readers/test_dcm_reader.py new file mode 100644 index 00000000..b084fd95 --- /dev/null +++ b/tests/readers/test_dcm_reader.py @@ -0,0 +1,50 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
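# Illustrative sketch (not part of the original patch): the DCM report files consumed by
# format_response look roughly like the fixture used in the tests below - preamble rows,
# a "Report Fields" marker, a header row, the data rows, then a "Grand Total" footer.
# The values here are made up:
EXAMPLE_DCM_REPORT_ROWS = [
    b"Report metadata",
    b"Report Fields",
    b"date,impressions,clicks",
    b"2020-01-01,100,3",
    b"Grand Total,100,3",
]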
+from unittest import TestCase, mock +import logging + +from nck.readers.dcm_reader import DcmReader + +logger = logging.getLogger("DCM_reader_test") + + +class DCMReaderTest(TestCase): + def mock_dcm_reader(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + kwargs = {"metrics": ["impressions", "clicks"], "dimensions": ["date"]} + + @mock.patch.object(DcmReader, "__init__", mock_dcm_reader) + def test_empty_data(self): + reader = DcmReader(**self.kwargs) + input_report = (row for row in [b"No", b"Consistent", b"Data"]) + if len(list(reader.format_response(input_report))) > 0: + assert False, "Data is not empty" + + @mock.patch.object(DcmReader, "__init__", mock_dcm_reader) + def test_format_data(self): + reader = DcmReader(**self.kwargs) + input_report = (row for row in [b"x", b"x", b"Report Fields", b"headers", b"1,2,3", b"4,5,6", b"Grand Total"]) + expected = [{"date": "1", "impressions": "2", "clicks": "3"}, {"date": "4", "impressions": "5", "clicks": "6"}] + input_list = list(reader.format_response(input_report)) + assert len(input_list) == len(expected) + + logger.info(f"{str(input_list)}\n{str(expected)}") + for input_row, output in zip(input_list, expected): + assert input_row == output From 99fa70ab53277fa65224bf53aaf748ab9b9918c6 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Wed, 4 Mar 2020 16:04:33 +0100 Subject: [PATCH 15/66] fix Exceptions --- nck/clients/dcm_client.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/nck/clients/dcm_client.py b/nck/clients/dcm_client.py index dfa12c68..a0d819f9 100644 --- a/nck/clients/dcm_client.py +++ b/nck/clients/dcm_client.py @@ -19,7 +19,6 @@ import httplib2 import requests -from click import ClickException from tenacity import retry, wait_exponential, stop_after_delay from oauth2client import client, GOOGLE_TOKEN_URI from googleapiclient import discovery @@ -103,9 +102,9 @@ def assert_report_file_ready(self, report_id, file_id): logger.info(f"File status is {status}, ready to download.") pass elif status != "PROCESSING": - raise ClickException(f"File status is {status}, processing failed.") + raise FileNotFoundError(f"File status is {status}, processing failed.") else: - raise ClickException("File status is PROCESSING") + raise FileNotFoundError("File status is PROCESSING") def direct_report_download(self, report_id, file_id): # Retrieve the file metadata. 
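# Hedged sketch (not part of the original patch): with the change above,
# assert_report_file_ready keeps raising FileNotFoundError while the report is still
# PROCESSING, and the tenacity @retry decorator attached to it re-invokes the call until
# the file is available or stop_after_delay is reached. A stand-alone example of that
# polling pattern (poll_status is a made-up stand-in for the Google API call):
from tenacity import retry, wait_exponential, stop_after_delay


def poll_status():
    return "REPORT_AVAILABLE"  # stub so the example finishes immediately


@retry(wait=wait_exponential(multiplier=60, min=60, max=240), stop=stop_after_delay(3600))
def wait_until_ready():
    status = poll_status()
    if status != "REPORT_AVAILABLE":
        raise FileNotFoundError(f"File status is {status}")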
@@ -128,7 +127,7 @@ def get_date_range(start_date=None, end_date=None): logger.warning(f"Custom date range selected: {start} --> {end}") return {"startDate": start, "endDate": end} else: - raise ClickException("Please provide start date and end date in your request") + raise SyntaxError("Please provide start date and end date in your request") @staticmethod def get_filter_value(dimension_value, values): From 0e7017031efece2ffdaf6ea5a6497f8b47eac5c6 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Tue, 10 Mar 2020 17:24:23 +0100 Subject: [PATCH 16/66] Fix: add new fields --- nck/helpers/facebook_helper.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nck/helpers/facebook_helper.py b/nck/helpers/facebook_helper.py index aa5daedf..36e82be8 100644 --- a/nck/helpers/facebook_helper.py +++ b/nck/helpers/facebook_helper.py @@ -140,6 +140,10 @@ "start_time": "start_time", "stop_time": "end_time", "daily_budget": "daily_budget", + "device_platform": "device_platform", + "platform_position": "platform_position", + "publisher_platform": "publisher_platform", + "impression_device": "impression_device" } From 0fa78aac0d930c1a616b1a0d64d9d3c794727eb3 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Wed, 11 Mar 2020 18:32:47 +0100 Subject: [PATCH 17/66] Feature: add all cl options for yandex reader --- nck/readers/__init__.py | 2 + nck/readers/yandex_reader.py | 104 +++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 nck/readers/yandex_reader.py diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py index f2cb21a1..0ad05054 100644 --- a/nck/readers/__init__.py +++ b/nck/readers/__init__.py @@ -31,6 +31,7 @@ from nck.readers.search_console_reader import search_console from nck.readers.adobe_reader import adobe from nck.readers.radarly_reader import radarly +from nck.readers.yandex_reader import yandex readers = [ @@ -48,6 +49,7 @@ search_console, adobe, radarly, + yandex ] diff --git a/nck/readers/yandex_reader.py b/nck/readers/yandex_reader.py new file mode 100644 index 00000000..adb32670 --- /dev/null +++ b/nck/readers/yandex_reader.py @@ -0,0 +1,104 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
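# Illustrative sketch (not part of the original patch): the reader declared below uses a
# custom click.ParamType that splits comma-separated option values, so a --yandex-filter
# option reaches Python as a (field, operator, values) tuple. With made-up values:
EXAMPLE_YANDEX_FILTER = ("CampaignId", "IN", "123,456".split(","))
assert EXAMPLE_YANDEX_FILTER == ("CampaignId", "IN", ["123", "456"])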
+import click + +from nck.commands.command import processor +from nck.readers.reader import Reader +from nck.utils.args import extract_args +from nck.helpers.yandex_helper import ( + LANGUAGES, REPORT_TYPES, FIELDS, ATTRIBUTION_MODELS, + DATE_RANGE_TYPES, OPERATORS +) + + +class StrList(click.ParamType): + + def convert(self, value, param, ctx): + return value.split(",") + + +STR_LIST_TYPE = StrList() + + +@click.command(name="read_yandex") +@click.option("--yandex-token", required=True) +@click.option( + "--yandex-report-language", + type=click.Choice(LANGUAGES), + default="en" +) +@click.option( + "--yandex-filter", + multiple=True, + type=click.Tuple([click.Choice(FIELDS), click.Choice(OPERATORS), STR_LIST_TYPE]) +) +@click.option( + "--yandex-attribution-model", + multiple=True, + type=click.Choice(ATTRIBUTION_MODELS) +) +@click.option( + "--yandex-max-rows", + type=int +) +@click.option( + "--yandex-field-name", + multiple=True, + type=click.Choice(FIELDS), + required=True, + help=( + "Fields to output in the report (columns)." + "For the full list of fields and their meanings, " + "see https://tech.yandex.com/direct/doc/reports/fields-list-docpage/" + ) +) +@click.option( + "--yandex-report-name", + required=True +) +@click.option( + "--yandex-report-type", + type=click.Choice(REPORT_TYPES), + required=True +) +@click.option( + "--yandex-date-range", + type=click.Choice(DATE_RANGE_TYPES), + required=True +) +@click.option( + "--yandex-include-vat", + type=click.BOOL, + default=False, + help="Whether to include VAT in the monetary amounts in the report." +) +@click.option( + "--yandex-date-start", + type=click.DateTime() +) +@click.option( + "--yandex-date-stop", + type=click.DateTime() +) +@processor("yandex_token") +def yandex(**kwargs): + return YandexReader(**extract_args("yandex_", kwargs)) + + +class YandexReader(Reader): + pass From 4760b0602b52506aa1a07ef70572af7b3ec96099 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Wed, 11 Mar 2020 18:33:37 +0100 Subject: [PATCH 18/66] Feature: add values for cl options --- nck/helpers/yandex_helper.py | 116 +++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 nck/helpers/yandex_helper.py diff --git a/nck/helpers/yandex_helper.py b/nck/helpers/yandex_helper.py new file mode 100644 index 00000000..39706488 --- /dev/null +++ b/nck/helpers/yandex_helper.py @@ -0,0 +1,116 @@ +LANGUAGES = ["en", "ru", "uk"] + +REPORT_TYPES = [ + "ACCOUNT_PERFORMANCE_REPORT", + "CAMPAIGN_PERFORMANCE_REPORT", + "ADGROUP_PERFORMANCE_REPORT", + "AD_PERFORMANCE_REPORT", + "CRITERIA_PERFORMANCE_REPORT", + "CUSTOM_REPORT", + "REACH_AND_FREQUENCY_PERFORMANCE_REPORT", + "SEARCH_QUERY_PERFORMANCE_REPORT" +] + +FIELDS = [ + "AdFormat", + "AdGroupId", + "AdGroupName", + "AdId", + "AdNetworkType", + "Age", + "AudienceTargetId", + "AvgClickPosition", + "AvgCpc", + "AvgCpm", + "AvgImpressionFrequency", + "AvgImpressionPosition", + "AvgPageviews", + "AvgTrafficVolume", + "BounceRate", + "Bounces", + "CampaignId", + "CampaignName", + "CampaignType", + "CarrierType", + "Clicks", + "ClickType", + "ConversionRate", + "Conversions", + "Cost", + "CostPerConversion", + "Criteria", + "CriteriaId", + "CriteriaType", + "Criterion", + "CriterionId", + "CriterionType", + "Ctr", + "Date", + "Device", + "DynamicTextAdTargetId", + "ExternalNetworkName", + "Gender", + "GoalsRoi", + "ImpressionReach", + "Impressions", + "ImpressionShare", + "Keyword", + "LocationOfPresenceId", + "LocationOfPresenceName", + "MatchedKeyword", + "MatchType", + 
"MobilePlatform", + "Month", + "Placement", + "Profit", + "Quarter", + "Query", + "Revenue", + "RlAdjustmentId", + "Sessions", + "Slot", + "SmartBannerFilterId", + "TargetingLocationId", + "TargetingLocationName", + "Week", + "WeightedCtr", + "WeightedImpressions", + "Year" +] + +ATTRIBUTION_MODELS = ["FC", "LC", "LSC", "LYDC"] + +DATE_RANGE_TYPES = [ + "TODAY", + "YESTERDAY", + "THIS_WEEK_MON_TODAY", + "THIS_WEEK_SUN_TODAY", + "LAST_WEEK", + "LAST_BUSINESS_WEEK", + "LAST_WEEK_SUN_SAT", + "THIS_MONTH", + "LAST_MONTH", + "ALL_TIME", + "CUSTOM_DATE", + "AUT0", + "LAST_3_DAYS", + "LAST_5_DAYS", + "LAST_7_DAYS", + "LAST_14_DAYS", + "LAST_30_DAYS", + "LAST_90_DAYS", + "LAST_365_DAYS" +] + +OPERATORS = [ + "EQUALS", + "NOT_EQUALS", + "IN", + "NOT_IN", + "LESS_THAN", + "GREATER_THAN", + "STARTS_WITH_IGNORE_CASE", + "DOES_NOT_START_WITH_IGNORE_CASE", + "STARTS_WITH_ANY_IGNORE_CASE", + "DOES_NOT_START_WITH_ALL_IGNORE_CASE" +] From c9b40d17a4d65b5ab391f003f3aa823425c992d6 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Wed, 11 Mar 2020 18:55:34 +0100 Subject: [PATCH 19/66] Feature: add constructor to test CL options --- nck/readers/yandex_reader.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/nck/readers/yandex_reader.py b/nck/readers/yandex_reader.py index adb32670..1c358cf8 100644 --- a/nck/readers/yandex_reader.py +++ b/nck/readers/yandex_reader.py @@ -44,6 +44,7 @@ def convert(self, value, param, ctx): ) @click.option( "--yandex-filter", + "yandex_filters", multiple=True, type=click.Tuple([click.Choice(FIELDS), click.Choice(OPERATORS), STR_LIST_TYPE]) ) @@ -58,6 +59,7 @@ def convert(self, value, param, ctx): ) @click.option( "--yandex-field-name", + "yandex_fields", multiple=True, type=click.Choice(FIELDS), required=True, @@ -101,4 +103,30 @@ def yandex(**kwargs): class YandexReader(Reader): - pass + def __init__( + self, + token, + report_language, + filters, + attribution_model, + max_rows, + fields, + report_name, + report_type, + date_range, + include_vat, + date_start, + date_stop + ): + self.token = token + self.report_language = report_language + self.filters = filters + self.attribution_model = attribution_model + self.max_rows = max_rows + self.fields = fields + self.report_name = report_name + self.report_type = report_type + self.date_range = date_range + self.include_vat = include_vat + self.date_start = date_start + self.date_stop = date_stop From 51d660aae4b63b5476068d9763ace10f2daed26b Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Wed, 11 Mar 2020 18:56:19 +0100 Subject: [PATCH 20/66] Feature: add skeleton of Yandex Direct API client --- nck/clients/yandex_client.py | 36 +++++++++++++++ tests/clients/test_yandex_client.py | 69 +++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 nck/clients/yandex_client.py create mode 100644 tests/clients/test_yandex_client.py diff --git a/nck/clients/yandex_client.py b/nck/clients/yandex_client.py new file mode 100644 index 00000000..682bf8ee --- /dev/null +++ b/nck/clients/yandex_client.py @@ -0,0 +1,36 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import logging + +logger = logging.getLogger("YandexClient") + + +class YandexClient: + API_VERSION = "v5" + + def __init__(self, token, language, skip_report_summary): + self.token = token + self.language = language + self.skip_report_summary = skip_report_summary + + @staticmethod + def get_formatted_request_body(**request_body_elements): + pass + + def execute_request(self, body): + pass diff --git a/tests/clients/test_yandex_client.py b/tests/clients/test_yandex_client.py new file mode 100644 index 00000000..6f5ab4a4 --- /dev/null +++ b/tests/clients/test_yandex_client.py @@ -0,0 +1,69 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
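# Hedged illustration (not part of the original patch): the test below pins down the
# request body that get_formatted_request_body is expected to assemble for a Yandex
# Direct report: a SelectionCriteria block (optionally holding a Filter list), a Page
# limit, the FieldNames to return, and report metadata such as ReportName, ReportType,
# DateRangeType and IncludeVAT. A compact example built from the same fixture values:
EXAMPLE_REPORT_REQUEST_BODY = {
    "SelectionCriteria": {"Filter": [{"Field": "CampaignId", "Operator": "IN", "Values": ["123", "456"]}]},
    "Page": {"Limit": 10},
    "FieldNames": ["AdGroupId", "Year", "CampaignName"],
    "ReportName": "test",
    "ReportType": "CAMPAIGN_PERFORMANCE_REPORT",
    "DateRangeType": "ALL_TIME",
    "IncludeVAT": "NO",
}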
+from unittest import TestCase, mock + +from nck.clients.yandex_client import YandexClient + + +class YandexClientTest(TestCase): + + def test_get_formatted_request_body(self): + params = { + "SelectionCriteria": { + "Filter": [ + { + "Field": "CampaignId", + "Operator": "IN", + "Values": ["123", "456"] + } + ] + } + } + page = { + "Limit": 10 + } + field_names = ["AdGroupId", "Year", "CampaignName"] + report_name = "test", + report_type = "CAMPAIGN_PERFORMANCE_REPORT" + date_range_type = "ALL_TIME" + format = "TSV" + include_vat = "NO" + + expected_output = { + "params": params, + "Page": page, + "FieldNames": field_names, + "ReportName": report_name, + "ReportType": report_type, + "DateRangeType": date_range_type, + "Format": format, + "IncludeVAT": include_vat + } + self.assertDictEqual( + YandexClient.get_formatted_request_body( + params, + page, + field_names, + report_name, + report_type, + date_range_type, + format, + include_vat + ), + expected_output + ) From 1d7eac2e19f60c72b35c58fc0d97b5c345fc4c47 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Fri, 13 Mar 2020 17:35:16 +0100 Subject: [PATCH 21/66] Feature: add request body construction --- .../{yandex_client.py => api_client.py} | 17 +++++-- nck/helpers/api_client_helper.py | 43 ++++++++++++++++ ...st_yandex_client.py => test_api_client.py} | 47 ++++++++--------- tests/helpers/test_api_client_helper.py | 51 +++++++++++++++++++ 4 files changed, 128 insertions(+), 30 deletions(-) rename nck/clients/{yandex_client.py => api_client.py} (72%) create mode 100644 nck/helpers/api_client_helper.py rename tests/clients/{test_yandex_client.py => test_api_client.py} (64%) create mode 100644 tests/helpers/test_api_client_helper.py diff --git a/nck/clients/yandex_client.py b/nck/clients/api_client.py similarity index 72% rename from nck/clients/yandex_client.py rename to nck/clients/api_client.py index 682bf8ee..b470b76c 100644 --- a/nck/clients/yandex_client.py +++ b/nck/clients/api_client.py @@ -16,11 +16,14 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. import logging +from typing import Dict -logger = logging.getLogger("YandexClient") +from nck.helpers.api_client_helper import get_dict_with_keys_converted_to_new_string_format +logger = logging.getLogger("ApiClient") -class YandexClient: + +class ApiClient: API_VERSION = "v5" def __init__(self, token, language, skip_report_summary): @@ -29,8 +32,14 @@ def __init__(self, token, language, skip_report_summary): self.skip_report_summary = skip_report_summary @staticmethod - def get_formatted_request_body(**request_body_elements): - pass + def get_formatted_request_body( + str_format: str = "PascalCase", + **request_body_elements + ) -> Dict: + return get_dict_with_keys_converted_to_new_string_format( + request_body_elements, + str_format + ) def execute_request(self, body): pass diff --git a/nck/helpers/api_client_helper.py b/nck/helpers/api_client_helper.py new file mode 100644 index 00000000..8991adcb --- /dev/null +++ b/nck/helpers/api_client_helper.py @@ -0,0 +1,43 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from typing import Dict +import logging + +logging.getLogger("ApiClient") + +POSSIBLE_STRING_FORMATS = ["PascalCase"] + + +def get_dict_with_keys_converted_to_new_string_format( + dictionary: Dict, str_format: str +) -> Dict: + if str_format in POSSIBLE_STRING_FORMATS and str_format == "PascalCase": + new_keys = [ + "".join(word.capitalize() for word in old_key.split("_")) + for old_key in dictionary + ] + old_keys = dictionary.copy().keys() + for old_key, new_key in zip(old_keys, new_keys): + dictionary[new_key] = dictionary.pop(old_key) + return dictionary + else: + logging.error(( + "Unable to convert to new string format. " + "Format not in %s" + ) % POSSIBLE_STRING_FORMATS) + return None diff --git a/tests/clients/test_yandex_client.py b/tests/clients/test_api_client.py similarity index 64% rename from tests/clients/test_yandex_client.py rename to tests/clients/test_api_client.py index 6f5ab4a4..c737a88c 100644 --- a/tests/clients/test_yandex_client.py +++ b/tests/clients/test_api_client.py @@ -15,55 +15,50 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from unittest import TestCase, mock +from unittest import TestCase -from nck.clients.yandex_client import YandexClient +from nck.clients.api_client import ApiClient -class YandexClientTest(TestCase): +class ApiClientTest(TestCase): def test_get_formatted_request_body(self): - params = { - "SelectionCriteria": { - "Filter": [ - { - "Field": "CampaignId", - "Operator": "IN", - "Values": ["123", "456"] - } - ] - } + selection_criteria = { + "Filter": [ + { + "Field": "CampaignId", + "Operator": "IN", + "Values": ["123", "456"] + } + ] } page = { "Limit": 10 } field_names = ["AdGroupId", "Year", "CampaignName"] - report_name = "test", + report_name = "test" report_type = "CAMPAIGN_PERFORMANCE_REPORT" date_range_type = "ALL_TIME" - format = "TSV" include_vat = "NO" expected_output = { - "params": params, + "SelectionCriteria": selection_criteria, "Page": page, "FieldNames": field_names, "ReportName": report_name, "ReportType": report_type, "DateRangeType": date_range_type, - "Format": format, "IncludeVAT": include_vat } self.assertDictEqual( - YandexClient.get_formatted_request_body( - params, - page, - field_names, - report_name, - report_type, - date_range_type, - format, - include_vat + ApiClient.get_formatted_request_body( + selection_criteria=selection_criteria, + page=page, + field_names=field_names, + report_name=report_name, + report_type=report_type, + date_range_type=date_range_type, + include_v_a_t=include_vat ), expected_output ) diff --git a/tests/helpers/test_api_client_helper.py b/tests/helpers/test_api_client_helper.py new file mode 100644 index 00000000..c43b738a --- /dev/null +++ b/tests/helpers/test_api_client_helper.py @@ -0,0 +1,51 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free 
software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import unittest +import logging + +from nck.helpers.api_client_helper import get_dict_with_keys_converted_to_new_string_format + + +class ApiClientHelperTest(unittest.TestCase): + + def test_string_conversion_to_camel_case(self): + dict_with_unformatted_keys = { + "abc_de": 1, + "abc": "abc", + "abc_de_fg": 2 + } + self.assertDictEqual( + get_dict_with_keys_converted_to_new_string_format( + dict_with_unformatted_keys, + "PascalCase" + ), + { + "AbcDe": 1, + "Abc": "abc", + "AbcDeFg": 2 + } + ) + + def test_unknown_case(self): + with self.assertLogs() as cm: + logging.getLogger("ApiClient") + get_dict_with_keys_converted_to_new_string_format({}, "UnknownCase") + self.assertEqual( + cm.output, + ["ERROR:root:Unable to convert to new string format. Format not in ['PascalCase']"] + ) From aed708de503594d3ddc4954742c53d07c903e2f9 Mon Sep 17 00:00:00 2001 From: SENHAJIRHAZI Date: Mon, 16 Mar 2020 16:01:25 +0100 Subject: [PATCH 22/66] add link url asset handle case --- nck/helpers/facebook_helper.py | 14 +++++++++++++- nck/readers/facebook_reader.py | 4 ++-- nck/readers/googleads_reader.py | 1 + 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/nck/helpers/facebook_helper.py b/nck/helpers/facebook_helper.py index 36e82be8..8a63e097 100644 --- a/nck/helpers/facebook_helper.py +++ b/nck/helpers/facebook_helper.py @@ -143,11 +143,14 @@ "device_platform": "device_platform", "platform_position": "platform_position", "publisher_platform": "publisher_platform", - "impression_device": "impression_device" + "impression_device": "impression_device", + "link_url_asset" : {'value':'website_url'} } def get_field_value(row, field): + if is_url_asset(field): + return extract_special_field(row, field) return ( row.get(DESIRED_FIELDS[field], None) if isinstance(DESIRED_FIELDS[field], str) @@ -155,6 +158,15 @@ def get_field_value(row, field): ) +def extract_special_field(row, field): + dic = DESIRED_FIELDS[field] + return row.get(field, {}).get(dic.get('value'),None) + + +def is_url_asset(field): + return field == "link_url_asset" + + def get_nested_field_value(row, field): if DESIRED_FIELDS[field][0] not in row: return None diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index 6816fbbc..0bace0d4 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -223,12 +223,14 @@ def create_time_range(start_date, end_date): return {"since": start_date.strftime(DATEFORMAT), "until": end_date.strftime(DATEFORMAT)} def format_and_yield(self, record): + import ipdb; ipdb.set_trace() report = {field: get_field_value(record, field) for field in self.desired_fields} if self.add_date_to_report: report["date"] = datetime.today().strftime(DATEFORMAT) yield report def result_generator(self, data): + # import ipdb; ipdb.set_trace() for record in data: yield from 
self.format_and_yield(record.export_all_data()) @@ -238,7 +240,6 @@ def get_data(self): def get_data_for_object(self, ad_object_id): params = self.get_params() - if self.ad_insights: query_mapping = {AD_OBJECT_TYPES[0]: self.run_query_on_fb_account_obj} else: @@ -256,7 +257,6 @@ def get_data_for_object(self, ad_object_id): def read(self): FacebookAdsApi.init(self.app_id, self.app_secret, self.access_token) - yield NormalizedJSONStream( "results_" + self.ad_object_type + "_" + "_".join(self.ad_object_ids), self.get_data() ) diff --git a/nck/readers/googleads_reader.py b/nck/readers/googleads_reader.py index 29e2f832..e8159146 100644 --- a/nck/readers/googleads_reader.py +++ b/nck/readers/googleads_reader.py @@ -338,6 +338,7 @@ def get_video_campaign_report_definition(self): def format_and_yield(self): report_definition = self.get_report_definition() + #import ipdb; ipdb.set_trace() stream_reader = codecs.getreader(ENCODING) if self.filter_on_video_campaigns: video_campaign_ids = self.list_video_campaign_ids() From f27cca9e9d80f36df243fa4ba1a3add9895f758e Mon Sep 17 00:00:00 2001 From: SENHAJIRHAZI Date: Mon, 16 Mar 2020 16:03:09 +0100 Subject: [PATCH 23/66] cleaned ipdbs --- nck/readers/facebook_reader.py | 1 - nck/readers/googleads_reader.py | 1 - 2 files changed, 2 deletions(-) diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index 0bace0d4..aa958068 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -230,7 +230,6 @@ def format_and_yield(self, record): yield report def result_generator(self, data): - # import ipdb; ipdb.set_trace() for record in data: yield from self.format_and_yield(record.export_all_data()) diff --git a/nck/readers/googleads_reader.py b/nck/readers/googleads_reader.py index e8159146..29e2f832 100644 --- a/nck/readers/googleads_reader.py +++ b/nck/readers/googleads_reader.py @@ -338,7 +338,6 @@ def get_video_campaign_report_definition(self): def format_and_yield(self): report_definition = self.get_report_definition() - #import ipdb; ipdb.set_trace() stream_reader = codecs.getreader(ENCODING) if self.filter_on_video_campaigns: video_campaign_ids = self.list_video_campaign_ids() From 46b066ca433cd56a7d0a11042eae6f8b3af230c8 Mon Sep 17 00:00:00 2001 From: SENHAJIRHAZI Date: Mon, 16 Mar 2020 16:12:35 +0100 Subject: [PATCH 24/66] reformat nck/fb_helper/reader --- nck/helpers/facebook_helper.py | 35 +++++++++++++---- nck/readers/facebook_reader.py | 68 ++++++++++++++++++++++++++-------- 2 files changed, 80 insertions(+), 23 deletions(-) diff --git a/nck/helpers/facebook_helper.py b/nck/helpers/facebook_helper.py index 8a63e097..769ec173 100644 --- a/nck/helpers/facebook_helper.py +++ b/nck/helpers/facebook_helper.py @@ -17,10 +17,14 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
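# Illustrative sketch (not part of the original patch): the link_url_asset entry added
# to DESIRED_FIELDS earlier in this series maps the breakdown to its nested
# "website_url" value, so an insights row like the one below yields the website URL for
# that field (row content is made up):
insights_row = {"link_url_asset": {"website_url": "https://example.com", "id": "1"}}
assert insights_row.get("link_url_asset", {}).get("website_url") == "https://example.com"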
from facebook_business.adobjects.adsinsights import AdsInsights -BREAKDOWNS_POSSIBLE_VALUES = [v for k, v in AdsInsights.Breakdowns.__dict__.items() if not k.startswith("__")] +BREAKDOWNS_POSSIBLE_VALUES = [ + v for k, v in AdsInsights.Breakdowns.__dict__.items() if not k.startswith("__") +] ACTION_BREAKDOWNS_POSSIBLE_VALUES = [ - v for k, v in AdsInsights.ActionBreakdowns.__dict__.items() if not k.startswith("__") + v + for k, v in AdsInsights.ActionBreakdowns.__dict__.items() + if not k.startswith("__") ] AD_OBJECT_TYPES = ["adaccount", "campaign", "adset", "ad", "user"] @@ -88,7 +92,9 @@ "status", ] -DATE_PRESETS = [v for k, v in AdsInsights.DatePreset.__dict__.items() if not k.startswith("__")] +DATE_PRESETS = [ + v for k, v in AdsInsights.DatePreset.__dict__.items() if not k.startswith("__") +] DESIRED_FIELDS = { "date_start": "date_start", @@ -108,9 +114,15 @@ "post_engagement": ("actions", "post_engagement"), "purchases": ("actions", "omni_purchase"), "website_purchases": ("actions", "offsite_conversion.fb_pixel_purchase"), - "purchases_conversion_value": ("action_values", "offsite_conversion.fb_pixel_purchase"), + "purchases_conversion_value": ( + "action_values", + "offsite_conversion.fb_pixel_purchase", + ), "website_purchases_conversion_value": ("action_values", "omni_purchase"), - "website_purchase_roas": ("website_purchase_roas", "offsite_conversion.fb_pixel_purchase"), + "website_purchase_roas": ( + "website_purchase_roas", + "offsite_conversion.fb_pixel_purchase", + ), "objective": "objective", "reach": "reach", "spend": "spend", @@ -144,7 +156,7 @@ "platform_position": "platform_position", "publisher_platform": "publisher_platform", "impression_device": "impression_device", - "link_url_asset" : {'value':'website_url'} + "link_url_asset": {"value": "website_url"}, } @@ -160,7 +172,7 @@ def get_field_value(row, field): def extract_special_field(row, field): dic = DESIRED_FIELDS[field] - return row.get(field, {}).get(dic.get('value'),None) + return row.get(field, {}).get(dic.get("value"), None) def is_url_asset(field): @@ -170,5 +182,12 @@ def is_url_asset(field): def get_nested_field_value(row, field): if DESIRED_FIELDS[field][0] not in row: return None - nested_field = next((x for x in row[DESIRED_FIELDS[field][0]] if x["action_type"] == DESIRED_FIELDS[field][1]), {}) + nested_field = next( + ( + x + for x in row[DESIRED_FIELDS[field][0]] + if x["action_type"] == DESIRED_FIELDS[field][1] + ), + {}, + ) return nested_field["value"] if nested_field else None diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index aa958068..d31248e8 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -53,13 +53,25 @@ def check_object_id(ctx, param, values): @click.command(name="read_facebook") -@click.option("--facebook-app-id", default="", help="Not mandatory for AdsInsights reporting if access-token provided") @click.option( - "--facebook-app-secret", default="", help="Not mandatory for AdsInsights reporting if access-token provided" + "--facebook-app-id", + default="", + help="Not mandatory for AdsInsights reporting if access-token provided", +) +@click.option( + "--facebook-app-secret", + default="", + help="Not mandatory for AdsInsights reporting if access-token provided", ) @click.option("--facebook-access-token", required=True) -@click.option("--facebook-ad-object-id", required=True, multiple=True, callback=check_object_id) -@click.option("--facebook-ad-object-type", type=click.Choice(AD_OBJECT_TYPES), default=AD_OBJECT_TYPES[0]) 
+@click.option( + "--facebook-ad-object-id", required=True, multiple=True, callback=check_object_id +) +@click.option( + "--facebook-ad-object-type", + type=click.Choice(AD_OBJECT_TYPES), + default=AD_OBJECT_TYPES[0], +) @click.option( "--facebook-breakdown", multiple=True, @@ -87,7 +99,9 @@ def check_object_id(ctx, param, values): help="Represents the granularity of result", ) @click.option("--facebook-time-increment") -@click.option("--facebook-field", multiple=True, help="Facebook API fields for the request") +@click.option( + "--facebook-field", multiple=True, help="Facebook API fields for the request" +) @click.option( "--facebook-desired-field", multiple=True, @@ -163,7 +177,10 @@ def run_query_on_fb_account_obj_conf(self, params, ad_object_id): account = AdAccount("act_" + ad_object_id) campaigns = account.get_campaigns() for el in chain( - *[self.run_query_on_fb_campaign_obj_conf(params, campaign.get("id")) for campaign in campaigns] + *[ + self.run_query_on_fb_campaign_obj_conf(params, campaign.get("id")) + for campaign in campaigns + ] ): yield el @@ -176,12 +193,18 @@ def run_query_on_fb_campaign_obj_conf(self, params, ad_object_id): elif self.level == LEVELS_POSSIBLE_VALUES[1]: for el in chain( - *[self.run_query_on_fb_adset_obj_conf(params, adset.get("id")) for adset in campaign.get_ad_sets()] + *[ + self.run_query_on_fb_adset_obj_conf(params, adset.get("id")) + for adset in campaign.get_ad_sets() + ] ): yield el else: raise ClickException( - "Received level: " + self.level + ". Available levels are " + repr(LEVELS_POSSIBLE_VALUES[1:3]) + "Received level: " + + self.level + + ". Available levels are " + + repr(LEVELS_POSSIBLE_VALUES[1:3]) ) @retry @@ -191,7 +214,10 @@ def run_query_on_fb_adset_obj_conf(self, params, ad_object_id, level): val_adset = adset.api_get(fields=self.desired_fields, params=params) yield val_adset else: - raise ClickException("Adset setup is available at 'adset' level. Received level: " + self.level) + raise ClickException( + "Adset setup is available at 'adset' level. 
Received level: " + + self.level + ) def get_params(self): params = { @@ -208,7 +234,9 @@ def add_period_to_parameters(self, params): params["time_increment"] = self.time_increment if self.start_date and self.end_date: logging.info("Date format used for request : start_date and end_date") - params["time_range"] = self.create_time_range(self.start_date, self.end_date) + params["time_range"] = self.create_time_range( + self.start_date, self.end_date + ) elif self.date_preset: logging.info("Date format used for request : date_preset") params["date_preset"] = self.date_preset @@ -220,11 +248,18 @@ def add_period_to_parameters(self, params): @staticmethod def create_time_range(start_date, end_date): - return {"since": start_date.strftime(DATEFORMAT), "until": end_date.strftime(DATEFORMAT)} + return { + "since": start_date.strftime(DATEFORMAT), + "until": end_date.strftime(DATEFORMAT), + } def format_and_yield(self, record): - import ipdb; ipdb.set_trace() - report = {field: get_field_value(record, field) for field in self.desired_fields} + import ipdb + + ipdb.set_trace() + report = { + field: get_field_value(record, field) for field in self.desired_fields + } if self.add_date_to_report: report["date"] = datetime.today().strftime(DATEFORMAT) yield report @@ -251,11 +286,14 @@ def get_data_for_object(self, ad_object_id): query = query_mapping[self.ad_object_type] data = query(params, ad_object_id) except KeyError: - raise ClickException("`{}` is not a valid adObject type".format(self.ad_object_type)) + raise ClickException( + "`{}` is not a valid adObject type".format(self.ad_object_type) + ) yield from self.result_generator(data) def read(self): FacebookAdsApi.init(self.app_id, self.app_secret, self.access_token) yield NormalizedJSONStream( - "results_" + self.ad_object_type + "_" + "_".join(self.ad_object_ids), self.get_data() + "results_" + self.ad_object_type + "_" + "_".join(self.ad_object_ids), + self.get_data(), ) From 0df896aba56efff42acada75659b1174ef5c6a07 Mon Sep 17 00:00:00 2001 From: SENHAJIRHAZI Date: Mon, 16 Mar 2020 18:50:37 +0100 Subject: [PATCH 25/66] clean ipdb --- nck/readers/facebook_reader.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index d31248e8..7fa24576 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -254,9 +254,6 @@ def create_time_range(start_date, end_date): } def format_and_yield(self, record): - import ipdb - - ipdb.set_trace() report = { field: get_field_value(record, field) for field in self.desired_fields } From e1752bddcb761afe2336a321a7a6ebd66ff397c0 Mon Sep 17 00:00:00 2001 From: SENHAJIRHAZI Date: Fri, 20 Mar 2020 15:32:25 +0100 Subject: [PATCH 26/66] linting lenght 120 --- .env | 4 +-- nck/helpers/facebook_helper.py | 31 ++++------------ nck/readers/facebook_reader.py | 64 ++++++++-------------------------- 3 files changed, 22 insertions(+), 77 deletions(-) diff --git a/.env b/.env index 438ecf16..cf087144 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ PROJECT_ID=artefact-docker-containers -DOCKER_IMAGE=nautilus-connector-kit -DOCKER_TAG=1.4.0 +DOCKER_IMAGE=nautilus-connector-kit-dev +DOCKER_TAG=FBURL DOCKER_REGISTRY=eu.gcr.io diff --git a/nck/helpers/facebook_helper.py b/nck/helpers/facebook_helper.py index 769ec173..8dc822f2 100644 --- a/nck/helpers/facebook_helper.py +++ b/nck/helpers/facebook_helper.py @@ -17,14 +17,10 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
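# Illustrative sketch (not part of the original patch): nested Facebook metrics are
# declared in DESIRED_FIELDS as (column, action_type) pairs, and get_nested_field_value
# looks up the matching action entry in the insights row. With made-up data, the
# "post_engagement" metric resolves as follows:
actions_row = {"actions": [{"action_type": "post_engagement", "value": "12"}]}
post_engagement = next(
    (x for x in actions_row["actions"] if x["action_type"] == "post_engagement"), {}
)
value = post_engagement["value"] if post_engagement else None  # -> "12"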
from facebook_business.adobjects.adsinsights import AdsInsights -BREAKDOWNS_POSSIBLE_VALUES = [ - v for k, v in AdsInsights.Breakdowns.__dict__.items() if not k.startswith("__") -] +BREAKDOWNS_POSSIBLE_VALUES = [v for k, v in AdsInsights.Breakdowns.__dict__.items() if not k.startswith("__")] ACTION_BREAKDOWNS_POSSIBLE_VALUES = [ - v - for k, v in AdsInsights.ActionBreakdowns.__dict__.items() - if not k.startswith("__") + v for k, v in AdsInsights.ActionBreakdowns.__dict__.items() if not k.startswith("__") ] AD_OBJECT_TYPES = ["adaccount", "campaign", "adset", "ad", "user"] @@ -92,9 +88,7 @@ "status", ] -DATE_PRESETS = [ - v for k, v in AdsInsights.DatePreset.__dict__.items() if not k.startswith("__") -] +DATE_PRESETS = [v for k, v in AdsInsights.DatePreset.__dict__.items() if not k.startswith("__")] DESIRED_FIELDS = { "date_start": "date_start", @@ -114,15 +108,9 @@ "post_engagement": ("actions", "post_engagement"), "purchases": ("actions", "omni_purchase"), "website_purchases": ("actions", "offsite_conversion.fb_pixel_purchase"), - "purchases_conversion_value": ( - "action_values", - "offsite_conversion.fb_pixel_purchase", - ), + "purchases_conversion_value": ("action_values", "offsite_conversion.fb_pixel_purchase"), "website_purchases_conversion_value": ("action_values", "omni_purchase"), - "website_purchase_roas": ( - "website_purchase_roas", - "offsite_conversion.fb_pixel_purchase", - ), + "website_purchase_roas": ("website_purchase_roas", "offsite_conversion.fb_pixel_purchase"), "objective": "objective", "reach": "reach", "spend": "spend", @@ -182,12 +170,5 @@ def is_url_asset(field): def get_nested_field_value(row, field): if DESIRED_FIELDS[field][0] not in row: return None - nested_field = next( - ( - x - for x in row[DESIRED_FIELDS[field][0]] - if x["action_type"] == DESIRED_FIELDS[field][1] - ), - {}, - ) + nested_field = next((x for x in row[DESIRED_FIELDS[field][0]] if x["action_type"] == DESIRED_FIELDS[field][1]), {}) return nested_field["value"] if nested_field else None diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index 7fa24576..3e0f4fc3 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -53,25 +53,13 @@ def check_object_id(ctx, param, values): @click.command(name="read_facebook") +@click.option("--facebook-app-id", default="", help="Not mandatory for AdsInsights reporting if access-token provided") @click.option( - "--facebook-app-id", - default="", - help="Not mandatory for AdsInsights reporting if access-token provided", -) -@click.option( - "--facebook-app-secret", - default="", - help="Not mandatory for AdsInsights reporting if access-token provided", + "--facebook-app-secret", default="", help="Not mandatory for AdsInsights reporting if access-token provided" ) @click.option("--facebook-access-token", required=True) -@click.option( - "--facebook-ad-object-id", required=True, multiple=True, callback=check_object_id -) -@click.option( - "--facebook-ad-object-type", - type=click.Choice(AD_OBJECT_TYPES), - default=AD_OBJECT_TYPES[0], -) +@click.option("--facebook-ad-object-id", required=True, multiple=True, callback=check_object_id) +@click.option("--facebook-ad-object-type", type=click.Choice(AD_OBJECT_TYPES), default=AD_OBJECT_TYPES[0]) @click.option( "--facebook-breakdown", multiple=True, @@ -99,9 +87,7 @@ def check_object_id(ctx, param, values): help="Represents the granularity of result", ) @click.option("--facebook-time-increment") -@click.option( - "--facebook-field", multiple=True, help="Facebook API 
fields for the request" -) +@click.option("--facebook-field", multiple=True, help="Facebook API fields for the request") @click.option( "--facebook-desired-field", multiple=True, @@ -177,10 +163,7 @@ def run_query_on_fb_account_obj_conf(self, params, ad_object_id): account = AdAccount("act_" + ad_object_id) campaigns = account.get_campaigns() for el in chain( - *[ - self.run_query_on_fb_campaign_obj_conf(params, campaign.get("id")) - for campaign in campaigns - ] + *[self.run_query_on_fb_campaign_obj_conf(params, campaign.get("id")) for campaign in campaigns] ): yield el @@ -193,18 +176,12 @@ def run_query_on_fb_campaign_obj_conf(self, params, ad_object_id): elif self.level == LEVELS_POSSIBLE_VALUES[1]: for el in chain( - *[ - self.run_query_on_fb_adset_obj_conf(params, adset.get("id")) - for adset in campaign.get_ad_sets() - ] + *[self.run_query_on_fb_adset_obj_conf(params, adset.get("id")) for adset in campaign.get_ad_sets()] ): yield el else: raise ClickException( - "Received level: " - + self.level - + ". Available levels are " - + repr(LEVELS_POSSIBLE_VALUES[1:3]) + "Received level: " + self.level + ". Available levels are " + repr(LEVELS_POSSIBLE_VALUES[1:3]) ) @retry @@ -214,10 +191,7 @@ def run_query_on_fb_adset_obj_conf(self, params, ad_object_id, level): val_adset = adset.api_get(fields=self.desired_fields, params=params) yield val_adset else: - raise ClickException( - "Adset setup is available at 'adset' level. Received level: " - + self.level - ) + raise ClickException("Adset setup is available at 'adset' level. Received level: " + self.level) def get_params(self): params = { @@ -234,9 +208,7 @@ def add_period_to_parameters(self, params): params["time_increment"] = self.time_increment if self.start_date and self.end_date: logging.info("Date format used for request : start_date and end_date") - params["time_range"] = self.create_time_range( - self.start_date, self.end_date - ) + params["time_range"] = self.create_time_range(self.start_date, self.end_date) elif self.date_preset: logging.info("Date format used for request : date_preset") params["date_preset"] = self.date_preset @@ -248,15 +220,10 @@ def add_period_to_parameters(self, params): @staticmethod def create_time_range(start_date, end_date): - return { - "since": start_date.strftime(DATEFORMAT), - "until": end_date.strftime(DATEFORMAT), - } + return {"since": start_date.strftime(DATEFORMAT), "until": end_date.strftime(DATEFORMAT)} def format_and_yield(self, record): - report = { - field: get_field_value(record, field) for field in self.desired_fields - } + report = {field: get_field_value(record, field) for field in self.desired_fields} if self.add_date_to_report: report["date"] = datetime.today().strftime(DATEFORMAT) yield report @@ -283,14 +250,11 @@ def get_data_for_object(self, ad_object_id): query = query_mapping[self.ad_object_type] data = query(params, ad_object_id) except KeyError: - raise ClickException( - "`{}` is not a valid adObject type".format(self.ad_object_type) - ) + raise ClickException("`{}` is not a valid adObject type".format(self.ad_object_type)) yield from self.result_generator(data) def read(self): FacebookAdsApi.init(self.app_id, self.app_secret, self.access_token) yield NormalizedJSONStream( - "results_" + self.ad_object_type + "_" + "_".join(self.ad_object_ids), - self.get_data(), + "results_" + self.ad_object_type + "_" + "_".join(self.ad_object_ids), self.get_data() ) From 53c0d3cc4e5a26f3d2e74fd1a5d09cea34b0e416 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Mon, 23 Mar 2020 18:20:08 +0100 
Subject: [PATCH 27/66] Feature: get yandex campaign object report --- nck/clients/api_client.py | 22 ++++++--- nck/helpers/yandex_helper.py | 41 +++++++++++++++- nck/readers/yandex_reader.py | 75 ++++++++++++++++++++--------- requirements.txt | 1 + tests/readers/test_yandex_reader.py | 0 5 files changed, 107 insertions(+), 32 deletions(-) create mode 100644 tests/readers/test_yandex_reader.py diff --git a/nck/clients/api_client.py b/nck/clients/api_client.py index b470b76c..39012bc2 100644 --- a/nck/clients/api_client.py +++ b/nck/clients/api_client.py @@ -16,7 +16,9 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. import logging -from typing import Dict +from typing import Dict, Any + +from requests_toolbelt import sessions from nck.helpers.api_client_helper import get_dict_with_keys_converted_to_new_string_format @@ -24,12 +26,10 @@ class ApiClient: - API_VERSION = "v5" - def __init__(self, token, language, skip_report_summary): + def __init__(self, token, base_url): self.token = token - self.language = language - self.skip_report_summary = skip_report_summary + self.session = sessions.BaseUrlSession(base_url=base_url) @staticmethod def get_formatted_request_body( @@ -41,5 +41,13 @@ def get_formatted_request_body( str_format ) - def execute_request(self, body): - pass + def execute_request( + self, + method: str = "GET", + url: str = "", + body: Dict[str, Any] = None, + headers: Dict[str, str] = None + ): + headers["Authorization"] = f"Bearer {self.token}" + response = self.session.request(method, url, json=body, headers=headers) + return response diff --git a/nck/helpers/yandex_helper.py b/nck/helpers/yandex_helper.py index 39706488..fc7f2a1e 100644 --- a/nck/helpers/yandex_helper.py +++ b/nck/helpers/yandex_helper.py @@ -8,10 +8,33 @@ "CRITERIA_PERFORMANCE_REPORT", "CUSTOM_REPORT", "REACH_AND_FREQUENCY_PERFORMANCE_REPORT", - "SEARCH_QUERY_PERFORMANCE_REPORT" + "SEARCH_QUERY_PERFORMANCE_REPORT", + "CAMPAIGN_OBJECT_REPORT" ] FIELDS = [ + "BlockedIps", + "ExcludedSites", + "Currency", + "DailyBudget", + "Notification", + "EndDate", + "Funds", + "ClientInfo", + "Id", + "Name", + "NegativeKeywords", + "RepresentedBy", + "StartDate", + "Statistics", + "State", + "Status", + "StatusPayment", + "StatusClarification", + "SourceId", + "TimeTargeting", + "TimeZone", + "Type", "AdFormat", "AdGroupId", "AdGroupName", @@ -114,3 +137,19 @@ "STARTS_WITH_ANY_IGNORE_CASE", "DOES_NOT_START_WITH_ALL_IGNORE_CASE" ] + +CAMPAIGN_STATES = [ + "ARCHIVED", + "CONVERTED", + "ENDED", + "OFF", + "ON", + "SUSPENDED" +] + +CAMPAIGN_STATUSES = [ + "ACCEPTED", + "DRAFT", + "MODERATION", + "REJECTED" +] diff --git a/nck/readers/yandex_reader.py b/nck/readers/yandex_reader.py index 1c358cf8..38837b31 100644 --- a/nck/readers/yandex_reader.py +++ b/nck/readers/yandex_reader.py @@ -22,8 +22,10 @@ from nck.utils.args import extract_args from nck.helpers.yandex_helper import ( LANGUAGES, REPORT_TYPES, FIELDS, ATTRIBUTION_MODELS, - DATE_RANGE_TYPES, OPERATORS + DATE_RANGE_TYPES, OPERATORS, CAMPAIGN_STATES, CAMPAIGN_STATUSES ) +from nck.clients.api_client import ApiClient +from nck.streams.json_stream import JSONStream class StrList(click.ParamType): @@ -42,6 +44,24 @@ def convert(self, value, param, ctx): type=click.Choice(LANGUAGES), default="en" ) +@click.option( + "--yandex-campaign-id", + multiple=True +) +@click.option( + "--yandex-campaign-state", + multiple=True, + type=click.Choice(CAMPAIGN_STATES) +) +@click.option( + 
"--yandex-campaign-status", + multiple=True, + type=click.Choice(CAMPAIGN_STATUSES) +) +@click.option( + "--yandex-campaign-payment-allowed", + type=click.BOOL +) @click.option( "--yandex-filter", "yandex_filters", @@ -70,8 +90,7 @@ def convert(self, value, param, ctx): ) ) @click.option( - "--yandex-report-name", - required=True + "--yandex-report-name" ) @click.option( "--yandex-report-type", @@ -80,13 +99,11 @@ def convert(self, value, param, ctx): ) @click.option( "--yandex-date-range", - type=click.Choice(DATE_RANGE_TYPES), - required=True + type=click.Choice(DATE_RANGE_TYPES) ) @click.option( "--yandex-include-vat", type=click.BOOL, - default=False, help="Whether to include VAT in the monetary amounts in the report." ) @click.option( @@ -102,31 +119,41 @@ def yandex(**kwargs): return YandexReader(**extract_args("yandex_", kwargs)) +YANDEX_DIRECT_API_BASE_URL = "https://api.direct.yandex.com/json/v5/" + + class YandexReader(Reader): + def __init__( self, token, - report_language, - filters, - attribution_model, - max_rows, fields, - report_name, report_type, - date_range, - include_vat, - date_start, - date_stop + **kwargs ): self.token = token - self.report_language = report_language - self.filters = filters - self.attribution_model = attribution_model - self.max_rows = max_rows self.fields = fields - self.report_name = report_name self.report_type = report_type - self.date_range = date_range - self.include_vat = include_vat - self.date_start = date_start - self.date_stop = date_stop + self.kwargs = kwargs + + def result_generator(self): + api_client = ApiClient(self.token, YANDEX_DIRECT_API_BASE_URL) + request_body = self._build_query_body() + response = api_client.execute_request(url="campaigns", body=request_body, headers={}) + yield response.json() + + def _build_query_body(self): + body = {} + if self.report_type == "CAMPAIGN_OBJECT_REPORT": + body["method"] = "get" + body["params"] = ApiClient.get_formatted_request_body( + field_names=self.fields, + selection_criteria={} + ) + return body + + def read(self): + yield JSONStream( + f"results_{self.report_type}", + self.result_generator() + ) diff --git a/requirements.txt b/requirements.txt index 62d96a95..554647a3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -48,6 +48,7 @@ PyYAML==5.2 radarly-py==1.0.10 redis==3.3.11 requests==2.22.0 +requests-toolbet==0.9.1 rsa==4.0 s3transfer==0.2.1 six==1.13.0 diff --git a/tests/readers/test_yandex_reader.py b/tests/readers/test_yandex_reader.py new file mode 100644 index 00000000..e69de29b From 121743f844f669827698ef2401457b34665b180f Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Mon, 23 Mar 2020 18:24:14 +0100 Subject: [PATCH 28/66] Fix: typo in requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 554647a3..71a60889 100644 --- a/requirements.txt +++ b/requirements.txt @@ -48,7 +48,7 @@ PyYAML==5.2 radarly-py==1.0.10 redis==3.3.11 requests==2.22.0 -requests-toolbet==0.9.1 +requests-toolbelt==0.9.1 rsa==4.0 s3transfer==0.2.1 six==1.13.0 From 095d381fa4179c80288138bc5ae2dd4baa7e8530 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Mon, 23 Mar 2020 18:57:49 +0100 Subject: [PATCH 29/66] Test: test campaign object report --- nck/readers/yandex_reader.py | 2 +- tests/readers/test_yandex_reader.py | 57 +++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/nck/readers/yandex_reader.py b/nck/readers/yandex_reader.py index 38837b31..a7cc5bb7 100644 --- 
a/nck/readers/yandex_reader.py +++ b/nck/readers/yandex_reader.py @@ -132,7 +132,7 @@ def __init__( **kwargs ): self.token = token - self.fields = fields + self.fields = list(fields) self.report_type = report_type self.kwargs = kwargs diff --git a/tests/readers/test_yandex_reader.py b/tests/readers/test_yandex_reader.py index e69de29b..f0faeb82 100644 --- a/tests/readers/test_yandex_reader.py +++ b/tests/readers/test_yandex_reader.py @@ -0,0 +1,57 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import unittest + +from nck.readers.yandex_reader import YandexReader + + +class TestYandexReader(unittest.TestCase): + + def test_get_query_body(self): + kwargs = { + "report_language": "en", + "campaign_id": (), + "campaign_state": (), + "campaign_status": (), + "campaign_payment_allowed": None, + "filters": (), + "attribution_model": (), + "max_rows": None, + "report_name": None, + "date_range": None, + "include_vat": None, + "date_start": None, + "date_stop": None + } + reader = YandexReader( + "123", + ("Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"), + "CAMPAIGN_OBJECT_REPORT", + **kwargs + ) + + expected_query_body = { + "method": "get", + "params": { + "SelectionCriteria": { + }, + "FieldNames": ["Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"] + } + } + + self.assertDictEqual(reader._build_query_body(), expected_query_body) From ad068cb0ac08550a0ec99951386b3cfa89e2e0ea Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Tue, 24 Mar 2020 15:20:51 +0100 Subject: [PATCH 30/66] Fix: remove method to create request body --- nck/clients/api_client.py | 12 ------------ nck/helpers/api_client_helper.py | 4 ++-- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/nck/clients/api_client.py b/nck/clients/api_client.py index 39012bc2..469083d2 100644 --- a/nck/clients/api_client.py +++ b/nck/clients/api_client.py @@ -20,8 +20,6 @@ from requests_toolbelt import sessions -from nck.helpers.api_client_helper import get_dict_with_keys_converted_to_new_string_format - logger = logging.getLogger("ApiClient") @@ -31,16 +29,6 @@ def __init__(self, token, base_url): self.token = token self.session = sessions.BaseUrlSession(base_url=base_url) - @staticmethod - def get_formatted_request_body( - str_format: str = "PascalCase", - **request_body_elements - ) -> Dict: - return get_dict_with_keys_converted_to_new_string_format( - request_body_elements, - str_format - ) - def execute_request( self, method: str = "GET", diff --git a/nck/helpers/api_client_helper.py b/nck/helpers/api_client_helper.py index 8991adcb..daeb8926 100644 --- a/nck/helpers/api_client_helper.py +++ b/nck/helpers/api_client_helper.py @@ -15,7 +15,7 @@ # You should have received a copy of 
the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from typing import Dict +from typing import Dict, Any import logging logging.getLogger("ApiClient") @@ -24,7 +24,7 @@ def get_dict_with_keys_converted_to_new_string_format( - dictionary: Dict, str_format: str + dictionary: Dict[str, Any], str_format: str = "PascalCase" ) -> Dict: if str_format in POSSIBLE_STRING_FORMATS and str_format == "PascalCase": new_keys = [ From 944fbe41c358bea37e962c718fd933aeac311a72 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Tue, 24 Mar 2020 16:19:44 +0100 Subject: [PATCH 31/66] Fix: adapt helper method to new design --- nck/helpers/api_client_helper.py | 13 +++++++------ tests/helpers/test_api_client_helper.py | 12 ++++-------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/nck/helpers/api_client_helper.py b/nck/helpers/api_client_helper.py index daeb8926..5fa3d667 100644 --- a/nck/helpers/api_client_helper.py +++ b/nck/helpers/api_client_helper.py @@ -15,7 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from typing import Dict, Any +from typing import Dict import logging logging.getLogger("ApiClient") @@ -24,17 +24,18 @@ def get_dict_with_keys_converted_to_new_string_format( - dictionary: Dict[str, Any], str_format: str = "PascalCase" + str_format: str = "PascalCase", **kwargs ) -> Dict: if str_format in POSSIBLE_STRING_FORMATS and str_format == "PascalCase": new_keys = [ "".join(word.capitalize() for word in old_key.split("_")) - for old_key in dictionary + for old_key in kwargs ] - old_keys = dictionary.copy().keys() + old_keys = kwargs.copy().keys() + formatted_dict = {} for old_key, new_key in zip(old_keys, new_keys): - dictionary[new_key] = dictionary.pop(old_key) - return dictionary + formatted_dict[new_key] = kwargs.pop(old_key) + return formatted_dict else: logging.error(( "Unable to convert to new string format. " diff --git a/tests/helpers/test_api_client_helper.py b/tests/helpers/test_api_client_helper.py index c43b738a..6d8aea16 100644 --- a/tests/helpers/test_api_client_helper.py +++ b/tests/helpers/test_api_client_helper.py @@ -24,15 +24,11 @@ class ApiClientHelperTest(unittest.TestCase): def test_string_conversion_to_camel_case(self): - dict_with_unformatted_keys = { - "abc_de": 1, - "abc": "abc", - "abc_de_fg": 2 - } self.assertDictEqual( get_dict_with_keys_converted_to_new_string_format( - dict_with_unformatted_keys, - "PascalCase" + abc_de=1, + abc="abc", + abc_de_fg=2 ), { "AbcDe": 1, @@ -44,7 +40,7 @@ def test_string_conversion_to_camel_case(self): def test_unknown_case(self): with self.assertLogs() as cm: logging.getLogger("ApiClient") - get_dict_with_keys_converted_to_new_string_format({}, "UnknownCase") + get_dict_with_keys_converted_to_new_string_format("UnknownCase") self.assertEqual( cm.output, ["ERROR:root:Unable to convert to new string format. 
Format not in ['PascalCase']"] From f7f715d79f6765a250967e9e791f43835296c1f9 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Tue, 24 Mar 2020 16:20:57 +0100 Subject: [PATCH 32/66] Fix: delete one class to have two separate yandex commands --- nck/readers/yandex_reader.py | 159 ---------------------------- tests/readers/test_yandex_reader.py | 57 ---------- 2 files changed, 216 deletions(-) delete mode 100644 nck/readers/yandex_reader.py delete mode 100644 tests/readers/test_yandex_reader.py diff --git a/nck/readers/yandex_reader.py b/nck/readers/yandex_reader.py deleted file mode 100644 index a7cc5bb7..00000000 --- a/nck/readers/yandex_reader.py +++ /dev/null @@ -1,159 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -import click - -from nck.commands.command import processor -from nck.readers.reader import Reader -from nck.utils.args import extract_args -from nck.helpers.yandex_helper import ( - LANGUAGES, REPORT_TYPES, FIELDS, ATTRIBUTION_MODELS, - DATE_RANGE_TYPES, OPERATORS, CAMPAIGN_STATES, CAMPAIGN_STATUSES -) -from nck.clients.api_client import ApiClient -from nck.streams.json_stream import JSONStream - - -class StrList(click.ParamType): - - def convert(self, value, param, ctx): - return value.split(",") - - -STR_LIST_TYPE = StrList() - - -@click.command(name="read_yandex") -@click.option("--yandex-token", required=True) -@click.option( - "--yandex-report-language", - type=click.Choice(LANGUAGES), - default="en" -) -@click.option( - "--yandex-campaign-id", - multiple=True -) -@click.option( - "--yandex-campaign-state", - multiple=True, - type=click.Choice(CAMPAIGN_STATES) -) -@click.option( - "--yandex-campaign-status", - multiple=True, - type=click.Choice(CAMPAIGN_STATUSES) -) -@click.option( - "--yandex-campaign-payment-allowed", - type=click.BOOL -) -@click.option( - "--yandex-filter", - "yandex_filters", - multiple=True, - type=click.Tuple([click.Choice(FIELDS), click.Choice(OPERATORS), STR_LIST_TYPE]) -) -@click.option( - "--yandex-attribution-model", - multiple=True, - type=click.Choice(ATTRIBUTION_MODELS) -) -@click.option( - "--yandex-max-rows", - type=int -) -@click.option( - "--yandex-field-name", - "yandex_fields", - multiple=True, - type=click.Choice(FIELDS), - required=True, - help=( - "Fields to output in the report (columns)." 
- "For the full list of fields and their meanings, " - "see https://tech.yandex.com/direct/doc/reports/fields-list-docpage/" - ) -) -@click.option( - "--yandex-report-name" -) -@click.option( - "--yandex-report-type", - type=click.Choice(REPORT_TYPES), - required=True -) -@click.option( - "--yandex-date-range", - type=click.Choice(DATE_RANGE_TYPES) -) -@click.option( - "--yandex-include-vat", - type=click.BOOL, - help="Whether to include VAT in the monetary amounts in the report." -) -@click.option( - "--yandex-date-start", - type=click.DateTime() -) -@click.option( - "--yandex-date-stop", - type=click.DateTime() -) -@processor("yandex_token") -def yandex(**kwargs): - return YandexReader(**extract_args("yandex_", kwargs)) - - -YANDEX_DIRECT_API_BASE_URL = "https://api.direct.yandex.com/json/v5/" - - -class YandexReader(Reader): - - def __init__( - self, - token, - fields, - report_type, - **kwargs - ): - self.token = token - self.fields = list(fields) - self.report_type = report_type - self.kwargs = kwargs - - def result_generator(self): - api_client = ApiClient(self.token, YANDEX_DIRECT_API_BASE_URL) - request_body = self._build_query_body() - response = api_client.execute_request(url="campaigns", body=request_body, headers={}) - yield response.json() - - def _build_query_body(self): - body = {} - if self.report_type == "CAMPAIGN_OBJECT_REPORT": - body["method"] = "get" - body["params"] = ApiClient.get_formatted_request_body( - field_names=self.fields, - selection_criteria={} - ) - return body - - def read(self): - yield JSONStream( - f"results_{self.report_type}", - self.result_generator() - ) diff --git a/tests/readers/test_yandex_reader.py b/tests/readers/test_yandex_reader.py deleted file mode 100644 index f0faeb82..00000000 --- a/tests/readers/test_yandex_reader.py +++ /dev/null @@ -1,57 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import unittest - -from nck.readers.yandex_reader import YandexReader - - -class TestYandexReader(unittest.TestCase): - - def test_get_query_body(self): - kwargs = { - "report_language": "en", - "campaign_id": (), - "campaign_state": (), - "campaign_status": (), - "campaign_payment_allowed": None, - "filters": (), - "attribution_model": (), - "max_rows": None, - "report_name": None, - "date_range": None, - "include_vat": None, - "date_start": None, - "date_stop": None - } - reader = YandexReader( - "123", - ("Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"), - "CAMPAIGN_OBJECT_REPORT", - **kwargs - ) - - expected_query_body = { - "method": "get", - "params": { - "SelectionCriteria": { - }, - "FieldNames": ["Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"] - } - } - - self.assertDictEqual(reader._build_query_body(), expected_query_body) From fed73762d1c9f995e074874f4090f346cb55eae6 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Tue, 24 Mar 2020 16:26:02 +0100 Subject: [PATCH 33/66] Fix: separate lists of fields to be specific --- nck/helpers/yandex_helper.py | 52 +++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/nck/helpers/yandex_helper.py b/nck/helpers/yandex_helper.py index fc7f2a1e..b93f07e3 100644 --- a/nck/helpers/yandex_helper.py +++ b/nck/helpers/yandex_helper.py @@ -8,33 +8,10 @@ "CRITERIA_PERFORMANCE_REPORT", "CUSTOM_REPORT", "REACH_AND_FREQUENCY_PERFORMANCE_REPORT", - "SEARCH_QUERY_PERFORMANCE_REPORT", - "CAMPAIGN_OBJECT_REPORT" + "SEARCH_QUERY_PERFORMANCE_REPORT" ] -FIELDS = [ - "BlockedIps", - "ExcludedSites", - "Currency", - "DailyBudget", - "Notification", - "EndDate", - "Funds", - "ClientInfo", - "Id", - "Name", - "NegativeKeywords", - "RepresentedBy", - "StartDate", - "Statistics", - "State", - "Status", - "StatusPayment", - "StatusClarification", - "SourceId", - "TimeTargeting", - "TimeZone", - "Type", +STATS_FIELDS = [ "AdFormat", "AdGroupId", "AdGroupName", @@ -101,6 +78,31 @@ "Year" ] +CAMPAIGN_FIELDS = [ + "BlockedIps", + "ExcludedSites", + "Currency", + "DailyBudget", + "Notification", + "EndDate", + "Funds", + "ClientInfo", + "Id", + "Name", + "NegativeKeywords", + "RepresentedBy", + "StartDate", + "Statistics", + "State", + "Status", + "StatusPayment", + "StatusClarification", + "SourceId", + "TimeTargeting", + "TimeZone", + "Type" +] + ATTRIBUTION_MODELS = ["FC", "LC", "LSC", "LYDC"] DATE_RANGE_TYPES = [ From 6bda32daf5f4bd4d0747d7c1b0d0ad370f297a81 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Tue, 24 Mar 2020 18:19:36 +0100 Subject: [PATCH 34/66] Fix: 2 classes for totally different yandex reports --- nck/helpers/yandex_helper.py | 5 + nck/readers/__init__.py | 6 +- nck/readers/yandex_campaign_reader.py | 118 +++++++++++++++ nck/readers/yandex_statistics_reader.py | 143 +++++++++++++++++++ tests/readers/test_yandex_campaign_reader.py | 77 ++++++++++ 5 files changed, 346 insertions(+), 3 deletions(-) create mode 100644 nck/readers/yandex_campaign_reader.py create mode 100644 nck/readers/yandex_statistics_reader.py create mode 100644 tests/readers/test_yandex_campaign_reader.py diff --git a/nck/helpers/yandex_helper.py b/nck/helpers/yandex_helper.py index b93f07e3..536dfaf1 100644 --- a/nck/helpers/yandex_helper.py +++ b/nck/helpers/yandex_helper.py @@ -155,3 +155,8 @@ "MODERATION", "REJECTED" ] + +CAMPAIGN_PAYMENT_STATUSES = [ + "ALLOWED", + "DISALLOWED" +] diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py index 0ad05054..36820935 100644 --- 
a/nck/readers/__init__.py +++ b/nck/readers/__init__.py @@ -31,8 +31,8 @@ from nck.readers.search_console_reader import search_console from nck.readers.adobe_reader import adobe from nck.readers.radarly_reader import radarly -from nck.readers.yandex_reader import yandex - +from nck.readers.yandex_campaign_reader import yandex_campaigns +from nck.readers.yandex_statistics_reader import yandex_statistics readers = [ mysql, @@ -49,7 +49,7 @@ search_console, adobe, radarly, - yandex + yandex_campaigns ] diff --git a/nck/readers/yandex_campaign_reader.py b/nck/readers/yandex_campaign_reader.py new file mode 100644 index 00000000..d1142907 --- /dev/null +++ b/nck/readers/yandex_campaign_reader.py @@ -0,0 +1,118 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import click + +import nck.helpers.api_client_helper as api_client_helper +from nck.clients.api_client import ApiClient +from nck.commands.command import processor +from nck.helpers.yandex_helper import (CAMPAIGN_FIELDS, CAMPAIGN_STATES, + CAMPAIGN_STATUSES, CAMPAIGN_PAYMENT_STATUSES) +from nck.readers.reader import Reader +from nck.streams.json_stream import JSONStream +from nck.utils.args import extract_args + + +@click.command(name="read_yandex_campaigns") +@click.option("--yandex-token", required=True) +@click.option( + "--yandex-campaign-id", + "yandex_campaign_ids", + multiple=True +) +@click.option( + "--yandex-campaign-state", + "yandex_campaign_states", + multiple=True, + type=click.Choice(CAMPAIGN_STATES) +) +@click.option( + "--yandex-campaign-status", + "yandex_campaign_statuses", + multiple=True, + type=click.Choice(CAMPAIGN_STATUSES) +) +@click.option( + "--yandex-campaign-payment-status", + "yandex_campaign_payment_statuses", + multiple=True, + type=click.Choice(CAMPAIGN_PAYMENT_STATUSES) +) +@click.option( + "--yandex-field-name", + "yandex_fields", + multiple=True, + type=click.Choice(CAMPAIGN_FIELDS), + required=True, + help=( + "Fields to output in the report (columns)." 
+ "For the full list of fields and their meanings, " + "see https://tech.yandex.com/direct/doc/reports/fields-list-docpage/" + ) +) +@processor("yandex_token") +def yandex_campaigns(**kwargs): + return YandexCampaignReader(**extract_args("yandex_", kwargs)) + + +YANDEX_DIRECT_API_BASE_URL = "https://api.direct.yandex.com/json/v5/" + + +class YandexCampaignReader(Reader): + + def __init__( + self, + token, + fields, + **kwargs + ): + self.token = token + self.fields = list(fields) + self.campaign_ids = list(kwargs["campaign_ids"]) + self.campaign_states = list(kwargs["campaign_states"]) + self.campaign_statuses = list(kwargs["campaign_statuses"]) + self.campaign_payment_statuses = list(kwargs["campaign_payment_statuses"]) + + def result_generator(self): + api_client = ApiClient(self.token, YANDEX_DIRECT_API_BASE_URL) + request_body = self._build_request_body() + response = api_client.execute_request(url="campaigns", body=request_body, headers={}) + yield response.json() + + def _build_request_body(self): + body = {} + body["method"] = "get" + selection_criteria = {} + if len(self.campaign_ids) != 0: + selection_criteria["Ids"] = self.campaign_ids + if len(self.campaign_states) != 0: + selection_criteria["States"] = self.campaign_states + if len(self.campaign_statuses) != 0: + selection_criteria["Statuses"] = self.campaign_statuses + if len(self.campaign_payment_statuses) != 0: + selection_criteria["StatusesPayment"] = self.campaign_payment_statuses + body["params"] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( + field_names=self.fields, + selection_criteria=selection_criteria + ) + return body + + def read(self): + yield JSONStream( + "results_CAMPAIGN_OBJECT_REPORT_", + self.result_generator() + ) diff --git a/nck/readers/yandex_statistics_reader.py b/nck/readers/yandex_statistics_reader.py new file mode 100644 index 00000000..3af8d647 --- /dev/null +++ b/nck/readers/yandex_statistics_reader.py @@ -0,0 +1,143 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+import click + +from nck.clients.api_client import ApiClient +from nck.commands.command import processor +from nck.helpers.yandex_helper import (ATTRIBUTION_MODELS, DATE_RANGE_TYPES, + LANGUAGES, OPERATORS, REPORT_TYPES, + STATS_FIELDS) +from nck.readers.reader import Reader +from nck.streams.json_stream import JSONStream +from nck.utils.args import extract_args + + +class StrList(click.ParamType): + + def convert(self, value, param, ctx): + return value.split(",") + + +STR_LIST_TYPE = StrList() + + +@click.command(name="read_yandex_statistics") +@click.option("--yandex-token", required=True) +@click.option( + "--yandex-report-language", + type=click.Choice(LANGUAGES), + default="en" +) +@click.option( + "--yandex-filter", + "yandex_filters", + multiple=True, + type=click.Tuple([click.Choice(STATS_FIELDS), click.Choice(OPERATORS), STR_LIST_TYPE]) +) +@click.option( + "--yandex-attribution-model", + multiple=True, + type=click.Choice(ATTRIBUTION_MODELS) +) +@click.option( + "--yandex-max-rows", + type=int +) +@click.option( + "--yandex-field-name", + "yandex_fields", + multiple=True, + type=click.Choice(), + required=True, + help=( + "Fields to output in the report (columns)." + "For the full list of fields and their meanings, " + "see https://tech.yandex.com/direct/doc/reports/fields-list-docpage/" + ) +) +@click.option( + "--yandex-report-name", + required=True +) +@click.option( + "--yandex-report-type", + type=click.Choice(REPORT_TYPES), + required=True +) +@click.option( + "--yandex-date-range", + type=click.Choice(DATE_RANGE_TYPES), + required=True +) +@click.option( + "--yandex-include-vat", + type=click.BOOL, + required=True, + help="Whether to include VAT in the monetary amounts in the report." +) +@click.option( + "--yandex-date-start", + type=click.DateTime() +) +@click.option( + "--yandex-date-stop", + type=click.DateTime() +) +@processor("yandex_token") +def yandex_statistics(**kwargs): + return YandexStatisticsReader(**extract_args("yandex_", kwargs)) + + +YANDEX_DIRECT_API_BASE_URL = "https://api.direct.yandex.com/json/v5/" + + +class YandexStatisticsReader(Reader): + + def __init__( + self, + token, + fields, + report_type, + report_name, + date_range, + include_vat, + **kwargs + ): + self.token = token + self.fields = list(fields) + self.report_type = report_type + self.report_name = report_name + self.date_range = date_range + self.include_vat = include_vat + self.kwargs = kwargs + + def result_generator(self): + api_client = ApiClient(self.token, YANDEX_DIRECT_API_BASE_URL) + request_body = self._build_query_body() + response = api_client.execute_request(url="reports", body=request_body, headers={}) + yield response.json() + + def _build_request_body(self): + body = {} + return body + + def read(self): + yield JSONStream( + f"results_{self.report_type}", + self.result_generator() + ) diff --git a/tests/readers/test_yandex_campaign_reader.py b/tests/readers/test_yandex_campaign_reader.py new file mode 100644 index 00000000..b13c07e3 --- /dev/null +++ b/tests/readers/test_yandex_campaign_reader.py @@ -0,0 +1,77 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import unittest + +from parameterized import parameterized + +from nck.readers.yandex_campaign_reader import YandexCampaignReader + + +class TestYandexReader(unittest.TestCase): + + @parameterized.expand([ + ( + { + "campaign_states": (), + "campaign_ids": (), + "campaign_statuses": (), + "campaign_payment_statuses": () + }, + { + "method": "get", + "params": { + "SelectionCriteria": { + }, + "FieldNames": ["Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"] + } + } + ), + ( + { + "campaign_states": ("ON",), + "campaign_ids": (), + "campaign_statuses": ("ACCEPTED",), + "campaign_payment_statuses": ("ALLOWED",) + }, + { + "method": "get", + "params": { + "SelectionCriteria": { + "States": ["ON"], + "Statuses": ["ACCEPTED"], + "StatusesPayment": ["ALLOWED"] + }, + "FieldNames": ["Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"] + } + } + ) + ]) + def test_get_query_body( + self, + kwargs, + expected_query_body + ): + reader = YandexCampaignReader( + "123", + ("Id", "Name", "TimeZone", "DailyBudget", "Currency", "EndDate", "StartDate"), + campaign_ids=kwargs["campaign_ids"], + campaign_states=kwargs["campaign_states"], + campaign_statuses=kwargs["campaign_statuses"], + campaign_payment_statuses=kwargs["campaign_payment_statuses"] + ) + self.assertDictEqual(reader._build_request_body(), expected_query_body) From ebbb0b69b7da2a36b08d26422fc3ead73075cea3 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Tue, 24 Mar 2020 18:37:58 +0100 Subject: [PATCH 35/66] Fix: missing reader --- nck/readers/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py index 36820935..8c5375a8 100644 --- a/nck/readers/__init__.py +++ b/nck/readers/__init__.py @@ -49,7 +49,8 @@ search_console, adobe, radarly, - yandex_campaigns + yandex_campaigns, + yandex_statistics ] From 00f100d67170715eedd3ca93f74a44e26aacfa02 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Wed, 25 Mar 2020 15:30:15 +0100 Subject: [PATCH 36/66] sa360 reader v0 --- .env | 4 +- nck/clients/sa360_client.py | 171 ++++++++++++++++++++++++++++++++++++ nck/helpers/sa360_helper.py | 41 +++++++++ nck/readers/__init__.py | 2 + nck/readers/sa360_reader.py | 123 ++++++++++++++++++++++++++ 5 files changed, 339 insertions(+), 2 deletions(-) create mode 100644 nck/clients/sa360_client.py create mode 100644 nck/helpers/sa360_helper.py create mode 100644 nck/readers/sa360_reader.py diff --git a/.env b/.env index cf087144..438ecf16 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ PROJECT_ID=artefact-docker-containers -DOCKER_IMAGE=nautilus-connector-kit-dev -DOCKER_TAG=FBURL +DOCKER_IMAGE=nautilus-connector-kit +DOCKER_TAG=1.4.0 DOCKER_REGISTRY=eu.gcr.io diff --git a/nck/clients/sa360_client.py b/nck/clients/sa360_client.py new file mode 100644 index 00000000..c953855a --- /dev/null +++ b/nck/clients/sa360_client.py @@ -0,0 +1,171 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free 
software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import logging +import httplib2 +import requests + +from tenacity import retry, wait_exponential, stop_after_delay +from oauth2client import client, GOOGLE_TOKEN_URI +from googleapiclient import discovery + + +logger = logging.getLogger("SA360_client") + +DOWNLOAD_FORMAT = "CSV" + + +class SA360Client: + API_NAME = "doubleclicksearch" + API_VERSION = "v2" + + def __init__(self, access_token, client_id, client_secret, refresh_token): + self._credentials = client.GoogleCredentials( + access_token=access_token, + client_id=client_id, + client_secret=client_secret, + refresh_token=refresh_token, + token_expiry=None, + token_uri=GOOGLE_TOKEN_URI, + user_agent=None, + ) + http = self._credentials.authorize(httplib2.Http()) + self._credentials.refresh(http) + self.auth = ( + f"{self._credentials.token_response['token_type']} {self._credentials.token_response['access_token']}" + ) + self._service = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) + + @staticmethod + def generate_report_body( + agency_id, advertiser_id, report_type, columns, start_date, end_date, custom_metrics, custom_dimensions + ): + all_columns = SA360Client.generate_columns(columns, custom_metrics, custom_dimensions) + body = { + "reportScope": {"agencyId": agency_id, "advertiserId": advertiser_id}, + "reportType": report_type, + "columns": all_columns, + "timeRange": SA360Client.get_date_range(start_date, end_date), + "downloadFormat": "csv", + "maxRowsPerFile": 4000000, + "statisticsCurrency": "usd", + } + logger.info("Report Body Generated") + + return body + + def request_report_id(self, body): + report = self._service.reports().request(body=body).execute() + logger.info("Report requested!") + return report["id"] + + @retry(wait=wait_exponential(multiplier=1, min=1, max=8), stop=stop_after_delay(3600)) + def assert_report_file_ready(self, report_id): + """Poll the API with the reportId until the report is ready, up to 100 times. + + Args: + report_id: The ID SA360 has assigned to a report. + """ + request = self._service.reports().get(reportId=report_id) + report_data = request.execute() + if report_data["isReportReady"]: + logger.info("The report is ready.") + + # For large reports, SA360 automatically fragments the report into multiple + # files. The 'files' property in the JSON object that SA360 returns contains + # the list of URLs for file fragment. To download a report, SA360 needs to + # know the report ID and the index of a file fragment. 
+ return report_data + else: + logger.info("Report is not ready.") + raise FileNotFoundError + + def download_report_files(self, json_data, report_id): + for fragment in range(len(json_data["files"])): + logger.info(f"Downloading fragment {str(fragment)} for report {report_id}") + yield self.download_fragment(report_id, str(fragment)) + + def download_fragment(self, report_id, fragment): + """Generate and convert to df a report fragment. + + Args: + report_id: The ID SA360 has assigned to a report. + report_fragment: The 0-based index of the file fragment from the files array. + currency_code: the currency code of the report + """ + # csv_fragment_report = (self._service.reports().getFile(reportId=report_id, reportFragment=fragment).execute()) + # print(csv_fragment_report) + # print(io.BytesIO(csv_fragment_report)) + request = self._service.reports().getFile(reportId=report_id, reportFragment=fragment) + headers = request.headers + headers.update({"Authorization": self.auth}) + r = requests.get(request.uri, stream=True, headers=headers) + + yield from r.iter_lines() + + # i = 0 + # index = 0 + # impr_keyword = 0 + # for row in r.iter_lines(): + # decoded_row = row.decode("utf-8") + # if "impr" in decoded_row: + # decoded_row = decoded_row.split(",") + # index = decoded_row.index("impr") + # continue + # + # if "samsung note 10+ 6.8" in decoded_row: + # r = decoded_row.split(",") + # impr_keyword += int(r[index]) + # print(decoded_row) + # decoded_row = decoded_row.split(",") + # i += int(decoded_row[index]) + # print("IMPRESSIONS", i, impr_keyword) + + # yield from r.iter_lines() + + # df = pd.DataFrame.from_csv(io.BytesIO(csv_fragment_report)) + # df["currency_code"] = currency_code + # from tabulate import tabulate + # print(tabulate(df, headers='keys', tablefmt='psql')) + # return df + + def direct_report_download(self, report_id, file_id): + # Retrieve the file metadata. + report_file = self._service.files().get(reportId=report_id, fileId=file_id).execute() + + if report_file["status"] == "REPORT_AVAILABLE": + # Create a get request. + request = self._service.files().get_media(reportId=report_id, fileId=file_id) + headers = request.headers + r = requests.get(request.uri, stream=True, headers=headers) + + yield from r.iter_lines() + + @staticmethod + def generate_columns(columns, custom_dimensions, custom_metrics): + standard = [{"columnName": column} for column in columns] + dimensions = [{"columnDimensionName": column, "platformSource": "floodlight"} for column in custom_dimensions] + metrics = [{"columnMetricName": column, "platformSource": "floodlight"} for column in custom_metrics] + + return standard + dimensions + metrics + + @staticmethod + def get_date_range(start_date, end_date): + start = start_date.strftime("%Y-%m-%d") + end = end_date.strftime("%Y-%m-%d") + logger.warning(f"Custom date range selected: {start} --> {end}") + return {"startDate": start, "endDate": end} diff --git a/nck/helpers/sa360_helper.py b/nck/helpers/sa360_helper.py new file mode 100644 index 00000000..7375996c --- /dev/null +++ b/nck/helpers/sa360_helper.py @@ -0,0 +1,41 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +REPORT_TYPES = [ + "advertiser", + "account", + "ad", + "adGroup", + "adGroupTarget", + "bidStrategy", + "campaign", + "campaignTarget", + "conversion", + "feedItem", + "floodlightActivity", + "keyword", + "negativeAdGroupKeyword", + "negativeAdGroupTarget", + "negativeCampaignKeyword", + "negativeCampaignTarget", + "paidAndOrganic", + "productAdvertised", + "productGroup", + "productLeadAndCrossSell", + "productTarget", + "visit", +] diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py index f2cb21a1..8e7f800e 100644 --- a/nck/readers/__init__.py +++ b/nck/readers/__init__.py @@ -21,6 +21,7 @@ from nck.readers.gcs_reader import gcs from nck.readers.googleads_reader import google_ads from nck.readers.s3_reader import s3 +from nck.readers.sa360_reader import sa360_reader from nck.readers.oracle_reader import oracle from nck.readers.gsheets_reader import gsheets from nck.readers.salesforce_reader import salesforce @@ -40,6 +41,7 @@ gcs, google_ads, s3, + sa360_reader, facebook_marketing, oracle, dbm, diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py new file mode 100644 index 00000000..b201a6db --- /dev/null +++ b/nck/readers/sa360_reader.py @@ -0,0 +1,123 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+import csv +import click + +from io import StringIO + +from nck.commands.command import processor +from nck.readers.reader import Reader +from nck.utils.args import extract_args +from nck.streams.normalized_json_stream import NormalizedJSONStream +from nck.clients.sa360_client import SA360Client +from nck.helpers.sa360_helper import REPORT_TYPES + +DATEFORMAT = "%Y-%m-%d" +ENCODING = "utf-8" + + +@click.command(name="read_sa360") +@click.option("--sa360-access-token", default=None) +@click.option("--sa360-client-id", required=True) +@click.option("--sa360-client-secret", required=True) +@click.option("--sa360-refresh-token", required=True) +@click.option("--sa360-agency-id", required=True) +@click.option("--sa360-advertiser-id", "sa360_advertiser_ids", required=True, multiple=True) +@click.option("--sa360-report-name", default="SA360 Report") +@click.option("--sa360-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0]) +@click.option( + "--sa360-column", "sa360_columns", multiple=True, help="https://developers.google.com/search-ads/v2/report-types" +) +@click.option( + "--sa360-custom-dimension", + "sa360_custom_dimensions", + multiple=True, + help="https://developers.google.com/search-ads/v2/how-tos/reporting/custom-metrics-dimensions", +) +@click.option( + "--sa360-custom-metric", + "sa360_custom_metrics", + multiple=True, + help="https://developers.google.com/search-ads/v2/how-tos/reporting/custom-metrics-dimensions", +) +@click.option("--sa360-start-date", type=click.DateTime(), required=True) +@click.option("--sa360-end-date", type=click.DateTime(), required=True) +@processor("sa360_access_token", "sa360_refresh_token", "sa360_client_secret") +def sa360_reader(**kwargs): + return SA360Reader(**extract_args("sa360_", kwargs)) + + +class SA360Reader(Reader): + def __init__( + self, + access_token, + client_id, + client_secret, + refresh_token, + agency_id, + advertiser_ids, + report_name, + report_type, + columns, + custom_metrics, + custom_dimensions, + start_date, + end_date, + ): + self.sa360_client = SA360Client(access_token, client_id, client_secret, refresh_token) + self.agency_id = agency_id + self.advertiser_ids = list(advertiser_ids) + self.report_name = report_name + self.report_type = report_type + self.columns = list(columns) + self.custom_metrics = list(custom_metrics) + self.custom_dimensions = list(custom_dimensions) + self.all_columns = self.columns + self.custom_dimensions + self.custom_metrics + self.start_date = start_date + self.end_date = end_date + + def format_response(self, report_generator): + # skip headers in the CSV output + next(report_generator) + for row in report_generator: + decoded_row = row.decode(ENCODING) + csv_reader = csv.DictReader(StringIO(decoded_row), self.all_columns) + yield next(csv_reader) + + def result_generator(self): + advertiser_id = next((a for a in self.advertiser_ids), "") + body = self.sa360_client.generate_report_body( + self.agency_id, + advertiser_id, + self.report_type, + self.columns, + self.start_date, + self.end_date, + self.custom_dimensions, + self.custom_metrics, + ) + + report_id = self.sa360_client.request_report_id(body) + + report_data = self.sa360_client.assert_report_file_ready(report_id) + + for report_generator in self.sa360_client.download_report_files(report_data, report_id): + yield from self.format_response(report_generator) + + def read(self): + yield NormalizedJSONStream("results" + "_".join(self.advertiser_ids), self.result_generator()) From 672710bd4009d6a82b862bd44d5c73dc7b130180 Mon 
Sep 17 00:00:00 2001 From: benoitgoujon Date: Wed, 25 Mar 2020 16:30:02 +0100 Subject: [PATCH 37/66] Feature: add statistics report --- nck/clients/api_client.py | 3 +- nck/helpers/yandex_helper.py | 2 - nck/readers/yandex_statistics_reader.py | 97 ++++++++-- nck/utils/text.py | 29 +++ tests/readers/test_yandex_campaign_reader.py | 2 +- .../readers/test_yandex_statistics_reader.py | 183 ++++++++++++++++++ tests/utils/test_text_utils.py | 40 +++- 7 files changed, 332 insertions(+), 24 deletions(-) create mode 100644 tests/readers/test_yandex_statistics_reader.py diff --git a/nck/clients/api_client.py b/nck/clients/api_client.py index 469083d2..f95971a8 100644 --- a/nck/clients/api_client.py +++ b/nck/clients/api_client.py @@ -34,7 +34,8 @@ def execute_request( method: str = "GET", url: str = "", body: Dict[str, Any] = None, - headers: Dict[str, str] = None + headers: Dict[str, str] = None, + stream: bool = False ): headers["Authorization"] = f"Bearer {self.token}" response = self.session.request(method, url, json=body, headers=headers) diff --git a/nck/helpers/yandex_helper.py b/nck/helpers/yandex_helper.py index 536dfaf1..9990ecba 100644 --- a/nck/helpers/yandex_helper.py +++ b/nck/helpers/yandex_helper.py @@ -103,8 +103,6 @@ "Type" ] -ATTRIBUTION_MODELS = ["FC", "LC", "LSC", "LYDC"] - DATE_RANGE_TYPES = [ "TODAY", "YESTERDAY", diff --git a/nck/readers/yandex_statistics_reader.py b/nck/readers/yandex_statistics_reader.py index 3af8d647..16247813 100644 --- a/nck/readers/yandex_statistics_reader.py +++ b/nck/readers/yandex_statistics_reader.py @@ -15,16 +15,24 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import datetime +from http import HTTPStatus +import logging +import time +from typing import Tuple, Dict + import click +import nck.helpers.api_client_helper as api_client_helper from nck.clients.api_client import ApiClient from nck.commands.command import processor -from nck.helpers.yandex_helper import (ATTRIBUTION_MODELS, DATE_RANGE_TYPES, +from nck.helpers.yandex_helper import (DATE_RANGE_TYPES, LANGUAGES, OPERATORS, REPORT_TYPES, STATS_FIELDS) from nck.readers.reader import Reader from nck.streams.json_stream import JSONStream from nck.utils.args import extract_args +from nck.utils.text import get_generator_dict_from_str_tsv class StrList(click.ParamType): @@ -35,6 +43,8 @@ def convert(self, value, param, ctx): STR_LIST_TYPE = StrList() +logger = logging.getLogger(__name__) + @click.command(name="read_yandex_statistics") @click.option("--yandex-token", required=True) @@ -49,11 +59,6 @@ def convert(self, value, param, ctx): multiple=True, type=click.Tuple([click.Choice(STATS_FIELDS), click.Choice(OPERATORS), STR_LIST_TYPE]) ) -@click.option( - "--yandex-attribution-model", - multiple=True, - type=click.Choice(ATTRIBUTION_MODELS) -) @click.option( "--yandex-max-rows", type=int @@ -62,7 +67,7 @@ def convert(self, value, param, ctx): "--yandex-field-name", "yandex_fields", multiple=True, - type=click.Choice(), + type=click.Choice(STATS_FIELDS), required=True, help=( "Fields to output in the report (columns)." 
@@ -72,7 +77,7 @@ def convert(self, value, param, ctx): ) @click.option( "--yandex-report-name", - required=True + default=f"stats_report_{datetime.date.today()}" ) @click.option( "--yandex-report-type", @@ -111,11 +116,11 @@ class YandexStatisticsReader(Reader): def __init__( self, token, - fields, - report_type, - report_name, - date_range, - include_vat, + fields: Tuple[str], + report_type: str, + report_name: str, + date_range: str, + include_vat: bool, **kwargs ): self.token = token @@ -128,14 +133,70 @@ def __init__( def result_generator(self): api_client = ApiClient(self.token, YANDEX_DIRECT_API_BASE_URL) - request_body = self._build_query_body() - response = api_client.execute_request(url="reports", body=request_body, headers={}) - yield response.json() - - def _build_request_body(self): + body = self._build_request_body() + headers = self._build_request_headers() + while True: + response = api_client.execute_request( + url="reports", + body=body, + headers=headers, + stream=True + ) + if response.status_code == HTTPStatus.CREATED: + waiting_time = int(response.headers["retryIn"]) + logger.info(f"Report added to queue. Should be ready in {waiting_time} min.") + time.sleep(waiting_time * 60) + elif response.status_code == HTTPStatus.ACCEPTED: + logger.info("Report in queue.") + elif response.status_code == HTTPStatus.OK: + logger.info("Report successfully retrieved.") + return get_generator_dict_from_str_tsv( + response.iter_lines(), + skip_first_row=True + ) + elif response.status_code == HTTPStatus.BAD_REQUEST: + logger.error("Invalid request.") + logger.error(response.json()) + break + return None + + def _build_request_body(self) -> Dict: body = {} + selection_criteria = {} + if len(self.kwargs["filters"]) > 0: + selection_criteria["Filter"] = [ + api_client_helper.get_dict_with_keys_converted_to_new_string_format( + field=filter_element[0], + operator=filter_element[1], + values=filter_element[2] + ) + for filter_element in self.kwargs["filters"] + ] + if self.kwargs["date_start"] is not None: + selection_criteria["DateFrom"] = self.kwargs["date_start"].strftime("%Y-%m-%d") + if self.kwargs["date_stop"] is not None: + selection_criteria["DateTo"] = self.kwargs["date_stop"].strftime("%Y-%m-%d") + body["params"] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( + selection_criteria=selection_criteria, + field_names=self.fields, + report_name=self.report_name, + report_type=self.report_type, + date_range_type=self.date_range, + format="TSV", + include_v_a_t="YES" if self.include_vat else "NO" + ) + if self.kwargs["max_rows"] is not None: + body["params"]["Page"] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( + limit=self.kwargs["max_rows"] + ) return body + def _build_request_headers(self) -> Dict: + return { + "skipReportSummary": "true", + "Accept-Language": self.kwargs["report_language"] + } + def read(self): yield JSONStream( f"results_{self.report_type}", diff --git a/nck/utils/text.py b/nck/utils/text.py index d80c5b13..b6ff82f1 100644 --- a/nck/utils/text.py +++ b/nck/utils/text.py @@ -76,6 +76,35 @@ def get_generator_dict_from_str_csv( yield dict(zip(headers, parse_decoded_line(line))) +def get_generator_dict_from_str_tsv( + line_iterator: Generator[Union[bytes, str], None, None], + skip_first_row=False +) -> Generator[Dict[str, str], None, None]: + if skip_first_row: + next(line_iterator) + headers_line = next(line_iterator) + headers = ( + parse_decoded_line(headers_line.decode("utf-8"), delimiter="\t") + if 
isinstance(headers_line, bytes) + else parse_decoded_line(headers_line, delimiter="\t") + ) + for line in line_iterator: + if isinstance(line, bytes): + try: + line = line.decode("utf-8") + except UnicodeDecodeError as err: + logging.warning( + "An error has occured while parsing the file. " + "The line could not be decoded in %s." + "Invalid input that the codec failed on: %s", + err.encoding, + err.object[err.start : err.end], + ) + line = line.decode("utf-8", errors="ignore") + + yield dict(zip(headers, parse_decoded_line(line, delimiter="\t"))) + + def parse_decoded_line(line: str, delimiter=",", quotechar='"') -> List[str]: line_as_file = StringIO(line) reader = csv.reader( diff --git a/tests/readers/test_yandex_campaign_reader.py b/tests/readers/test_yandex_campaign_reader.py index b13c07e3..48b4b0d2 100644 --- a/tests/readers/test_yandex_campaign_reader.py +++ b/tests/readers/test_yandex_campaign_reader.py @@ -22,7 +22,7 @@ from nck.readers.yandex_campaign_reader import YandexCampaignReader -class TestYandexReader(unittest.TestCase): +class TestYandexCampaignReader(unittest.TestCase): @parameterized.expand([ ( diff --git a/tests/readers/test_yandex_statistics_reader.py b/tests/readers/test_yandex_statistics_reader.py new file mode 100644 index 00000000..e5a6bc6a --- /dev/null +++ b/tests/readers/test_yandex_statistics_reader.py @@ -0,0 +1,183 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+import datetime +import unittest + +from parameterized import parameterized + +from nck.readers.yandex_statistics_reader import YandexStatisticsReader + + +class TestYandexStatisticsReader(unittest.TestCase): + + @parameterized.expand([ + ( + { + "report_language": "en", + "filters": (), + "max_rows": None, + "date_start": None, + "date_stop": None + }, + True, + { + "params": { + "SelectionCriteria": {}, + "FieldNames": ["AdFormat", "AdGroupId"], + "ReportName": "stats_report_2020-03_25", + "ReportType": "AD_PERFORMANCE_REPORT", + "DateRangeType": "ALL_TIME", + "Format": "TSV", + "IncludeVAT": "YES" + } + } + ), + ( + { + "report_language": "en", + "filters": (), + "max_rows": None, + "date_start": None, + "date_stop": None + }, + False, + { + "params": { + "SelectionCriteria": {}, + "FieldNames": ["AdFormat", "AdGroupId"], + "ReportName": "stats_report_2020-03_25", + "ReportType": "AD_PERFORMANCE_REPORT", + "DateRangeType": "ALL_TIME", + "Format": "TSV", + "IncludeVAT": "NO" + } + } + ), + ( + { + "report_language": "en", + "filters": (), + "max_rows": 25, + "date_start": datetime.datetime(2020, 3, 5, 0, 0), + "date_stop": datetime.datetime(2020, 3, 25, 0, 0) + }, + False, + { + "params": { + "SelectionCriteria": { + "DateFrom": "2020-03-05", + "DateTo": "2020-03-25" + }, + "Page": { + "Limit": 25 + }, + "FieldNames": ["AdFormat", "AdGroupId"], + "ReportName": "stats_report_2020-03_25", + "ReportType": "AD_PERFORMANCE_REPORT", + "DateRangeType": "ALL_TIME", + "Format": "TSV", + "IncludeVAT": "NO" + } + } + ), + ( + { + "report_language": "en", + "filters": ( + ("AdGroupId", "EQUALS", ["1"]), + ("CampaignId", "IN", ["1", "2"]) + ), + "max_rows": 25, + "date_start": datetime.datetime(2020, 3, 5, 0, 0), + "date_stop": datetime.datetime(2020, 3, 25, 0, 0) + }, + False, + { + "params": { + "SelectionCriteria": { + "DateFrom": "2020-03-05", + "DateTo": "2020-03-25", + "Filter": [ + { + "Field": "AdGroupId", + "Operator": "EQUALS", + "Values": ["1"] + }, + { + "Field": "CampaignId", + "Operator": "IN", + "Values": ["1", "2"] + } + ] + }, + "Page": { + "Limit": 25 + }, + "FieldNames": ["AdFormat", "AdGroupId"], + "ReportName": "stats_report_2020-03_25", + "ReportType": "AD_PERFORMANCE_REPORT", + "DateRangeType": "ALL_TIME", + "Format": "TSV", + "IncludeVAT": "NO" + } + } + ) + ]) + def test_get_query_body( + self, + kwargs, + include_vat, + expected_query_body + ): + reader = YandexStatisticsReader( + "123", + ("AdFormat", "AdGroupId"), + "AD_PERFORMANCE_REPORT", + "stats_report_2020-03_25", + "ALL_TIME", + include_vat, + report_language=kwargs["report_language"], + filters=kwargs["filters"], + max_rows=kwargs["max_rows"], + date_start=kwargs["date_start"], + date_stop=kwargs["date_stop"] + ) + self.assertDictEqual(reader._build_request_body(), expected_query_body) + + @parameterized.expand(["en", "ru", "uk"]) + def test_request_headers(self, report_language): + reader = YandexStatisticsReader( + "123", + ("AdFormat", "AdGroupId"), + "AD_PERFORMANCE_REPORT", + "stats_report_2020-03_25", + "ALL_TIME", + True, + report_language=report_language, + filters=(), + max_rows=None, + date_start=None, + date_stop=None + ) + self.assertDictEqual( + { + "skipReportSummary": "true", + "Accept-Language": report_language + }, + reader._build_request_headers() + ) diff --git a/tests/utils/test_text_utils.py b/tests/utils/test_text_utils.py index 4b11014b..0a9202f0 100644 --- a/tests/utils/test_text_utils.py +++ b/tests/utils/test_text_utils.py @@ -15,12 +15,16 @@ # You should have received a copy of the GNU 
Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from datetime import date import logging import unittest +from datetime import date from unittest.mock import patch -from nck.utils.text import get_generator_dict_from_str_csv, parse_decoded_line +from parameterized import parameterized + +from nck.utils.text import (get_generator_dict_from_str_csv, + get_generator_dict_from_str_tsv, + parse_decoded_line) class TestTextUtilsMethod(unittest.TestCase): @@ -279,3 +283,35 @@ def test_response_not_binary_with_date(self): date_format="%Y/%m/%d" ): self.assertEqual(dic, expected_dict) + + @parameterized.expand([ + ( + True, + [ + b'"Perf report (2017-03-01 - 2020-03-25)"', + b'AdFormat\tAdGroupId\tAdGroupName', + b'IMAGE\t123\tAdGroup', + b'IMAGE\t123\tAdGroup', + ] + ), + ( + False, + [ + b'AdFormat\tAdGroupId\tAdGroupName', + b'IMAGE\t123\tAdGroup', + b'IMAGE\t123\tAdGroup', + ] + ) + ]) + def test_parse_tsv_with_first_row_skipped(self, skip_first_row, lines): + expected_dict = { + "AdFormat": "IMAGE", + "AdGroupId": "123", + "AdGroupName": "AdGroup" + } + line_iterator = (line for line in lines) + for dic in get_generator_dict_from_str_tsv( + line_iterator, + skip_first_row=skip_first_row + ): + self.assertEqual(dic, expected_dict) From 2944149da32f0cb8ff59f9d60b8b572d589c55b9 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Thu, 26 Mar 2020 10:54:44 +0100 Subject: [PATCH 38/66] Fix: add error logging if unknown status code --- nck/readers/yandex_statistics_reader.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/nck/readers/yandex_statistics_reader.py b/nck/readers/yandex_statistics_reader.py index 16247813..ba48ca53 100644 --- a/nck/readers/yandex_statistics_reader.py +++ b/nck/readers/yandex_statistics_reader.py @@ -158,6 +158,13 @@ def result_generator(self): logger.error("Invalid request.") logger.error(response.json()) break + elif response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR: + logger.error("Internal server error.") + logger.error(response.json()) + break + else: + logger.error(response.json()) + break return None def _build_request_body(self) -> Dict: From f40bc30571ce7026142f49064c47a80f9c541df9 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Thu, 26 Mar 2020 11:41:56 +0100 Subject: [PATCH 39/66] Refacto: improve maintainability with easier to read logic --- nck/helpers/api_client_helper.py | 14 +++++--------- tests/helpers/test_api_client_helper.py | 15 ++++++++++++++- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/nck/helpers/api_client_helper.py b/nck/helpers/api_client_helper.py index 5fa3d667..3582700d 100644 --- a/nck/helpers/api_client_helper.py +++ b/nck/helpers/api_client_helper.py @@ -27,18 +27,14 @@ def get_dict_with_keys_converted_to_new_string_format( str_format: str = "PascalCase", **kwargs ) -> Dict: if str_format in POSSIBLE_STRING_FORMATS and str_format == "PascalCase": - new_keys = [ - "".join(word.capitalize() for word in old_key.split("_")) - for old_key in kwargs - ] - old_keys = kwargs.copy().keys() - formatted_dict = {} - for old_key, new_key in zip(old_keys, new_keys): - formatted_dict[new_key] = kwargs.pop(old_key) - return formatted_dict + return {to_pascal_key(key): value for key, value in kwargs.items()} else: logging.error(( "Unable to convert to new string format. 
" "Format not in %s" ) % POSSIBLE_STRING_FORMATS) return None + + +def to_pascal_key(key: str): + return "".join(word.capitalize() for word in key.split("_")) diff --git a/tests/helpers/test_api_client_helper.py b/tests/helpers/test_api_client_helper.py index 6d8aea16..6e31dbfe 100644 --- a/tests/helpers/test_api_client_helper.py +++ b/tests/helpers/test_api_client_helper.py @@ -18,7 +18,10 @@ import unittest import logging -from nck.helpers.api_client_helper import get_dict_with_keys_converted_to_new_string_format +from parameterized import parameterized + +from nck.helpers.api_client_helper import (get_dict_with_keys_converted_to_new_string_format, + to_pascal_key) class ApiClientHelperTest(unittest.TestCase): @@ -37,6 +40,16 @@ def test_string_conversion_to_camel_case(self): } ) + @parameterized.expand([ + ("test", "Test"), + ("test_test", "TestTest"), + ("test_test_test", "TestTestTest"), + ("tEST", "Test"), + ("t_e_s_t", "TEST") + ]) + def test_to_pascal_key(self, key, pascal_key): + self.assertEquals(to_pascal_key(key), pascal_key) + def test_unknown_case(self): with self.assertLogs() as cm: logging.getLogger("ApiClient") From 29ddac6f9d80cac39ed5d212bf55c8c538c8e078 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Thu, 26 Mar 2020 12:30:57 +0100 Subject: [PATCH 40/66] Fix: add random report name to avoid conflicts when testing --- nck/readers/yandex_statistics_reader.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nck/readers/yandex_statistics_reader.py b/nck/readers/yandex_statistics_reader.py index ba48ca53..ddf3adbc 100644 --- a/nck/readers/yandex_statistics_reader.py +++ b/nck/readers/yandex_statistics_reader.py @@ -16,19 +16,19 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
import datetime -from http import HTTPStatus import logging +import random import time -from typing import Tuple, Dict +from http import HTTPStatus +from typing import Dict, Tuple import click import nck.helpers.api_client_helper as api_client_helper from nck.clients.api_client import ApiClient from nck.commands.command import processor -from nck.helpers.yandex_helper import (DATE_RANGE_TYPES, - LANGUAGES, OPERATORS, REPORT_TYPES, - STATS_FIELDS) +from nck.helpers.yandex_helper import (DATE_RANGE_TYPES, LANGUAGES, OPERATORS, + REPORT_TYPES, STATS_FIELDS) from nck.readers.reader import Reader from nck.streams.json_stream import JSONStream from nck.utils.args import extract_args @@ -77,7 +77,7 @@ def convert(self, value, param, ctx): ) @click.option( "--yandex-report-name", - default=f"stats_report_{datetime.date.today()}" + default=f"stats_report_{datetime.date.today()}_{random.randrange(10000)}" ) @click.option( "--yandex-report-type", From d09786d5a4e1687d1bf7a3a208fc664d8ad1bba2 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Thu, 26 Mar 2020 15:55:25 +0100 Subject: [PATCH 41/66] Fix: better way to handle dates --- nck/readers/yandex_statistics_reader.py | 35 ++++++-- .../readers/test_yandex_statistics_reader.py | 86 ++++++++++++++++++- 2 files changed, 113 insertions(+), 8 deletions(-) diff --git a/nck/readers/yandex_statistics_reader.py b/nck/readers/yandex_statistics_reader.py index ddf3adbc..0506354a 100644 --- a/nck/readers/yandex_statistics_reader.py +++ b/nck/readers/yandex_statistics_reader.py @@ -169,7 +169,7 @@ def result_generator(self): def _build_request_body(self) -> Dict: body = {} - selection_criteria = {} + selection_criteria = self._add_custom_dates_if_set() if len(self.kwargs["filters"]) > 0: selection_criteria["Filter"] = [ api_client_helper.get_dict_with_keys_converted_to_new_string_format( @@ -179,10 +179,6 @@ def _build_request_body(self) -> Dict: ) for filter_element in self.kwargs["filters"] ] - if self.kwargs["date_start"] is not None: - selection_criteria["DateFrom"] = self.kwargs["date_start"].strftime("%Y-%m-%d") - if self.kwargs["date_stop"] is not None: - selection_criteria["DateTo"] = self.kwargs["date_stop"].strftime("%Y-%m-%d") body["params"] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( selection_criteria=selection_criteria, field_names=self.fields, @@ -204,6 +200,35 @@ def _build_request_headers(self) -> Dict: "Accept-Language": self.kwargs["report_language"] } + def _add_custom_dates_if_set(self) -> Dict: + selection_criteria = {} + if ( + self.kwargs["date_start"] is not None + and self.kwargs["date_stop"] is not None + and self.date_range == "CUSTOM_DATE" + ): + selection_criteria["DateFrom"] = self.kwargs["date_start"].strftime("%Y-%m-%d") + selection_criteria["DateTo"] = self.kwargs["date_stop"].strftime("%Y-%m-%d") + elif ( + self.kwargs["date_start"] is not None + and self.kwargs["date_stop"] is not None + and self.date_range != "CUSTOM_DATE" + ): + raise click.ClickException("Wrong date range. 
If start and stop dates are set, should be CUSTOM_DATE.") + elif ( + self.kwargs["date_start"] is not None + and self.kwargs["date_stop"] is None + and self.date_range == "CUSTOM_DATE" + ): + raise click.ClickException("Stop date missing.") + elif ( + self.kwargs["date_start"] is None + and self.kwargs["date_stop"] is not None + and self.date_range == "CUSTOM_DATE" + ): + raise click.ClickException("Start date missing.") + return selection_criteria + def read(self): yield JSONStream( f"results_{self.report_type}", diff --git a/tests/readers/test_yandex_statistics_reader.py b/tests/readers/test_yandex_statistics_reader.py index e5a6bc6a..93794ee0 100644 --- a/tests/readers/test_yandex_statistics_reader.py +++ b/tests/readers/test_yandex_statistics_reader.py @@ -18,6 +18,7 @@ import datetime import unittest +import click from parameterized import parameterized from nck.readers.yandex_statistics_reader import YandexStatisticsReader @@ -27,6 +28,7 @@ class TestYandexStatisticsReader(unittest.TestCase): @parameterized.expand([ ( + "ALL_TIME", { "report_language": "en", "filters": (), @@ -48,6 +50,7 @@ class TestYandexStatisticsReader(unittest.TestCase): } ), ( + "ALL_TIME", { "report_language": "en", "filters": (), @@ -69,6 +72,7 @@ class TestYandexStatisticsReader(unittest.TestCase): } ), ( + "CUSTOM_DATE", { "report_language": "en", "filters": (), @@ -89,13 +93,14 @@ class TestYandexStatisticsReader(unittest.TestCase): "FieldNames": ["AdFormat", "AdGroupId"], "ReportName": "stats_report_2020-03_25", "ReportType": "AD_PERFORMANCE_REPORT", - "DateRangeType": "ALL_TIME", + "DateRangeType": "CUSTOM_DATE", "Format": "TSV", "IncludeVAT": "NO" } } ), ( + "CUSTOM_DATE", { "report_language": "en", "filters": ( @@ -131,7 +136,7 @@ class TestYandexStatisticsReader(unittest.TestCase): "FieldNames": ["AdFormat", "AdGroupId"], "ReportName": "stats_report_2020-03_25", "ReportType": "AD_PERFORMANCE_REPORT", - "DateRangeType": "ALL_TIME", + "DateRangeType": "CUSTOM_DATE", "Format": "TSV", "IncludeVAT": "NO" } @@ -140,6 +145,7 @@ class TestYandexStatisticsReader(unittest.TestCase): ]) def test_get_query_body( self, + date_range, kwargs, include_vat, expected_query_body @@ -149,7 +155,7 @@ def test_get_query_body( ("AdFormat", "AdGroupId"), "AD_PERFORMANCE_REPORT", "stats_report_2020-03_25", - "ALL_TIME", + date_range, include_vat, report_language=kwargs["report_language"], filters=kwargs["filters"], @@ -181,3 +187,77 @@ def test_request_headers(self, report_language): }, reader._build_request_headers() ) + + @parameterized.expand([ + ( + "ALL_TIME", + None, + None, + {} + ), + ( + "CUSTOM_DATE", + datetime.datetime(2020, 1, 1), + datetime.datetime(2020, 1, 2), + { + "DateFrom": "2020-01-01", + "DateTo": "2020-01-02" + } + ) + ]) + def test_custom_dates_correctly_set(self, date_range, start_date, stop_date, expected): + reader = YandexStatisticsReader( + "123", + ("AdFormat", "AdGroupId"), + "AD_PERFORMANCE_REPORT", + "stats_report_2020-03_25", + date_range, + True, + date_start=start_date, + date_stop=stop_date + ) + self.assertDictEqual( + expected, + reader._add_custom_dates_if_set() + ) + + @parameterized.expand([ + ( + "ALL_TIME", + datetime.datetime(2020, 1, 1), + datetime.datetime(2020, 1, 2), + "Wrong date range. If start and stop dates are set, should be CUSTOM_DATE." + ), + ( + "CUSTOM_DATE", + datetime.datetime(2020, 1, 1), + None, + "Stop date missing." + ), + ( + "CUSTOM_DATE", + None, + datetime.datetime(2020, 1, 1), + "Start date missing." 
+ ) + ]) + def test_custom_dates_not_correctly_set( + self, + date_range, + start_date, + stop_date, + error_message_expected + ): + reader = YandexStatisticsReader( + "123", + ("AdFormat", "AdGroupId"), + "AD_PERFORMANCE_REPORT", + "stats_report_2020-03_25", + date_range, + True, + date_start=start_date, + date_stop=stop_date + ) + with self.assertRaises(click.ClickException) as click_exception: + reader._add_custom_dates_if_set() + self.assertEquals(click_exception.exception.message, error_message_expected) From 8b072cc06edc9a10a68c8905e766df9684a786ac Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Thu, 26 Mar 2020 16:49:39 +0100 Subject: [PATCH 42/66] Feature: add documentation --- nck/readers/README.md | 69 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/nck/readers/README.md b/nck/readers/README.md index 8dfa9449..9e8c181d 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -154,3 +154,72 @@ python nck/entrypoint.py read_search_console --search-console-client-id --yandex-field-name Id --yandex-field-name Name --yandex-field-name DailyBudget write_console +``` + +#### Parameters + +| CLI option | Documentation | +| ---------- | ------------- | +| `--yandex-token` | Bear token that allows you to authenticate to the API | +| `--yandex-campaign-id` | (Optional) Selects campaigns with the specified IDs. | +| `--yandex-campaign-state` | (Optional) Selects campaigns with the specified [states](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status). | +| `--yandex-campaign-status` | (Optional) Selects campaigns with the specified [statuses](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status). | +| `--yandex-campaign-payment-status` | (Optional) Selects campaigns with the specified payment [statuses](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status). | +| `--yandex-field-name` | Parameters to get that are common to all types of campaigns. | + +### Yandex statistics reader + +[Official documentation](https://tech.yandex.com/direct/doc/reports/reports-docpage/) + +#### Quickstart + +The command below gives you a performance report for all your campaigns and since the beginning. + +```bash +python nck/entrypoint.py read_yandex_statistics --yandex-token --yandex-report-type AD_PERFORMANCE_REPORT --yandex-field-name AdFormat --yandex-field-name AdId --yandex-field-name Impressions --yandex-include-vat True --yandex-report-language en --yandex-field-name AdGroupName --yandex-field-name AdGroupId --yandex-field-name AdNetworkType --yandex-field-name CampaignId --yandex-field-name CampaignName --yandex-field-name CampaignType --yandex-field-name Date --yandex-field-name Device --yandex-field-name Clicks --yandex-field-name Conversions --yandex-field-name Cost --yandex-date-range ALL_DATE write_console +``` + +#### Parameters + +Detailed version [here](https://tech.yandex.com/direct/doc/reports/spec-docpage/). + +| CLI option | Documentation | +| ---------- | ------------- | +| `--yandex-token` | Bear token that allows you to authenticate to the API | +| `--yandex-report-language` | (Optional) Language of the report. See all options [here](https://tech.yandex.com/direct/doc/dg/concepts/headers-docpage/#headers__accept-language). | +| `--yandex-filter` | (Optional) Filters on a particular field. | +| `--yandex-max-rows` | (Optional) The maximum number of rows in the report. | +| `--yandex-field-name` | Information you want to collect. 
Complete list [here](https://tech.yandex.com/direct/doc/reports/fields-list-docpage/). | +| `--yandex-report-type` | Type of report. Linked to the fields you want to select. | +| `--yandex-date-range` | List [here](https://tech.yandex.com/direct/doc/reports/period-docpage/). | +| `--yandex-include-vat` | Adds VAT to your expenses if set to `True`| +| `--yandex-date-start` | Selects data on a specific period of time. Combined with `--yandex-date-stop`. | +| `--yandex-date-stop` | Selects data on a specific period of time. Combined with `--yandex-date-start`. | \ No newline at end of file From bdecd180756a67d935636aeb645d917970f807af Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Thu, 26 Mar 2020 17:50:30 +0100 Subject: [PATCH 43/66] Fix: doc --- nck/readers/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index 9e8c181d..f1e7afef 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -158,7 +158,7 @@ See the documents below for a better understanding of the parameters: ## Yandex readers For now, there is only one Yandex API you can access through Nautilus connectors: [Direct API](https://tech.yandex.com/direct/). -This API allows you to collect display and video metrics. +This API allows you to collect display metrics. ### Access Yandex Direct API @@ -221,5 +221,5 @@ Detailed version [here](https://tech.yandex.com/direct/doc/reports/spec-docpage/ | `--yandex-report-type` | Type of report. Linked to the fields you want to select. | | `--yandex-date-range` | List [here](https://tech.yandex.com/direct/doc/reports/period-docpage/). | | `--yandex-include-vat` | Adds VAT to your expenses if set to `True`| -| `--yandex-date-start` | Selects data on a specific period of time. Combined with `--yandex-date-stop`. | -| `--yandex-date-stop` | Selects data on a specific period of time. Combined with `--yandex-date-start`. | \ No newline at end of file +| `--yandex-date-start` | (Optional) Selects data on a specific period of time. Combined with `--yandex-date-stop` and `--yandex-date-range` set to `CUSTOM_DATE`. | +| `--yandex-date-stop` | (Optional) Selects data on a specific period of time. Combined with `--yandex-date-start` and `--yandex-date-range` set to `CUSTOM_DATE`. | \ No newline at end of file From b11f23d919170fa0ab49da717d36d9025e459dca Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Thu, 26 Mar 2020 18:18:57 +0100 Subject: [PATCH 44/66] Feature: add troubleshooting guide --- nck/readers/README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index f1e7afef..efab27ef 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -184,6 +184,8 @@ If you want to quickly get to the point, here is a simple command that get the d python nck/entrypoint.py read_yandex_campaigns --yandex-token --yandex-field-name Id --yandex-field-name Name --yandex-field-name DailyBudget write_console ``` +Didn't work? See [troubleshooting](#troubleshooting) section. 
+ + #### Parameters @@ -207,6 +209,8 @@ The command below gives you a performance report for all your campaigns and sinc ```bash python nck/entrypoint.py read_yandex_statistics --yandex-token --yandex-report-type AD_PERFORMANCE_REPORT --yandex-field-name AdFormat --yandex-field-name AdId --yandex-field-name Impressions --yandex-include-vat True --yandex-report-language en --yandex-field-name AdGroupName --yandex-field-name AdGroupId --yandex-field-name AdNetworkType --yandex-field-name CampaignId --yandex-field-name CampaignName --yandex-field-name CampaignType --yandex-field-name Date --yandex-field-name Device --yandex-field-name Clicks --yandex-field-name Conversions --yandex-field-name Cost --yandex-date-range ALL_DATE write_console ``` +Didn't work? See [troubleshooting](#troubleshooting) section. + #### Parameters @@ -222,4 +226,13 @@ Detailed version [here](https://tech.yandex.com/direct/doc/reports/spec-docpage/ | `--yandex-date-range` | List [here](https://tech.yandex.com/direct/doc/reports/period-docpage/). | | `--yandex-include-vat` | Adds VAT to your expenses if set to `True`| | `--yandex-date-start` | (Optional) Selects data on a specific period of time. Combined with `--yandex-date-stop` and `--yandex-date-range` set to `CUSTOM_DATE`. | -| `--yandex-date-stop` | (Optional) Selects data on a specific period of time. Combined with `--yandex-date-start` and `--yandex-date-range` set to `CUSTOM_DATE`. | \ No newline at end of file +| `--yandex-date-stop` | (Optional) Selects data on a specific period of time. Combined with `--yandex-date-start` and `--yandex-date-range` set to `CUSTOM_DATE`. | + +### Troubleshooting + +You encountered an error and you don't know what's going on. You may find an answer in the troubleshooting guide below. + +1. **Have you installed the NCK dependencies?** In order to run NCK, you need to install all dependencies. First create a [virtual environment](https://docs.python.org/3/library/venv.html) and then run `pip install -r requirements.txt`. +2. **Have you set the `PYTHONPATH` environment variable to the root of the NCK folder?** +3. **Have you checked the logs?** The code has been implemented so that every error is logged. For example, if you did not provide a valid token, you will see something like ```Invalid request. +{'error': {'error_code': '53', 'request_id': '8998435864716615689', 'error_string': 'Authorization error', 'error_detail': 'Invalid OAuth token'}}```. If you misspelled a field, you will get a message like this one: ```Error: Invalid value for "--yandex-field-name"```. From 5cf859da3d4de03c35cced06b83836e211abb428 Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Thu, 26 Mar 2020 18:21:05 +0100 Subject: [PATCH 45/66] Fix: typo in command example --- nck/readers/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index efab27ef..b34fc950 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -206,7 +206,7 @@ Didn't work? See [troubleshooting](#troubleshooting) section. The command below gives you a performance report for all your campaigns and since the beginning.
```bash -python nck/entrypoint.py read_yandex_statistics --yandex-token --yandex-report-type AD_PERFORMANCE_REPORT --yandex-field-name AdFormat --yandex-field-name AdId --yandex-field-name Impressions --yandex-include-vat True --yandex-report-language en --yandex-field-name AdGroupName --yandex-field-name AdGroupId --yandex-field-name AdNetworkType --yandex-field-name CampaignId --yandex-field-name CampaignName --yandex-field-name CampaignType --yandex-field-name Date --yandex-field-name Device --yandex-field-name Clicks --yandex-field-name Conversions --yandex-field-name Cost --yandex-date-range ALL_DATE write_console +python nck/entrypoint.py read_yandex_statistics --yandex-token --yandex-report-type AD_PERFORMANCE_REPORT --yandex-field-name AdFormat --yandex-field-name AdId --yandex-field-name Impressions --yandex-include-vat True --yandex-report-language en --yandex-field-name AdGroupName --yandex-field-name AdGroupId --yandex-field-name AdNetworkType --yandex-field-name CampaignId --yandex-field-name CampaignName --yandex-field-name CampaignType --yandex-field-name Date --yandex-field-name Device --yandex-field-name Clicks --yandex-field-name Conversions --yandex-field-name Cost --yandex-date-range ALL_TIME write_console ``` Didn't work? See [troubleshooting](#troubleshooting) section. From 422fc1e789dbeb19dc5f9edd3770776be1c22c7c Mon Sep 17 00:00:00 2001 From: benoitgoujon Date: Thu, 26 Mar 2020 18:41:59 +0100 Subject: [PATCH 46/66] Fix: cover all date combinations --- nck/readers/yandex_statistics_reader.py | 23 +++++++++----- .../readers/test_yandex_statistics_reader.py | 30 +++++++++++++++++-- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/nck/readers/yandex_statistics_reader.py b/nck/readers/yandex_statistics_reader.py index 0506354a..f5b9eebd 100644 --- a/nck/readers/yandex_statistics_reader.py +++ b/nck/readers/yandex_statistics_reader.py @@ -216,17 +216,26 @@ def _add_custom_dates_if_set(self) -> Dict: ): raise click.ClickException("Wrong date range. If start and stop dates are set, should be CUSTOM_DATE.") elif ( - self.kwargs["date_start"] is not None - and self.kwargs["date_stop"] is None - and self.date_range == "CUSTOM_DATE" + ( + self.kwargs["date_start"] is not None + or self.kwargs["date_stop"] is not None + ) + and self.date_range != "CUSTOM_DATE" ): - raise click.ClickException("Stop date missing.") + raise click.ClickException( + ( + "Wrong combination of date parameters. " + "Only use date start and date stop with date range set to CUSTOM_DATE." + ) + ) elif ( - self.kwargs["date_start"] is None - and self.kwargs["date_stop"] is not None + ( + self.kwargs["date_start"] is None + or self.kwargs["date_stop"] is None + ) and self.date_range == "CUSTOM_DATE" ): - raise click.ClickException("Start date missing.") + raise click.ClickException("Missing at least one date. Have you set start and stop dates?") return selection_criteria def read(self): diff --git a/tests/readers/test_yandex_statistics_reader.py b/tests/readers/test_yandex_statistics_reader.py index 93794ee0..e27532a5 100644 --- a/tests/readers/test_yandex_statistics_reader.py +++ b/tests/readers/test_yandex_statistics_reader.py @@ -228,18 +228,42 @@ def test_custom_dates_correctly_set(self, date_range, start_date, stop_date, exp datetime.datetime(2020, 1, 2), "Wrong date range. If start and stop dates are set, should be CUSTOM_DATE." ), + ( + "CUSTOM_DATE", + None, + None, + "Missing at least one date. Have you set start and stop dates?" 
+ ), ( "CUSTOM_DATE", datetime.datetime(2020, 1, 1), None, - "Stop date missing." + "Missing at least one date. Have you set start and stop dates?" ), ( "CUSTOM_DATE", None, datetime.datetime(2020, 1, 1), - "Start date missing." - ) + "Missing at least one date. Have you set start and stop dates?" + ), + ( + "ALL_TIME", + None, + datetime.datetime(2020, 1, 1), + ( + "Wrong combination of date parameters. " + "Only use date start and date stop with date range set to CUSTOM_DATE." + ) + ), + ( + "ALL_TIME", + datetime.datetime(2020, 1, 1), + None, + ( + "Wrong combination of date parameters. " + "Only use date start and date stop with date range set to CUSTOM_DATE." + ) + ), ]) def test_custom_dates_not_correctly_set( self, From da9b0690956d9fe74e1eee5f5ecb3ec1c5dcbf63 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Fri, 27 Mar 2020 16:43:59 +0100 Subject: [PATCH 47/66] handle several advertisers + add tests --- nck/clients/sa360_client.py | 44 ++++++++------------------- nck/readers/sa360_reader.py | 40 ++++++++++++++---------- tests/clients/test_sa360_client.py | 33 ++++++++++++++++++++ tests/readers/test_sa360_reader.py | 49 ++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 48 deletions(-) create mode 100644 tests/clients/test_sa360_client.py create mode 100644 tests/readers/test_sa360_reader.py diff --git a/nck/clients/sa360_client.py b/nck/clients/sa360_client.py index c953855a..4b71d2e6 100644 --- a/nck/clients/sa360_client.py +++ b/nck/clients/sa360_client.py @@ -23,9 +23,7 @@ from oauth2client import client, GOOGLE_TOKEN_URI from googleapiclient import discovery - logger = logging.getLogger("SA360_client") - DOWNLOAD_FORMAT = "CSV" @@ -50,6 +48,17 @@ def __init__(self, access_token, client_id, client_secret, refresh_token): ) self._service = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) + def get_all_advertisers_of_agency(self, agency_id): + body = { + "reportScope": {"agencyId": agency_id}, + "reportType": "advertiser", + "columns": [{"columnName": "advertiserId"}], + "statisticsCurrency": "usd", + } + report = self._service.reports().generate(body=body).execute() + advertiser_ids = [row["advertiserId"] for row in report["rows"]] + return advertiser_ids + @staticmethod def generate_report_body( agency_id, advertiser_id, report_type, columns, start_date, end_date, custom_metrics, custom_dimensions @@ -73,7 +82,7 @@ def request_report_id(self, body): logger.info("Report requested!") return report["id"] - @retry(wait=wait_exponential(multiplier=1, min=1, max=8), stop=stop_after_delay(3600)) + @retry(wait=wait_exponential(multiplier=60, min=60, max=600), stop=stop_after_delay(3600)) def assert_report_file_ready(self, report_id): """Poll the API with the reportId until the report is ready, up to 100 times. @@ -107,9 +116,6 @@ def download_fragment(self, report_id, fragment): report_fragment: The 0-based index of the file fragment from the files array. 
currency_code: the currency code of the report """ - # csv_fragment_report = (self._service.reports().getFile(reportId=report_id, reportFragment=fragment).execute()) - # print(csv_fragment_report) - # print(io.BytesIO(csv_fragment_report)) request = self._service.reports().getFile(reportId=report_id, reportFragment=fragment) headers = request.headers headers.update({"Authorization": self.auth}) @@ -117,32 +123,6 @@ def download_fragment(self, report_id, fragment): yield from r.iter_lines() - # i = 0 - # index = 0 - # impr_keyword = 0 - # for row in r.iter_lines(): - # decoded_row = row.decode("utf-8") - # if "impr" in decoded_row: - # decoded_row = decoded_row.split(",") - # index = decoded_row.index("impr") - # continue - # - # if "samsung note 10+ 6.8" in decoded_row: - # r = decoded_row.split(",") - # impr_keyword += int(r[index]) - # print(decoded_row) - # decoded_row = decoded_row.split(",") - # i += int(decoded_row[index]) - # print("IMPRESSIONS", i, impr_keyword) - - # yield from r.iter_lines() - - # df = pd.DataFrame.from_csv(io.BytesIO(csv_fragment_report)) - # df["currency_code"] = currency_code - # from tabulate import tabulate - # print(tabulate(df, headers='keys', tablefmt='psql')) - # return df - def direct_report_download(self, report_id, file_id): # Retrieve the file metadata. report_file = self._service.files().get(reportId=report_id, fileId=file_id).execute() diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py index b201a6db..5a9eea27 100644 --- a/nck/readers/sa360_reader.py +++ b/nck/readers/sa360_reader.py @@ -37,7 +37,12 @@ @click.option("--sa360-client-secret", required=True) @click.option("--sa360-refresh-token", required=True) @click.option("--sa360-agency-id", required=True) -@click.option("--sa360-advertiser-id", "sa360_advertiser_ids", required=True, multiple=True) +@click.option( + "--sa360-advertiser-id", + "sa360_advertiser_ids", + multiple=True, + help="If empty, all advertisers from agency will be requested", +) @click.option("--sa360-report-name", default="SA360 Report") @click.option("--sa360-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0]) @click.option( @@ -100,24 +105,27 @@ def format_response(self, report_generator): yield next(csv_reader) def result_generator(self): - advertiser_id = next((a for a in self.advertiser_ids), "") - body = self.sa360_client.generate_report_body( - self.agency_id, - advertiser_id, - self.report_type, - self.columns, - self.start_date, - self.end_date, - self.custom_dimensions, - self.custom_metrics, - ) + for advertiser_id in self.advertiser_ids: + body = self.sa360_client.generate_report_body( + self.agency_id, + advertiser_id, + self.report_type, + self.columns, + self.start_date, + self.end_date, + self.custom_dimensions, + self.custom_metrics, + ) - report_id = self.sa360_client.request_report_id(body) + report_id = self.sa360_client.request_report_id(body) - report_data = self.sa360_client.assert_report_file_ready(report_id) + report_data = self.sa360_client.assert_report_file_ready(report_id) - for report_generator in self.sa360_client.download_report_files(report_data, report_id): - yield from self.format_response(report_generator) + for report_generator in self.sa360_client.download_report_files(report_data, report_id): + yield from self.format_response(report_generator) def read(self): + if not self.advertiser_ids: + self.advertiser_ids = self.sa360_client.get_all_advertisers_of_agency(self.agency_id) + yield NormalizedJSONStream("results" + "_".join(self.advertiser_ids), 
self.result_generator()) diff --git a/tests/clients/test_sa360_client.py b/tests/clients/test_sa360_client.py new file mode 100644 index 00000000..9c30e8ff --- /dev/null +++ b/tests/clients/test_sa360_client.py @@ -0,0 +1,33 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from unittest import TestCase +from nck.clients.sa360_client import SA360Client + + +class SA360ClientTest(TestCase): + def test_generate_all_columns(self): + standard = ["clicks", "impressions"] + custom_dimensions = ["customDimension"] + custom_metrics = ["customMetric"] + expected = [ + {"columnName": "clicks"}, + {"columnName": "impressions"}, + {"columnDimensionName": "customDimension", "platformSource": "floodlight"}, + {"columnMetricName": "customMetric", "platformSource": "floodlight"}, + ] + assert SA360Client.generate_columns(standard, custom_dimensions, custom_metrics) == expected diff --git a/tests/readers/test_sa360_reader.py b/tests/readers/test_sa360_reader.py new file mode 100644 index 00000000..42f52292 --- /dev/null +++ b/tests/readers/test_sa360_reader.py @@ -0,0 +1,49 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from unittest import TestCase, mock +import logging + +from nck.readers.sa360_reader import SA360Reader + +logger = logging.getLogger("SA360_reader_test") + + +class SA360ReaderTest(TestCase): + def mock_sa360_reader(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + kwargs = {"all_columns": ["impressions", "clicks"]} + + @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) + def test_empty_data(self): + reader = SA360Reader(**self.kwargs) + input_report = (row for row in [b"Just Headers in this empty report"]) + if len(list(reader.format_response(input_report))) > 0: + assert False, "Data is not empty" + + @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) + def test_format_data(self): + reader = SA360Reader(**self.kwargs) + input_report = (row for row in [b"impressions,clicks", b"1,2", b"3,4"]) + expected = [{"impressions": "1", "clicks": "2"}, {"impressions": "3", "clicks": "4"}] + input_list = list(reader.format_response(input_report)) + assert len(input_list) == len(expected) + + for input_row, output in zip(input_list, expected): + assert input_row == output From 5e12f82884789c1319d7c6fbdc7bbae819febde4 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Wed, 25 Mar 2020 15:30:15 +0100 Subject: [PATCH 48/66] sa360 reader v0 --- .env | 4 +- nck/clients/sa360_client.py | 171 ++++++++++++++++++++++++++++++++++++ nck/helpers/sa360_helper.py | 41 +++++++++ nck/readers/__init__.py | 2 + nck/readers/sa360_reader.py | 123 ++++++++++++++++++++++++++ 5 files changed, 339 insertions(+), 2 deletions(-) create mode 100644 nck/clients/sa360_client.py create mode 100644 nck/helpers/sa360_helper.py create mode 100644 nck/readers/sa360_reader.py diff --git a/.env b/.env index cf087144..438ecf16 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ PROJECT_ID=artefact-docker-containers -DOCKER_IMAGE=nautilus-connector-kit-dev -DOCKER_TAG=FBURL +DOCKER_IMAGE=nautilus-connector-kit +DOCKER_TAG=1.4.0 DOCKER_REGISTRY=eu.gcr.io diff --git a/nck/clients/sa360_client.py b/nck/clients/sa360_client.py new file mode 100644 index 00000000..c953855a --- /dev/null +++ b/nck/clients/sa360_client.py @@ -0,0 +1,171 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+import logging +import httplib2 +import requests + +from tenacity import retry, wait_exponential, stop_after_delay +from oauth2client import client, GOOGLE_TOKEN_URI +from googleapiclient import discovery + + +logger = logging.getLogger("SA360_client") + +DOWNLOAD_FORMAT = "CSV" + + +class SA360Client: + API_NAME = "doubleclicksearch" + API_VERSION = "v2" + + def __init__(self, access_token, client_id, client_secret, refresh_token): + self._credentials = client.GoogleCredentials( + access_token=access_token, + client_id=client_id, + client_secret=client_secret, + refresh_token=refresh_token, + token_expiry=None, + token_uri=GOOGLE_TOKEN_URI, + user_agent=None, + ) + http = self._credentials.authorize(httplib2.Http()) + self._credentials.refresh(http) + self.auth = ( + f"{self._credentials.token_response['token_type']} {self._credentials.token_response['access_token']}" + ) + self._service = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) + + @staticmethod + def generate_report_body( + agency_id, advertiser_id, report_type, columns, start_date, end_date, custom_metrics, custom_dimensions + ): + all_columns = SA360Client.generate_columns(columns, custom_metrics, custom_dimensions) + body = { + "reportScope": {"agencyId": agency_id, "advertiserId": advertiser_id}, + "reportType": report_type, + "columns": all_columns, + "timeRange": SA360Client.get_date_range(start_date, end_date), + "downloadFormat": "csv", + "maxRowsPerFile": 4000000, + "statisticsCurrency": "usd", + } + logger.info("Report Body Generated") + + return body + + def request_report_id(self, body): + report = self._service.reports().request(body=body).execute() + logger.info("Report requested!") + return report["id"] + + @retry(wait=wait_exponential(multiplier=1, min=1, max=8), stop=stop_after_delay(3600)) + def assert_report_file_ready(self, report_id): + """Poll the API with the reportId until the report is ready, up to 100 times. + + Args: + report_id: The ID SA360 has assigned to a report. + """ + request = self._service.reports().get(reportId=report_id) + report_data = request.execute() + if report_data["isReportReady"]: + logger.info("The report is ready.") + + # For large reports, SA360 automatically fragments the report into multiple + # files. The 'files' property in the JSON object that SA360 returns contains + # the list of URLs for file fragment. To download a report, SA360 needs to + # know the report ID and the index of a file fragment. + return report_data + else: + logger.info("Report is not ready.") + raise FileNotFoundError + + def download_report_files(self, json_data, report_id): + for fragment in range(len(json_data["files"])): + logger.info(f"Downloading fragment {str(fragment)} for report {report_id}") + yield self.download_fragment(report_id, str(fragment)) + + def download_fragment(self, report_id, fragment): + """Generate and convert to df a report fragment. + + Args: + report_id: The ID SA360 has assigned to a report. + report_fragment: The 0-based index of the file fragment from the files array. 
+ currency_code: the currency code of the report + """ + # csv_fragment_report = (self._service.reports().getFile(reportId=report_id, reportFragment=fragment).execute()) + # print(csv_fragment_report) + # print(io.BytesIO(csv_fragment_report)) + request = self._service.reports().getFile(reportId=report_id, reportFragment=fragment) + headers = request.headers + headers.update({"Authorization": self.auth}) + r = requests.get(request.uri, stream=True, headers=headers) + + yield from r.iter_lines() + + # i = 0 + # index = 0 + # impr_keyword = 0 + # for row in r.iter_lines(): + # decoded_row = row.decode("utf-8") + # if "impr" in decoded_row: + # decoded_row = decoded_row.split(",") + # index = decoded_row.index("impr") + # continue + # + # if "samsung note 10+ 6.8" in decoded_row: + # r = decoded_row.split(",") + # impr_keyword += int(r[index]) + # print(decoded_row) + # decoded_row = decoded_row.split(",") + # i += int(decoded_row[index]) + # print("IMPRESSIONS", i, impr_keyword) + + # yield from r.iter_lines() + + # df = pd.DataFrame.from_csv(io.BytesIO(csv_fragment_report)) + # df["currency_code"] = currency_code + # from tabulate import tabulate + # print(tabulate(df, headers='keys', tablefmt='psql')) + # return df + + def direct_report_download(self, report_id, file_id): + # Retrieve the file metadata. + report_file = self._service.files().get(reportId=report_id, fileId=file_id).execute() + + if report_file["status"] == "REPORT_AVAILABLE": + # Create a get request. + request = self._service.files().get_media(reportId=report_id, fileId=file_id) + headers = request.headers + r = requests.get(request.uri, stream=True, headers=headers) + + yield from r.iter_lines() + + @staticmethod + def generate_columns(columns, custom_dimensions, custom_metrics): + standard = [{"columnName": column} for column in columns] + dimensions = [{"columnDimensionName": column, "platformSource": "floodlight"} for column in custom_dimensions] + metrics = [{"columnMetricName": column, "platformSource": "floodlight"} for column in custom_metrics] + + return standard + dimensions + metrics + + @staticmethod + def get_date_range(start_date, end_date): + start = start_date.strftime("%Y-%m-%d") + end = end_date.strftime("%Y-%m-%d") + logger.warning(f"Custom date range selected: {start} --> {end}") + return {"startDate": start, "endDate": end} diff --git a/nck/helpers/sa360_helper.py b/nck/helpers/sa360_helper.py new file mode 100644 index 00000000..7375996c --- /dev/null +++ b/nck/helpers/sa360_helper.py @@ -0,0 +1,41 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+REPORT_TYPES = [ + "advertiser", + "account", + "ad", + "adGroup", + "adGroupTarget", + "bidStrategy", + "campaign", + "campaignTarget", + "conversion", + "feedltem", + "floodlightActivity", + "keyword", + "negativeAdGroupKeyword", + "negativeAdGroupTarget", + "negativeCampaignKeyword", + "negativeCampaignTarget", + "paidAndOrganic", + "productAdvertised", + "productGroup", + "productLeadAndCrossSell", + "productTarget", + "visit", +] diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py index 8c5375a8..7d9993bb 100644 --- a/nck/readers/__init__.py +++ b/nck/readers/__init__.py @@ -21,6 +21,7 @@ from nck.readers.gcs_reader import gcs from nck.readers.googleads_reader import google_ads from nck.readers.s3_reader import s3 +from nck.readers.sa360_reader import sa360_reader from nck.readers.oracle_reader import oracle from nck.readers.gsheets_reader import gsheets from nck.readers.salesforce_reader import salesforce @@ -41,6 +42,7 @@ gcs, google_ads, s3, + sa360_reader, facebook_marketing, oracle, dbm, diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py new file mode 100644 index 00000000..b201a6db --- /dev/null +++ b/nck/readers/sa360_reader.py @@ -0,0 +1,123 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+import csv +import click + +from io import StringIO + +from nck.commands.command import processor +from nck.readers.reader import Reader +from nck.utils.args import extract_args +from nck.streams.normalized_json_stream import NormalizedJSONStream +from nck.clients.sa360_client import SA360Client +from nck.helpers.sa360_helper import REPORT_TYPES + +DATEFORMAT = "%Y-%m-%d" +ENCODING = "utf-8" + + +@click.command(name="read_sa360") +@click.option("--sa360-access-token", default=None) +@click.option("--sa360-client-id", required=True) +@click.option("--sa360-client-secret", required=True) +@click.option("--sa360-refresh-token", required=True) +@click.option("--sa360-agency-id", required=True) +@click.option("--sa360-advertiser-id", "sa360_advertiser_ids", required=True, multiple=True) +@click.option("--sa360-report-name", default="SA360 Report") +@click.option("--sa360-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0]) +@click.option( + "--sa360-column", "sa360_columns", multiple=True, help="https://developers.google.com/search-ads/v2/report-types" +) +@click.option( + "--sa360-custom-dimension", + "sa360_custom_dimensions", + multiple=True, + help="https://developers.google.com/search-ads/v2/how-tos/reporting/custom-metrics-dimensions", +) +@click.option( + "--sa360-custom-metric", + "sa360_custom_metrics", + multiple=True, + help="https://developers.google.com/search-ads/v2/how-tos/reporting/custom-metrics-dimensions", +) +@click.option("--sa360-start-date", type=click.DateTime(), required=True) +@click.option("--sa360-end-date", type=click.DateTime(), required=True) +@processor("sa360_access_token", "sa360_refresh_token", "sa360_client_secret") +def sa360_reader(**kwargs): + return SA360Reader(**extract_args("sa360_", kwargs)) + + +class SA360Reader(Reader): + def __init__( + self, + access_token, + client_id, + client_secret, + refresh_token, + agency_id, + advertiser_ids, + report_name, + report_type, + columns, + custom_metrics, + custom_dimensions, + start_date, + end_date, + ): + self.sa360_client = SA360Client(access_token, client_id, client_secret, refresh_token) + self.agency_id = agency_id + self.advertiser_ids = list(advertiser_ids) + self.report_name = report_name + self.report_type = report_type + self.columns = list(columns) + self.custom_metrics = list(custom_metrics) + self.custom_dimensions = list(custom_dimensions) + self.all_columns = self.columns + self.custom_dimensions + self.custom_metrics + self.start_date = start_date + self.end_date = end_date + + def format_response(self, report_generator): + # skip headers in the CSV output + next(report_generator) + for row in report_generator: + decoded_row = row.decode(ENCODING) + csv_reader = csv.DictReader(StringIO(decoded_row), self.all_columns) + yield next(csv_reader) + + def result_generator(self): + advertiser_id = next((a for a in self.advertiser_ids), "") + body = self.sa360_client.generate_report_body( + self.agency_id, + advertiser_id, + self.report_type, + self.columns, + self.start_date, + self.end_date, + self.custom_dimensions, + self.custom_metrics, + ) + + report_id = self.sa360_client.request_report_id(body) + + report_data = self.sa360_client.assert_report_file_ready(report_id) + + for report_generator in self.sa360_client.download_report_files(report_data, report_id): + yield from self.format_response(report_generator) + + def read(self): + yield NormalizedJSONStream("results" + "_".join(self.advertiser_ids), self.result_generator()) From 0d041b34edf47b132bd0039f962828cef879e44f Mon 
Sep 17 00:00:00 2001 From: Vivien MORLET Date: Fri, 27 Mar 2020 16:43:59 +0100 Subject: [PATCH 49/66] handle several advertisers + add tests --- nck/clients/sa360_client.py | 44 ++++++++------------------- nck/readers/sa360_reader.py | 40 ++++++++++++++---------- tests/clients/test_sa360_client.py | 33 ++++++++++++++++++++ tests/readers/test_sa360_reader.py | 49 ++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 48 deletions(-) create mode 100644 tests/clients/test_sa360_client.py create mode 100644 tests/readers/test_sa360_reader.py diff --git a/nck/clients/sa360_client.py b/nck/clients/sa360_client.py index c953855a..4b71d2e6 100644 --- a/nck/clients/sa360_client.py +++ b/nck/clients/sa360_client.py @@ -23,9 +23,7 @@ from oauth2client import client, GOOGLE_TOKEN_URI from googleapiclient import discovery - logger = logging.getLogger("SA360_client") - DOWNLOAD_FORMAT = "CSV" @@ -50,6 +48,17 @@ def __init__(self, access_token, client_id, client_secret, refresh_token): ) self._service = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) + def get_all_advertisers_of_agency(self, agency_id): + body = { + "reportScope": {"agencyId": agency_id}, + "reportType": "advertiser", + "columns": [{"columnName": "advertiserId"}], + "statisticsCurrency": "usd", + } + report = self._service.reports().generate(body=body).execute() + advertiser_ids = [row["advertiserId"] for row in report["rows"]] + return advertiser_ids + @staticmethod def generate_report_body( agency_id, advertiser_id, report_type, columns, start_date, end_date, custom_metrics, custom_dimensions @@ -73,7 +82,7 @@ def request_report_id(self, body): logger.info("Report requested!") return report["id"] - @retry(wait=wait_exponential(multiplier=1, min=1, max=8), stop=stop_after_delay(3600)) + @retry(wait=wait_exponential(multiplier=60, min=60, max=600), stop=stop_after_delay(3600)) def assert_report_file_ready(self, report_id): """Poll the API with the reportId until the report is ready, up to 100 times. @@ -107,9 +116,6 @@ def download_fragment(self, report_id, fragment): report_fragment: The 0-based index of the file fragment from the files array. currency_code: the currency code of the report """ - # csv_fragment_report = (self._service.reports().getFile(reportId=report_id, reportFragment=fragment).execute()) - # print(csv_fragment_report) - # print(io.BytesIO(csv_fragment_report)) request = self._service.reports().getFile(reportId=report_id, reportFragment=fragment) headers = request.headers headers.update({"Authorization": self.auth}) @@ -117,32 +123,6 @@ def download_fragment(self, report_id, fragment): yield from r.iter_lines() - # i = 0 - # index = 0 - # impr_keyword = 0 - # for row in r.iter_lines(): - # decoded_row = row.decode("utf-8") - # if "impr" in decoded_row: - # decoded_row = decoded_row.split(",") - # index = decoded_row.index("impr") - # continue - # - # if "samsung note 10+ 6.8" in decoded_row: - # r = decoded_row.split(",") - # impr_keyword += int(r[index]) - # print(decoded_row) - # decoded_row = decoded_row.split(",") - # i += int(decoded_row[index]) - # print("IMPRESSIONS", i, impr_keyword) - - # yield from r.iter_lines() - - # df = pd.DataFrame.from_csv(io.BytesIO(csv_fragment_report)) - # df["currency_code"] = currency_code - # from tabulate import tabulate - # print(tabulate(df, headers='keys', tablefmt='psql')) - # return df - def direct_report_download(self, report_id, file_id): # Retrieve the file metadata. 
report_file = self._service.files().get(reportId=report_id, fileId=file_id).execute() diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py index b201a6db..5a9eea27 100644 --- a/nck/readers/sa360_reader.py +++ b/nck/readers/sa360_reader.py @@ -37,7 +37,12 @@ @click.option("--sa360-client-secret", required=True) @click.option("--sa360-refresh-token", required=True) @click.option("--sa360-agency-id", required=True) -@click.option("--sa360-advertiser-id", "sa360_advertiser_ids", required=True, multiple=True) +@click.option( + "--sa360-advertiser-id", + "sa360_advertiser_ids", + multiple=True, + help="If empty, all advertisers from agency will be requested", +) @click.option("--sa360-report-name", default="SA360 Report") @click.option("--sa360-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0]) @click.option( @@ -100,24 +105,27 @@ def format_response(self, report_generator): yield next(csv_reader) def result_generator(self): - advertiser_id = next((a for a in self.advertiser_ids), "") - body = self.sa360_client.generate_report_body( - self.agency_id, - advertiser_id, - self.report_type, - self.columns, - self.start_date, - self.end_date, - self.custom_dimensions, - self.custom_metrics, - ) + for advertiser_id in self.advertiser_ids: + body = self.sa360_client.generate_report_body( + self.agency_id, + advertiser_id, + self.report_type, + self.columns, + self.start_date, + self.end_date, + self.custom_dimensions, + self.custom_metrics, + ) - report_id = self.sa360_client.request_report_id(body) + report_id = self.sa360_client.request_report_id(body) - report_data = self.sa360_client.assert_report_file_ready(report_id) + report_data = self.sa360_client.assert_report_file_ready(report_id) - for report_generator in self.sa360_client.download_report_files(report_data, report_id): - yield from self.format_response(report_generator) + for report_generator in self.sa360_client.download_report_files(report_data, report_id): + yield from self.format_response(report_generator) def read(self): + if not self.advertiser_ids: + self.advertiser_ids = self.sa360_client.get_all_advertisers_of_agency(self.agency_id) + yield NormalizedJSONStream("results" + "_".join(self.advertiser_ids), self.result_generator()) diff --git a/tests/clients/test_sa360_client.py b/tests/clients/test_sa360_client.py new file mode 100644 index 00000000..9c30e8ff --- /dev/null +++ b/tests/clients/test_sa360_client.py @@ -0,0 +1,33 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+from unittest import TestCase +from nck.clients.sa360_client import SA360Client + + +class SA360ClientTest(TestCase): + def test_generate_all_columns(self): + standard = ["clicks", "impressions"] + custom_dimensions = ["customDimension"] + custom_metrics = ["customMetric"] + expected = [ + {"columnName": "clicks"}, + {"columnName": "impressions"}, + {"columnDimensionName": "customDimension", "platformSource": "floodlight"}, + {"columnMetricName": "customMetric", "platformSource": "floodlight"}, + ] + assert SA360Client.generate_columns(standard, custom_dimensions, custom_metrics) == expected diff --git a/tests/readers/test_sa360_reader.py b/tests/readers/test_sa360_reader.py new file mode 100644 index 00000000..42f52292 --- /dev/null +++ b/tests/readers/test_sa360_reader.py @@ -0,0 +1,49 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from unittest import TestCase, mock +import logging + +from nck.readers.sa360_reader import SA360Reader + +logger = logging.getLogger("SA360_reader_test") + + +class SA360ReaderTest(TestCase): + def mock_sa360_reader(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + kwargs = {"all_columns": ["impressions", "clicks"]} + + @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) + def test_empty_data(self): + reader = SA360Reader(**self.kwargs) + input_report = (row for row in [b"Just Headers in this empty report"]) + if len(list(reader.format_response(input_report))) > 0: + assert False, "Data is not empty" + + @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) + def test_format_data(self): + reader = SA360Reader(**self.kwargs) + input_report = (row for row in [b"impressions,clicks", b"1,2", b"3,4"]) + expected = [{"impressions": "1", "clicks": "2"}, {"impressions": "3", "clicks": "4"}] + input_list = list(reader.format_response(input_report)) + assert len(input_list) == len(expected) + + for input_row, output in zip(input_list, expected): + assert input_row == output From 5f61c77f1fa6f2550c44030c8ecd2b23e7c68d78 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Fri, 27 Mar 2020 17:38:31 +0100 Subject: [PATCH 50/66] add documentation --- nck/readers/README.md | 57 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/nck/readers/README.md b/nck/readers/README.md index b34fc950..52b308c4 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -155,6 +155,63 @@ python nck/entrypoint.py read_search_console --search-console-client-id and + +- A refresh token, created with the email address able to access to all the Search Ads 360 Account you will be calling + +See the [documentation here](https://developers.google.com/search-ads/v2/authorizing "SA360 
Authentication") +to set-up your OAuth2 credentials and refresh token specifically for Searc hAds 360 Reporting. + + +#### Which Reports and Metrics are available in the API + +The list of available reports for the API, and the associated metrics, can be [found here](https://developers.google.com/search-ads/v2/report-types "Report Types") + +#### Simple API call example + +- Call Example + + +The following command retrieves insights about the Ads in the Search Ads 360 Account from the agency thanks to +your , and with the necessary permissions to access your Accounts. + +``` +python nck/entrypoint.py read_sa360 --sa360-client-id --sa360-client-secret --sa360-refresh-token --sa360-agency-id --sa360-agency-id --sa360-report-type keyword --sa360-column date --sa360-column impr --sa360-column clicks --sa360-start-date 2020-01-01 --sa360-end-date 2020-01-01 +``` + +*If it doesn't work, try to* `export PYTHONPATH="."` *in the nautilus-connector-kit folder (to be sure Python is reading correctly)* +*If you want the output to be printed in your console, add* `write_console` *at the end of your command (see writers for more details)* + + +- Parameters of the SA360 Reader + +| CLI option | Documentation | +| ---------- | ------------- | +|`--sa360-access-token` | (Optional) Access token | +|`--sa360-client-id` | OAuth2 ID | +|`--sa360-client-secret` | OAuth2 ID Secret | +|`--sa360-refresh-token` | Refresh token | +|`--sa360-agency-id` | Agency ID to request in SA360 | +|`--sa360-advertiser-id` | (Optional) Advertiser ids to request. If not provided, every advertiser of the agency will be requested| +|`--sa360-report-name` | (Optional) Name of the output report | +|`--sa360-report-type` | Type of the report to request. List [here](https://developers.google.com/search-ads/v2/report-types)| +|`--sa360-column` | Dimensions and metrics to request in the report | +|`--sa360-custom-dimension` | (Optional) Custom dimensions to report | +|`--sa360-custom-metric` | (Optional) Custom metrics to report | +|`--sa360-start-date` | Start date of the period to request | +|`--sa360-end-date` | End date of the period to request | + +See the documents below for a better understanding of the parameters: +- [SA360 Reporting](https://developers.google.com/search-ads/v2/how-tos/reporting) + + ## Yandex readers For now, there is only one Yandex API you can access through Nautilus connectors: [Direct API](https://tech.yandex.com/direct/). 
From aec64b5a5e32bba51505b61c3ce9f65b4bda3199 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Wed, 8 Apr 2020 16:42:37 +0200 Subject: [PATCH 51/66] add saved_columns + fix typo --- nck/clients/sa360_client.py | 19 +++++++------------ nck/readers/README.md | 4 ++-- nck/readers/sa360_reader.py | 23 +++++++---------------- 3 files changed, 16 insertions(+), 30 deletions(-) diff --git a/nck/clients/sa360_client.py b/nck/clients/sa360_client.py index 4b71d2e6..fae755ab 100644 --- a/nck/clients/sa360_client.py +++ b/nck/clients/sa360_client.py @@ -60,10 +60,8 @@ def get_all_advertisers_of_agency(self, agency_id): return advertiser_ids @staticmethod - def generate_report_body( - agency_id, advertiser_id, report_type, columns, start_date, end_date, custom_metrics, custom_dimensions - ): - all_columns = SA360Client.generate_columns(columns, custom_metrics, custom_dimensions) + def generate_report_body(agency_id, advertiser_id, report_type, columns, start_date, end_date, saved_columns): + all_columns = SA360Client.generate_columns(columns, saved_columns) body = { "reportScope": {"agencyId": agency_id, "advertiserId": advertiser_id}, "reportType": report_type, @@ -74,7 +72,6 @@ def generate_report_body( "statisticsCurrency": "usd", } logger.info("Report Body Generated") - return body def request_report_id(self, body): @@ -100,7 +97,7 @@ def assert_report_file_ready(self, report_id): # know the report ID and the index of a file fragment. return report_data else: - logger.info("Report is not ready.") + logger.info("Report is not ready. Retrying...") raise FileNotFoundError def download_report_files(self, json_data, report_id): @@ -113,8 +110,7 @@ def download_fragment(self, report_id, fragment): Args: report_id: The ID SA360 has assigned to a report. - report_fragment: The 0-based index of the file fragment from the files array. - currency_code: the currency code of the report + fragment: The 0-based index of the file fragment from the files array. """ request = self._service.reports().getFile(reportId=report_id, reportFragment=fragment) headers = request.headers @@ -136,12 +132,11 @@ def direct_report_download(self, report_id, file_id): yield from r.iter_lines() @staticmethod - def generate_columns(columns, custom_dimensions, custom_metrics): + def generate_columns(columns, saved_columns): standard = [{"columnName": column} for column in columns] - dimensions = [{"columnDimensionName": column, "platformSource": "floodlight"} for column in custom_dimensions] - metrics = [{"columnMetricName": column, "platformSource": "floodlight"} for column in custom_metrics] + saved = [{"savedColumnName": column} for column in saved_columns] - return standard + dimensions + metrics + return standard + saved @staticmethod def get_date_range(start_date, end_date): diff --git a/nck/readers/README.md b/nck/readers/README.md index 52b308c4..04768304 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -167,7 +167,7 @@ Using the Search Ads API requires two things: - A refresh token, created with the email address able to access to all the Search Ads 360 Account you will be calling See the [documentation here](https://developers.google.com/search-ads/v2/authorizing "SA360 Authentication") -to set-up your OAuth2 credentials and refresh token specifically for Searc hAds 360 Reporting. +to set-up your OAuth2 credentials and refresh token specifically for Search Ads 360 Reporting. 
#### Which Reports and Metrics are available in the API @@ -183,7 +183,7 @@ The following command retrieves insights about the Ads in the Search Ads 360 Acc your , and with the necessary permissions to access your Accounts. ``` -python nck/entrypoint.py read_sa360 --sa360-client-id --sa360-client-secret --sa360-refresh-token --sa360-agency-id --sa360-agency-id --sa360-report-type keyword --sa360-column date --sa360-column impr --sa360-column clicks --sa360-start-date 2020-01-01 --sa360-end-date 2020-01-01 +python nck/entrypoint.py read_sa360 --sa360-client-id --sa360-client-secret --sa360-refresh-token --sa360-agency-id --sa360-advertiser-id --sa360-report-type keyword --sa360-column date --sa360-column impr --sa360-column clicks --sa360-start-date 2020-01-01 --sa360-end-date 2020-01-01 ``` *If it doesn't work, try to* `export PYTHONPATH="."` *in the nautilus-connector-kit folder (to be sure Python is reading correctly)* diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py index 5a9eea27..9ab61827 100644 --- a/nck/readers/sa360_reader.py +++ b/nck/readers/sa360_reader.py @@ -49,16 +49,10 @@ "--sa360-column", "sa360_columns", multiple=True, help="https://developers.google.com/search-ads/v2/report-types" ) @click.option( - "--sa360-custom-dimension", - "sa360_custom_dimensions", + "--sa360-saved-column", + "sa360_saved_columns", multiple=True, - help="https://developers.google.com/search-ads/v2/how-tos/reporting/custom-metrics-dimensions", -) -@click.option( - "--sa360-custom-metric", - "sa360_custom_metrics", - multiple=True, - help="https://developers.google.com/search-ads/v2/how-tos/reporting/custom-metrics-dimensions", + help="https://developers.google.com/search-ads/v2/how-tos/reporting/saved-columns", ) @click.option("--sa360-start-date", type=click.DateTime(), required=True) @click.option("--sa360-end-date", type=click.DateTime(), required=True) @@ -79,8 +73,7 @@ def __init__( report_name, report_type, columns, - custom_metrics, - custom_dimensions, + saved_columns, start_date, end_date, ): @@ -90,9 +83,8 @@ def __init__( self.report_name = report_name self.report_type = report_type self.columns = list(columns) - self.custom_metrics = list(custom_metrics) - self.custom_dimensions = list(custom_dimensions) - self.all_columns = self.columns + self.custom_dimensions + self.custom_metrics + self.saved_columns = list(saved_columns) + self.all_columns = self.columns + self.saved_columns self.start_date = start_date self.end_date = end_date @@ -113,8 +105,7 @@ def result_generator(self): self.columns, self.start_date, self.end_date, - self.custom_dimensions, - self.custom_metrics, + self.saved_columns, ) report_id = self.sa360_client.request_report_id(body) From b27370acafd8eb77b593450db895d82ed20b8f86 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Fri, 10 Apr 2020 17:40:47 +0200 Subject: [PATCH 52/66] change tests --- tests/clients/test_sa360_client.py | 12 +++--------- tests/readers/test_sa360_reader.py | 8 ++------ 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/tests/clients/test_sa360_client.py b/tests/clients/test_sa360_client.py index 9c30e8ff..87f606f3 100644 --- a/tests/clients/test_sa360_client.py +++ b/tests/clients/test_sa360_client.py @@ -22,12 +22,6 @@ class SA360ClientTest(TestCase): def test_generate_all_columns(self): standard = ["clicks", "impressions"] - custom_dimensions = ["customDimension"] - custom_metrics = ["customMetric"] - expected = [ - {"columnName": "clicks"}, - {"columnName": "impressions"}, - {"columnDimensionName": 
"customDimension", "platformSource": "floodlight"}, - {"columnMetricName": "customMetric", "platformSource": "floodlight"}, - ] - assert SA360Client.generate_columns(standard, custom_dimensions, custom_metrics) == expected + saved = ["savedColumn"] + expected = [{"columnName": "clicks"}, {"columnName": "impressions"}, {"savedColumnName": "savedColumn"}] + self.assertEqual(SA360Client.generate_columns(standard, saved), expected) diff --git a/tests/readers/test_sa360_reader.py b/tests/readers/test_sa360_reader.py index 42f52292..1f37a1e4 100644 --- a/tests/readers/test_sa360_reader.py +++ b/tests/readers/test_sa360_reader.py @@ -34,8 +34,7 @@ def mock_sa360_reader(self, **kwargs): def test_empty_data(self): reader = SA360Reader(**self.kwargs) input_report = (row for row in [b"Just Headers in this empty report"]) - if len(list(reader.format_response(input_report))) > 0: - assert False, "Data is not empty" + self.assertFalse(next(reader.format_response(input_report), False), "Data is not empty") @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) def test_format_data(self): @@ -43,7 +42,4 @@ def test_format_data(self): input_report = (row for row in [b"impressions,clicks", b"1,2", b"3,4"]) expected = [{"impressions": "1", "clicks": "2"}, {"impressions": "3", "clicks": "4"}] input_list = list(reader.format_response(input_report)) - assert len(input_list) == len(expected) - - for input_row, output in zip(input_list, expected): - assert input_row == output + self.assertListEqual(input_list, expected) From 87dfa150f60f1ef8705bc9c9b16932a83a0c5b97 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Fri, 10 Apr 2020 19:54:48 +0200 Subject: [PATCH 53/66] generate_dict_from_csv --- nck/readers/sa360_reader.py | 16 +- nck/utils/text.py | 16 +- tests/readers/test_sa360_reader.py | 45 ------ tests/utils/test_text_utils.py | 242 ++++++++++++++--------------- 4 files changed, 122 insertions(+), 197 deletions(-) delete mode 100644 tests/readers/test_sa360_reader.py diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py index 9ab61827..5f2528a5 100644 --- a/nck/readers/sa360_reader.py +++ b/nck/readers/sa360_reader.py @@ -15,17 +15,15 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import csv import click -from io import StringIO - from nck.commands.command import processor from nck.readers.reader import Reader -from nck.utils.args import extract_args from nck.streams.normalized_json_stream import NormalizedJSONStream from nck.clients.sa360_client import SA360Client from nck.helpers.sa360_helper import REPORT_TYPES +from nck.utils.args import extract_args +from nck.utils.text import get_generator_dict_from_str_csv DATEFORMAT = "%Y-%m-%d" ENCODING = "utf-8" @@ -88,14 +86,6 @@ def __init__( self.start_date = start_date self.end_date = end_date - def format_response(self, report_generator): - # skip headers in the CSV output - next(report_generator) - for row in report_generator: - decoded_row = row.decode(ENCODING) - csv_reader = csv.DictReader(StringIO(decoded_row), self.all_columns) - yield next(csv_reader) - def result_generator(self): for advertiser_id in self.advertiser_ids: body = self.sa360_client.generate_report_body( @@ -113,7 +103,7 @@ def result_generator(self): report_data = self.sa360_client.assert_report_file_ready(report_id) for report_generator in self.sa360_client.download_report_files(report_data, report_id): - yield from self.format_response(report_generator) + yield from get_generator_dict_from_str_csv(report_generator) def read(self): if not self.advertiser_ids: diff --git a/nck/utils/text.py b/nck/utils/text.py index b6ff82f1..08170d10 100644 --- a/nck/utils/text.py +++ b/nck/utils/text.py @@ -39,10 +39,7 @@ def add_column_value_to_csv_line_iterator(line_iterator, columname, value): def get_generator_dict_from_str_csv( - line_iterator: Generator[Union[bytes, str], None, None], - add_date=False, - day_range=None, - date_format="%Y-%m-%d" + line_iterator: Generator[Union[bytes, str], None, None], add_date=False, day_range=None, date_format="%Y-%m-%d" ) -> Generator[Dict[str, str], None, None]: first_line = next(line_iterator) headers = ( @@ -58,7 +55,7 @@ def get_generator_dict_from_str_csv( line = line.decode("utf-8") except UnicodeDecodeError as err: logging.warning( - "An error has occured while parsing the file. " + "An error has occurred while parsing the file. " "The line could not be decoded in %s." 
"Invalid input that the codec failed on: %s", err.encoding, @@ -77,8 +74,7 @@ def get_generator_dict_from_str_csv( def get_generator_dict_from_str_tsv( - line_iterator: Generator[Union[bytes, str], None, None], - skip_first_row=False + line_iterator: Generator[Union[bytes, str], None, None], skip_first_row=False ) -> Generator[Dict[str, str], None, None]: if skip_first_row: next(line_iterator) @@ -108,11 +104,7 @@ def get_generator_dict_from_str_tsv( def parse_decoded_line(line: str, delimiter=",", quotechar='"') -> List[str]: line_as_file = StringIO(line) reader = csv.reader( - line_as_file, - delimiter=delimiter, - quotechar=quotechar, - quoting=csv.QUOTE_ALL, - skipinitialspace=True, + line_as_file, delimiter=delimiter, quotechar=quotechar, quoting=csv.QUOTE_ALL, skipinitialspace=True ) return next(reader) diff --git a/tests/readers/test_sa360_reader.py b/tests/readers/test_sa360_reader.py deleted file mode 100644 index 1f37a1e4..00000000 --- a/tests/readers/test_sa360_reader.py +++ /dev/null @@ -1,45 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from unittest import TestCase, mock -import logging - -from nck.readers.sa360_reader import SA360Reader - -logger = logging.getLogger("SA360_reader_test") - - -class SA360ReaderTest(TestCase): - def mock_sa360_reader(self, **kwargs): - for param, value in kwargs.items(): - setattr(self, param, value) - - kwargs = {"all_columns": ["impressions", "clicks"]} - - @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) - def test_empty_data(self): - reader = SA360Reader(**self.kwargs) - input_report = (row for row in [b"Just Headers in this empty report"]) - self.assertFalse(next(reader.format_response(input_report), False), "Data is not empty") - - @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) - def test_format_data(self): - reader = SA360Reader(**self.kwargs) - input_report = (row for row in [b"impressions,clicks", b"1,2", b"3,4"]) - expected = [{"impressions": "1", "clicks": "2"}, {"impressions": "3", "clicks": "4"}] - input_list = list(reader.format_response(input_report)) - self.assertListEqual(input_list, expected) diff --git a/tests/utils/test_text_utils.py b/tests/utils/test_text_utils.py index 0a9202f0..7c6f6a6e 100644 --- a/tests/utils/test_text_utils.py +++ b/tests/utils/test_text_utils.py @@ -16,33 +16,32 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
import logging -import unittest +from unittest import TestCase from datetime import date from unittest.mock import patch from parameterized import parameterized -from nck.utils.text import (get_generator_dict_from_str_csv, - get_generator_dict_from_str_tsv, - parse_decoded_line) +from nck.utils.text import get_generator_dict_from_str_csv, get_generator_dict_from_str_tsv, parse_decoded_line -class TestTextUtilsMethod(unittest.TestCase): - +class TestTextUtilsMethod(TestCase): def test_multiple_encodings(self): test_string_to_encode = ( - 'BR,test_partner,123,Active,test_advertiser,123,' - '0,,test_io,123,Active,,test_line_item' + "BR,test_partner,123,Active,test_advertiser,123," + "0,,test_io,123,Active,,test_line_item" ',123,0,,"",0.00,41' ) lines = [ - (b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - b" ID,Advertiser Status,Advertiser Integration Code,Insertion" - b" Order,Insertion Order ID,Insertion Order Status,Insertion" - b" Order Integration Code,Line Item,Line Item ID,Line Item" - b" Status,Line Item Integration Code,Targeted Data Providers," - b"Cookie Reach: Average Impression Frequency,Cookie Reach: " - b"Impression Reach"), + ( + b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" + b" ID,Advertiser Status,Advertiser Integration Code,Insertion" + b" Order,Insertion Order ID,Insertion Order Status,Insertion" + b" Order Integration Code,Line Item,Line Item ID,Line Item" + b" Status,Line Item Integration Code,Targeted Data Providers," + b"Cookie Reach: Average Impression Frequency,Cookie Reach: " + b"Impression Reach" + ), test_string_to_encode.encode("utf-8"), test_string_to_encode.encode("ascii"), test_string_to_encode.encode("windows-1252"), @@ -66,36 +65,37 @@ def test_multiple_encodings(self): "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", - "Targeted Data Providers": '', + "Targeted Data Providers": "", "Cookie Reach: Average Impression Frequency": "0.00", "Cookie Reach: Impression Reach": "41", } - for yielded_dict in get_generator_dict_from_str_csv( - line_iterator_multiple_encodings - ): + for yielded_dict in get_generator_dict_from_str_csv(line_iterator_multiple_encodings): self.assertDictEqual(yielded_dict, expected_dict) def test_blank_line(self): lines = [ - (b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - b" ID,Advertiser Status,Advertiser Integration Code,Insertion" - b" Order,Insertion Order ID,Insertion Order Status,Insertion" - b" Order Integration Code,Line Item,Line Item ID,Line Item" - b" Status,Line Item Integration Code,Targeted Data Providers," - b"Cookie Reach: Average Impression Frequency,Cookie Reach: " - b"Impression Reach"), - "" + ( + b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" + b" ID,Advertiser Status,Advertiser Integration Code,Insertion" + b" Order,Insertion Order ID,Insertion Order Status,Insertion" + b" Order Integration Code,Line Item,Line Item ID,Line Item" + b" Status,Line Item Integration Code,Targeted Data Providers," + b"Cookie Reach: Average Impression Frequency,Cookie Reach: " + b"Impression Reach" + ), + "", ] line_iterator_with_blank_line = (line for line in lines) - self.assertTrue(get_generator_dict_from_str_csv( - line_iterator_with_blank_line - )) + self.assertTrue(get_generator_dict_from_str_csv(line_iterator_with_blank_line)) lines.insert( 1, - (b'BR,test_partner,123,Active,test_advertiser,123,' - b'0,,test_io,123,Active,,test_line_item' - b',123,0,,"",0.00,41')) + ( + 
b"BR,test_partner,123,Active,test_advertiser,123," + b"0,,test_io,123,Active,,test_line_item" + b',123,0,,"",0.00,41' + ), + ) expected_dict = { "Country": "BR", "Partner": "test_partner", @@ -113,37 +113,37 @@ def test_blank_line(self): "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", - "Targeted Data Providers": '', + "Targeted Data Providers": "", "Cookie Reach: Average Impression Frequency": "0.00", "Cookie Reach: Impression Reach": "41", } line_iterator_with_blank_line = (line for line in lines) - for dic in get_generator_dict_from_str_csv( - line_iterator_with_blank_line - ): + for dic in get_generator_dict_from_str_csv(line_iterator_with_blank_line): self.assertDictEqual(dic, expected_dict) lines.append("This is something that should not be here.") line_iterator_with_blank_line = (line for line in lines) - test_result = get_generator_dict_from_str_csv( - line_iterator_with_blank_line - ) + test_result = get_generator_dict_from_str_csv(line_iterator_with_blank_line) self.assertEqual(len(list(test_result)), 1) for dic in test_result: self.assertEqual(dic, expected_dict) def test_invalid_byte(self): lines = [ - (b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - b" ID,Advertiser Status,Advertiser Integration Code,Insertion" - b" Order,Insertion Order ID,Insertion Order Status,Insertion" - b" Order Integration Code,Line Item,Line Item ID,Line Item" - b" Status,Line Item Integration Code,Targeted Data Providers," - b"Cookie Reach: Average Impression Frequency,Cookie Reach: " - b"Impression Reach"), - (b'BR,test_partner,123,Active,test_advertiser,123,' - b'0,,test_io,123,Active,,test_line_item' - b',123,0,," \x91\xea\xd0$",0.00,41'), + ( + b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" + b" ID,Advertiser Status,Advertiser Integration Code,Insertion" + b" Order,Insertion Order ID,Insertion Order Status,Insertion" + b" Order Integration Code,Line Item,Line Item ID,Line Item" + b" Status,Line Item Integration Code,Targeted Data Providers," + b"Cookie Reach: Average Impression Frequency,Cookie Reach: " + b"Impression Reach" + ), + ( + b"BR,test_partner,123,Active,test_advertiser,123," + b"0,,test_io,123,Active,,test_line_item" + b',123,0,," \x91\xea\xd0$",0.00,41' + ), ] line_iterator_invalid_byte = (line for line in lines) expected_dict = { @@ -163,34 +163,38 @@ def test_invalid_byte(self): "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", - "Targeted Data Providers": ' $', + "Targeted Data Providers": " $", "Cookie Reach: Average Impression Frequency": "0.00", "Cookie Reach: Impression Reach": "41", } with self.assertLogs(level=logging.INFO) as cm: - for yielded_dict in get_generator_dict_from_str_csv( - line_iterator_invalid_byte - ): + for yielded_dict in get_generator_dict_from_str_csv(line_iterator_invalid_byte): self.assertDictEqual(yielded_dict, expected_dict) self.assertEqual( cm.output, - ["WARNING:root:An error has occured while parsing the file. " - "The line could not be decoded in utf-8." - "Invalid input that the codec failed on: b'\\x91'"] + [ + "WARNING:root:An error has occurred while parsing the file. " + "The line could not be decoded in utf-8." 
+ "Invalid input that the codec failed on: b'\\x91'" + ], ) def test_response_not_binary(self): lines = [ - ("Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - " ID,Advertiser Status,Advertiser Integration Code,Insertion" - " Order,Insertion Order ID,Insertion Order Status,Insertion" - " Order Integration Code,Line Item,Line Item ID,Line Item" - " Status,Line Item Integration Code,Targeted Data Providers," - "Cookie Reach: Average Impression Frequency,Cookie Reach: " - "Impression Reach"), - ('BR,test_partner,123,Active,test_advertiser,123,' - '0,,test_io,123,Active,,test_line_item' - ',123,0,,"",0.00,41') + ( + "Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" + " ID,Advertiser Status,Advertiser Integration Code,Insertion" + " Order,Insertion Order ID,Insertion Order Status,Insertion" + " Order Integration Code,Line Item,Line Item ID,Line Item" + " Status,Line Item Integration Code,Targeted Data Providers," + "Cookie Reach: Average Impression Frequency,Cookie Reach: " + "Impression Reach" + ), + ( + "BR,test_partner,123,Active,test_advertiser,123," + "0,,test_io,123,Active,,test_line_item" + ',123,0,,"",0.00,41' + ), ] expected_dict = { "Country": "BR", @@ -209,45 +213,40 @@ def test_response_not_binary(self): "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", - "Targeted Data Providers": '', + "Targeted Data Providers": "", "Cookie Reach: Average Impression Frequency": "0.00", "Cookie Reach: Impression Reach": "41", } line_iterator_with_blank_line = (line for line in lines) - for dic in get_generator_dict_from_str_csv( - line_iterator_with_blank_line - ): + for dic in get_generator_dict_from_str_csv(line_iterator_with_blank_line): self.assertEqual(dic, expected_dict) def test_line_parsing(self): - input_lines = [ - 'abc, 1, 0.0, 4, "a,b,c", abc', - '"abc", 1, 0.0, 4, "a,b,c", abc', - 'abc, 1, 0.0, 4, abc, abc' - ] + input_lines = ['abc, 1, 0.0, 4, "a,b,c", abc', '"abc", 1, 0.0, 4, "a,b,c", abc', "abc, 1, 0.0, 4, abc, abc"] expected_outputs = [ - ['abc', '1', '0.0', '4', 'a,b,c', 'abc'], - ['abc', '1', '0.0', '4', 'a,b,c', 'abc'], - ['abc', '1', '0.0', '4', 'abc', 'abc'] + ["abc", "1", "0.0", "4", "a,b,c", "abc"], + ["abc", "1", "0.0", "4", "a,b,c", "abc"], + ["abc", "1", "0.0", "4", "abc", "abc"], ] for index in range(len(input_lines)): - self.assertEqual( - parse_decoded_line(input_lines[index]), - expected_outputs[index] - ) + self.assertEqual(parse_decoded_line(input_lines[index]), expected_outputs[index]) def test_response_not_binary_with_date(self): lines = [ - ("Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - " ID,Advertiser Status,Advertiser Integration Code,Insertion" - " Order,Insertion Order ID,Insertion Order Status,Insertion" - " Order Integration Code,Line Item,Line Item ID,Line Item" - " Status,Line Item Integration Code,Targeted Data Providers," - "Cookie Reach: Average Impression Frequency,Cookie Reach: " - "Impression Reach"), - ('BR,test_partner,123,Active,test_advertiser,123,' - '0,,test_io,123,Active,,test_line_item' - ',123,0,,"",0.00,41') + ( + "Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" + " ID,Advertiser Status,Advertiser Integration Code,Insertion" + " Order,Insertion Order ID,Insertion Order Status,Insertion" + " Order Integration Code,Line Item,Line Item ID,Line Item" + " Status,Line Item Integration Code,Targeted Data Providers," + "Cookie Reach: Average Impression Frequency,Cookie Reach: " + "Impression Reach" + ), + ( + 
"BR,test_partner,123,Active,test_advertiser,123," + "0,,test_io,123,Active,,test_line_item" + ',123,0,,"",0.00,41' + ), ] expected_dict = { "Country": "BR", @@ -266,52 +265,41 @@ def test_response_not_binary_with_date(self): "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", - "Targeted Data Providers": '', + "Targeted Data Providers": "", "Cookie Reach: Average Impression Frequency": "0.00", "Cookie Reach: Impression Reach": "41", "date_start": "2020/01/01", - "date_stop": "2020/01/31" + "date_stop": "2020/01/31", } line_iterator_with_blank_line = (line for line in lines) with patch("nck.utils.date_handler.date") as mock_date: mock_date.today.return_value = date(2020, 2, 1) mock_date.side_effect = lambda *args, **kw: date(*args, **kw) for dic in get_generator_dict_from_str_csv( - line_iterator_with_blank_line, - add_date=True, - day_range="PREVIOUS_MONTH", - date_format="%Y/%m/%d" + line_iterator_with_blank_line, add_date=True, day_range="PREVIOUS_MONTH", date_format="%Y/%m/%d" ): self.assertEqual(dic, expected_dict) - @parameterized.expand([ - ( - True, - [ - b'"Perf report (2017-03-01 - 2020-03-25)"', - b'AdFormat\tAdGroupId\tAdGroupName', - b'IMAGE\t123\tAdGroup', - b'IMAGE\t123\tAdGroup', - ] - ), - ( - False, - [ - b'AdFormat\tAdGroupId\tAdGroupName', - b'IMAGE\t123\tAdGroup', - b'IMAGE\t123\tAdGroup', - ] - ) - ]) + def test_csv_with_headers_only(self): + input_report = (row for row in [b"Just,Headers,in,this,empty,report"]) + self.assertFalse(next(get_generator_dict_from_str_csv(input_report), False), "Data is not empty") + + @parameterized.expand( + [ + ( + True, + [ + b'"Perf report (2017-03-01 - 2020-03-25)"', + b"AdFormat\tAdGroupId\tAdGroupName", + b"IMAGE\t123\tAdGroup", + b"IMAGE\t123\tAdGroup", + ], + ), + (False, [b"AdFormat\tAdGroupId\tAdGroupName", b"IMAGE\t123\tAdGroup", b"IMAGE\t123\tAdGroup"]), + ] + ) def test_parse_tsv_with_first_row_skipped(self, skip_first_row, lines): - expected_dict = { - "AdFormat": "IMAGE", - "AdGroupId": "123", - "AdGroupName": "AdGroup" - } + expected_dict = {"AdFormat": "IMAGE", "AdGroupId": "123", "AdGroupName": "AdGroup"} line_iterator = (line for line in lines) - for dic in get_generator_dict_from_str_tsv( - line_iterator, - skip_first_row=skip_first_row - ): + for dic in get_generator_dict_from_str_tsv(line_iterator, skip_first_row=skip_first_row): self.assertEqual(dic, expected_dict) From 563c05f31d4f13caa7ed6905125cadebddc6a674 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Fri, 17 Apr 2020 10:09:16 +0200 Subject: [PATCH 54/66] Fix: requesting Data Warehouse reports to access multiple pages --- nck/readers/adobe_reader.py | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/nck/readers/adobe_reader.py b/nck/readers/adobe_reader.py index ee993ea2..aae07c03 100644 --- a/nck/readers/adobe_reader.py +++ b/nck/readers/adobe_reader.py @@ -71,7 +71,10 @@ def __init__(self, password, username, **kwargs): self.kwargs = kwargs def request(self, api, method, data=None): - """ Compare with https://marketing.adobe.com/developer/api-explorer """ + """ + Makes "raw" HTTP requests to Reporting API 1.4 (used within the query_report and get_report methods) + API workflow: https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/get_started.md + """ api_method = "{0}.{1}".format(api, method) data = data or dict() logging.info("{}.{} {}".format(api, method, data)) @@ -86,11 +89,17 @@ def request(self, api, method, data=None): return 
json_response def build_report_description(self): + """ + Builds the reportDescription to be passed to the Report.Queue method as an input parameter. + Source is set at "warehouse" to get Data Wharehouse reports, and access multiple report pages. + Doc: https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/data_types/r_reportDescription.md + """ report_description = { "reportDescription": { + "source": "warehouse", "reportSuiteID": self.kwargs.get("report_suite_id"), "elements": [{"id": el} for el in self.kwargs.get("report_element_id", [])], - "metrics": [{"id": mt} for mt in self.kwargs.get("report_metric_id", [])], + "metrics": [{"id": mt} for mt in self.kwargs.get("report_metric_id", [])] } } self.set_date_gran_report_desc(report_description) @@ -108,6 +117,9 @@ def get_days_delta(self): return days_delta def set_date_range_report_desc(self, report_description): + """ + Adds the dateFrom and dateTo parameters to a reportDescription. + """ if self.kwargs.get("date_range") != (): start_date = self.kwargs.get("start_date") end_date = self.kwargs.get("end_date", datetime.datetime.now()) @@ -118,16 +130,33 @@ def set_date_range_report_desc(self, report_description): report_description["reportDescription"]["dateTo"] = end_date.strftime("%Y-%m-%d") def set_date_gran_report_desc(self, report_description): + """ + Adds the dateGranularity parameter to a reportDescription. + """ if self.kwargs.get("date_granularity", None) is not None: report_description["reportDescription"]["dateGranularity"] = self.kwargs.get("date_granularity") @retry def query_report(self): + """ + REQUEST STEP #1 + - Method: Report.Queue + - Input: reportDescription + - Output: reportID, to be passed to the Report.Get method + - Doc: https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_Queue.md + """ query_report = self.request(api="Report", method="Queue", data=self.build_report_description()) return query_report @retry def get_report(self, report_id, page_number=1): + """ + REQUEST STEP #2 + - Method: Report.Get + - Input: reportID, page + - Output: reportResponse containing the requested report data + - Doc: https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_Get.md + """ request_f = lambda: self.request(api="Report", method="Get", data={"reportID": report_id, "page": page_number}) response = request_f() idx = 1 @@ -141,6 +170,9 @@ def get_report(self, report_id, page_number=1): return response def download_report(self, rep_id): + """ + Parses reportResponses and iterates over report pages. 
+ """ raw_response = self.get_report(rep_id, page_number=1) all_responses = [parse(raw_response)] if "totalPages" in raw_response["report"]: From 0c3055518f8ce38412545cd7a922b4bc74b55d93 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Wed, 25 Mar 2020 15:30:15 +0100 Subject: [PATCH 55/66] sa360 reader v0 --- .env | 4 +- nck/clients/sa360_client.py | 171 ++++++++++++++++++++++++++++++++++++ nck/helpers/sa360_helper.py | 41 +++++++++ nck/readers/__init__.py | 2 + nck/readers/sa360_reader.py | 123 ++++++++++++++++++++++++++ 5 files changed, 339 insertions(+), 2 deletions(-) create mode 100644 nck/clients/sa360_client.py create mode 100644 nck/helpers/sa360_helper.py create mode 100644 nck/readers/sa360_reader.py diff --git a/.env b/.env index cf087144..438ecf16 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ PROJECT_ID=artefact-docker-containers -DOCKER_IMAGE=nautilus-connector-kit-dev -DOCKER_TAG=FBURL +DOCKER_IMAGE=nautilus-connector-kit +DOCKER_TAG=1.4.0 DOCKER_REGISTRY=eu.gcr.io diff --git a/nck/clients/sa360_client.py b/nck/clients/sa360_client.py new file mode 100644 index 00000000..c953855a --- /dev/null +++ b/nck/clients/sa360_client.py @@ -0,0 +1,171 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
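The Data Warehouse fix in PATCH 54 above amounts to a queue-then-poll-then-page loop against Reporting API 1.4. A condensed sketch of that flow, assuming `request(api, method, data)` is the raw HTTP helper defined on the reader and that the queued response carries a `reportID` field as described in the 1.4 reference:

```
def fetch_all_pages(request, report_description):
    # Step 1: queue the report; the API answers with an identifier to poll on.
    report_id = request("Report", "Queue", report_description)["reportID"]

    # Step 2: fetch the first page (the real reader retries while the report is not ready).
    first_page = request("Report", "Get", {"reportID": report_id, "page": 1})
    pages = [first_page]

    # Step 3: Data Warehouse ("source": "warehouse") reports expose totalPages;
    # walk through the remaining pages one by one.
    total_pages = first_page["report"].get("totalPages", 1)
    for page_number in range(2, total_pages + 1):
        pages.append(request("Report", "Get", {"reportID": report_id, "page": page_number}))
    return pages
```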
+import logging +import httplib2 +import requests + +from tenacity import retry, wait_exponential, stop_after_delay +from oauth2client import client, GOOGLE_TOKEN_URI +from googleapiclient import discovery + + +logger = logging.getLogger("SA360_client") + +DOWNLOAD_FORMAT = "CSV" + + +class SA360Client: + API_NAME = "doubleclicksearch" + API_VERSION = "v2" + + def __init__(self, access_token, client_id, client_secret, refresh_token): + self._credentials = client.GoogleCredentials( + access_token=access_token, + client_id=client_id, + client_secret=client_secret, + refresh_token=refresh_token, + token_expiry=None, + token_uri=GOOGLE_TOKEN_URI, + user_agent=None, + ) + http = self._credentials.authorize(httplib2.Http()) + self._credentials.refresh(http) + self.auth = ( + f"{self._credentials.token_response['token_type']} {self._credentials.token_response['access_token']}" + ) + self._service = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) + + @staticmethod + def generate_report_body( + agency_id, advertiser_id, report_type, columns, start_date, end_date, custom_metrics, custom_dimensions + ): + all_columns = SA360Client.generate_columns(columns, custom_metrics, custom_dimensions) + body = { + "reportScope": {"agencyId": agency_id, "advertiserId": advertiser_id}, + "reportType": report_type, + "columns": all_columns, + "timeRange": SA360Client.get_date_range(start_date, end_date), + "downloadFormat": "csv", + "maxRowsPerFile": 4000000, + "statisticsCurrency": "usd", + } + logger.info("Report Body Generated") + + return body + + def request_report_id(self, body): + report = self._service.reports().request(body=body).execute() + logger.info("Report requested!") + return report["id"] + + @retry(wait=wait_exponential(multiplier=1, min=1, max=8), stop=stop_after_delay(3600)) + def assert_report_file_ready(self, report_id): + """Poll the API with the reportId until the report is ready, up to 100 times. + + Args: + report_id: The ID SA360 has assigned to a report. + """ + request = self._service.reports().get(reportId=report_id) + report_data = request.execute() + if report_data["isReportReady"]: + logger.info("The report is ready.") + + # For large reports, SA360 automatically fragments the report into multiple + # files. The 'files' property in the JSON object that SA360 returns contains + # the list of URLs for file fragment. To download a report, SA360 needs to + # know the report ID and the index of a file fragment. + return report_data + else: + logger.info("Report is not ready.") + raise FileNotFoundError + + def download_report_files(self, json_data, report_id): + for fragment in range(len(json_data["files"])): + logger.info(f"Downloading fragment {str(fragment)} for report {report_id}") + yield self.download_fragment(report_id, str(fragment)) + + def download_fragment(self, report_id, fragment): + """Generate and convert to df a report fragment. + + Args: + report_id: The ID SA360 has assigned to a report. + report_fragment: The 0-based index of the file fragment from the files array. 
+ currency_code: the currency code of the report + """ + # csv_fragment_report = (self._service.reports().getFile(reportId=report_id, reportFragment=fragment).execute()) + # print(csv_fragment_report) + # print(io.BytesIO(csv_fragment_report)) + request = self._service.reports().getFile(reportId=report_id, reportFragment=fragment) + headers = request.headers + headers.update({"Authorization": self.auth}) + r = requests.get(request.uri, stream=True, headers=headers) + + yield from r.iter_lines() + + # i = 0 + # index = 0 + # impr_keyword = 0 + # for row in r.iter_lines(): + # decoded_row = row.decode("utf-8") + # if "impr" in decoded_row: + # decoded_row = decoded_row.split(",") + # index = decoded_row.index("impr") + # continue + # + # if "samsung note 10+ 6.8" in decoded_row: + # r = decoded_row.split(",") + # impr_keyword += int(r[index]) + # print(decoded_row) + # decoded_row = decoded_row.split(",") + # i += int(decoded_row[index]) + # print("IMPRESSIONS", i, impr_keyword) + + # yield from r.iter_lines() + + # df = pd.DataFrame.from_csv(io.BytesIO(csv_fragment_report)) + # df["currency_code"] = currency_code + # from tabulate import tabulate + # print(tabulate(df, headers='keys', tablefmt='psql')) + # return df + + def direct_report_download(self, report_id, file_id): + # Retrieve the file metadata. + report_file = self._service.files().get(reportId=report_id, fileId=file_id).execute() + + if report_file["status"] == "REPORT_AVAILABLE": + # Create a get request. + request = self._service.files().get_media(reportId=report_id, fileId=file_id) + headers = request.headers + r = requests.get(request.uri, stream=True, headers=headers) + + yield from r.iter_lines() + + @staticmethod + def generate_columns(columns, custom_dimensions, custom_metrics): + standard = [{"columnName": column} for column in columns] + dimensions = [{"columnDimensionName": column, "platformSource": "floodlight"} for column in custom_dimensions] + metrics = [{"columnMetricName": column, "platformSource": "floodlight"} for column in custom_metrics] + + return standard + dimensions + metrics + + @staticmethod + def get_date_range(start_date, end_date): + start = start_date.strftime("%Y-%m-%d") + end = end_date.strftime("%Y-%m-%d") + logger.warning(f"Custom date range selected: {start} --> {end}") + return {"startDate": start, "endDate": end} diff --git a/nck/helpers/sa360_helper.py b/nck/helpers/sa360_helper.py new file mode 100644 index 00000000..7375996c --- /dev/null +++ b/nck/helpers/sa360_helper.py @@ -0,0 +1,41 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
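`assert_report_file_ready` above turns report polling into a retry problem: raising `FileNotFoundError` while `isReportReady` is false makes tenacity call the method again with exponential backoff, up to the one-hour deadline. A stripped-down sketch of the same pattern, where the fake `probe` stands in for the `reports().get(...).execute()` call:

```
from tenacity import retry, stop_after_delay, wait_exponential


@retry(wait=wait_exponential(multiplier=1, min=1, max=8), stop=stop_after_delay(3600))
def wait_until_ready(probe):
    # Any exception raised here triggers another attempt after the backoff delay.
    report_data = probe()
    if not report_data["isReportReady"]:
        raise FileNotFoundError("Report is not ready yet")
    return report_data


polls = iter([{"isReportReady": False}, {"isReportReady": False}, {"isReportReady": True}])
print(wait_until_ready(lambda: next(polls)))  # returns on the third poll
```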
+REPORT_TYPES = [ + "advertiser", + "account", + "ad", + "adGroup", + "adGroupTarget", + "bidStrategy", + "campaign", + "campaignTarget", + "conversion", + "feedltem", + "floodlightActivity", + "keyword", + "negativeAdGroupKeyword", + "negativeAdGroupTarget", + "negativeCampaignKeyword", + "negativeCampaignTarget", + "paidAndOrganic", + "productAdvertised", + "productGroup", + "productLeadAndCrossSell", + "productTarget", + "visit", +] diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py index 8c5375a8..7d9993bb 100644 --- a/nck/readers/__init__.py +++ b/nck/readers/__init__.py @@ -21,6 +21,7 @@ from nck.readers.gcs_reader import gcs from nck.readers.googleads_reader import google_ads from nck.readers.s3_reader import s3 +from nck.readers.sa360_reader import sa360_reader from nck.readers.oracle_reader import oracle from nck.readers.gsheets_reader import gsheets from nck.readers.salesforce_reader import salesforce @@ -41,6 +42,7 @@ gcs, google_ads, s3, + sa360_reader, facebook_marketing, oracle, dbm, diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py new file mode 100644 index 00000000..b201a6db --- /dev/null +++ b/nck/readers/sa360_reader.py @@ -0,0 +1,123 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
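`download_fragment` above streams each fragment rather than loading it in one piece: it reuses the URI and headers prepared by the googleapiclient request, re-issues the call with `requests` in streaming mode, and yields raw lines. A generic sketch of that streaming pattern (URL and bearer token are placeholders):

```
import requests


def stream_csv_lines(url, bearer_token):
    # stream=True keeps multi-million-row fragments out of memory;
    # iter_lines() yields one undecoded CSV line (bytes) at a time.
    response = requests.get(url, stream=True, headers={"Authorization": bearer_token})
    response.raise_for_status()
    yield from response.iter_lines()


# for line in stream_csv_lines("https://example.com/report-fragment.csv", "Bearer ACCESS_TOKEN"):
#     print(line.decode("utf-8"))
```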
+import csv +import click + +from io import StringIO + +from nck.commands.command import processor +from nck.readers.reader import Reader +from nck.utils.args import extract_args +from nck.streams.normalized_json_stream import NormalizedJSONStream +from nck.clients.sa360_client import SA360Client +from nck.helpers.sa360_helper import REPORT_TYPES + +DATEFORMAT = "%Y-%m-%d" +ENCODING = "utf-8" + + +@click.command(name="read_sa360") +@click.option("--sa360-access-token", default=None) +@click.option("--sa360-client-id", required=True) +@click.option("--sa360-client-secret", required=True) +@click.option("--sa360-refresh-token", required=True) +@click.option("--sa360-agency-id", required=True) +@click.option("--sa360-advertiser-id", "sa360_advertiser_ids", required=True, multiple=True) +@click.option("--sa360-report-name", default="SA360 Report") +@click.option("--sa360-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0]) +@click.option( + "--sa360-column", "sa360_columns", multiple=True, help="https://developers.google.com/search-ads/v2/report-types" +) +@click.option( + "--sa360-custom-dimension", + "sa360_custom_dimensions", + multiple=True, + help="https://developers.google.com/search-ads/v2/how-tos/reporting/custom-metrics-dimensions", +) +@click.option( + "--sa360-custom-metric", + "sa360_custom_metrics", + multiple=True, + help="https://developers.google.com/search-ads/v2/how-tos/reporting/custom-metrics-dimensions", +) +@click.option("--sa360-start-date", type=click.DateTime(), required=True) +@click.option("--sa360-end-date", type=click.DateTime(), required=True) +@processor("sa360_access_token", "sa360_refresh_token", "sa360_client_secret") +def sa360_reader(**kwargs): + return SA360Reader(**extract_args("sa360_", kwargs)) + + +class SA360Reader(Reader): + def __init__( + self, + access_token, + client_id, + client_secret, + refresh_token, + agency_id, + advertiser_ids, + report_name, + report_type, + columns, + custom_metrics, + custom_dimensions, + start_date, + end_date, + ): + self.sa360_client = SA360Client(access_token, client_id, client_secret, refresh_token) + self.agency_id = agency_id + self.advertiser_ids = list(advertiser_ids) + self.report_name = report_name + self.report_type = report_type + self.columns = list(columns) + self.custom_metrics = list(custom_metrics) + self.custom_dimensions = list(custom_dimensions) + self.all_columns = self.columns + self.custom_dimensions + self.custom_metrics + self.start_date = start_date + self.end_date = end_date + + def format_response(self, report_generator): + # skip headers in the CSV output + next(report_generator) + for row in report_generator: + decoded_row = row.decode(ENCODING) + csv_reader = csv.DictReader(StringIO(decoded_row), self.all_columns) + yield next(csv_reader) + + def result_generator(self): + advertiser_id = next((a for a in self.advertiser_ids), "") + body = self.sa360_client.generate_report_body( + self.agency_id, + advertiser_id, + self.report_type, + self.columns, + self.start_date, + self.end_date, + self.custom_dimensions, + self.custom_metrics, + ) + + report_id = self.sa360_client.request_report_id(body) + + report_data = self.sa360_client.assert_report_file_ready(report_id) + + for report_generator in self.sa360_client.download_report_files(report_data, report_id): + yield from self.format_response(report_generator) + + def read(self): + yield NormalizedJSONStream("results" + "_".join(self.advertiser_ids), self.result_generator()) From 048c2f8af56756caf92554d60facb316a141195d Mon 
Sep 17 00:00:00 2001 From: Vivien MORLET Date: Fri, 27 Mar 2020 16:43:59 +0100 Subject: [PATCH 56/66] handle several advertisers + add tests --- nck/clients/sa360_client.py | 44 ++++++++------------------- nck/readers/sa360_reader.py | 40 ++++++++++++++---------- tests/clients/test_sa360_client.py | 33 ++++++++++++++++++++ tests/readers/test_sa360_reader.py | 49 ++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 48 deletions(-) create mode 100644 tests/clients/test_sa360_client.py create mode 100644 tests/readers/test_sa360_reader.py diff --git a/nck/clients/sa360_client.py b/nck/clients/sa360_client.py index c953855a..4b71d2e6 100644 --- a/nck/clients/sa360_client.py +++ b/nck/clients/sa360_client.py @@ -23,9 +23,7 @@ from oauth2client import client, GOOGLE_TOKEN_URI from googleapiclient import discovery - logger = logging.getLogger("SA360_client") - DOWNLOAD_FORMAT = "CSV" @@ -50,6 +48,17 @@ def __init__(self, access_token, client_id, client_secret, refresh_token): ) self._service = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) + def get_all_advertisers_of_agency(self, agency_id): + body = { + "reportScope": {"agencyId": agency_id}, + "reportType": "advertiser", + "columns": [{"columnName": "advertiserId"}], + "statisticsCurrency": "usd", + } + report = self._service.reports().generate(body=body).execute() + advertiser_ids = [row["advertiserId"] for row in report["rows"]] + return advertiser_ids + @staticmethod def generate_report_body( agency_id, advertiser_id, report_type, columns, start_date, end_date, custom_metrics, custom_dimensions @@ -73,7 +82,7 @@ def request_report_id(self, body): logger.info("Report requested!") return report["id"] - @retry(wait=wait_exponential(multiplier=1, min=1, max=8), stop=stop_after_delay(3600)) + @retry(wait=wait_exponential(multiplier=60, min=60, max=600), stop=stop_after_delay(3600)) def assert_report_file_ready(self, report_id): """Poll the API with the reportId until the report is ready, up to 100 times. @@ -107,9 +116,6 @@ def download_fragment(self, report_id, fragment): report_fragment: The 0-based index of the file fragment from the files array. currency_code: the currency code of the report """ - # csv_fragment_report = (self._service.reports().getFile(reportId=report_id, reportFragment=fragment).execute()) - # print(csv_fragment_report) - # print(io.BytesIO(csv_fragment_report)) request = self._service.reports().getFile(reportId=report_id, reportFragment=fragment) headers = request.headers headers.update({"Authorization": self.auth}) @@ -117,32 +123,6 @@ def download_fragment(self, report_id, fragment): yield from r.iter_lines() - # i = 0 - # index = 0 - # impr_keyword = 0 - # for row in r.iter_lines(): - # decoded_row = row.decode("utf-8") - # if "impr" in decoded_row: - # decoded_row = decoded_row.split(",") - # index = decoded_row.index("impr") - # continue - # - # if "samsung note 10+ 6.8" in decoded_row: - # r = decoded_row.split(",") - # impr_keyword += int(r[index]) - # print(decoded_row) - # decoded_row = decoded_row.split(",") - # i += int(decoded_row[index]) - # print("IMPRESSIONS", i, impr_keyword) - - # yield from r.iter_lines() - - # df = pd.DataFrame.from_csv(io.BytesIO(csv_fragment_report)) - # df["currency_code"] = currency_code - # from tabulate import tabulate - # print(tabulate(df, headers='keys', tablefmt='psql')) - # return df - def direct_report_download(self, report_id, file_id): # Retrieve the file metadata. 
report_file = self._service.files().get(reportId=report_id, fileId=file_id).execute() diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py index b201a6db..5a9eea27 100644 --- a/nck/readers/sa360_reader.py +++ b/nck/readers/sa360_reader.py @@ -37,7 +37,12 @@ @click.option("--sa360-client-secret", required=True) @click.option("--sa360-refresh-token", required=True) @click.option("--sa360-agency-id", required=True) -@click.option("--sa360-advertiser-id", "sa360_advertiser_ids", required=True, multiple=True) +@click.option( + "--sa360-advertiser-id", + "sa360_advertiser_ids", + multiple=True, + help="If empty, all advertisers from agency will be requested", +) @click.option("--sa360-report-name", default="SA360 Report") @click.option("--sa360-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0]) @click.option( @@ -100,24 +105,27 @@ def format_response(self, report_generator): yield next(csv_reader) def result_generator(self): - advertiser_id = next((a for a in self.advertiser_ids), "") - body = self.sa360_client.generate_report_body( - self.agency_id, - advertiser_id, - self.report_type, - self.columns, - self.start_date, - self.end_date, - self.custom_dimensions, - self.custom_metrics, - ) + for advertiser_id in self.advertiser_ids: + body = self.sa360_client.generate_report_body( + self.agency_id, + advertiser_id, + self.report_type, + self.columns, + self.start_date, + self.end_date, + self.custom_dimensions, + self.custom_metrics, + ) - report_id = self.sa360_client.request_report_id(body) + report_id = self.sa360_client.request_report_id(body) - report_data = self.sa360_client.assert_report_file_ready(report_id) + report_data = self.sa360_client.assert_report_file_ready(report_id) - for report_generator in self.sa360_client.download_report_files(report_data, report_id): - yield from self.format_response(report_generator) + for report_generator in self.sa360_client.download_report_files(report_data, report_id): + yield from self.format_response(report_generator) def read(self): + if not self.advertiser_ids: + self.advertiser_ids = self.sa360_client.get_all_advertisers_of_agency(self.agency_id) + yield NormalizedJSONStream("results" + "_".join(self.advertiser_ids), self.result_generator()) diff --git a/tests/clients/test_sa360_client.py b/tests/clients/test_sa360_client.py new file mode 100644 index 00000000..9c30e8ff --- /dev/null +++ b/tests/clients/test_sa360_client.py @@ -0,0 +1,33 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
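The advertiser fallback introduced in PATCH 56 relies on the synchronous `Reports.generate` endpoint, which returns its rows inline instead of going through the request/poll/download cycle. A compact sketch of that lookup, assuming `service` is the authorized doubleclicksearch client:

```
def list_advertiser_ids(service, agency_id):
    # A one-column "advertiser" report scoped to the agency is enough to
    # enumerate every advertiser ID the credentials can see.
    body = {
        "reportScope": {"agencyId": agency_id},
        "reportType": "advertiser",
        "columns": [{"columnName": "advertiserId"}],
        "statisticsCurrency": "usd",
    }
    report = service.reports().generate(body=body).execute()
    return [row["advertiserId"] for row in report["rows"]]
```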
+from unittest import TestCase +from nck.clients.sa360_client import SA360Client + + +class SA360ClientTest(TestCase): + def test_generate_all_columns(self): + standard = ["clicks", "impressions"] + custom_dimensions = ["customDimension"] + custom_metrics = ["customMetric"] + expected = [ + {"columnName": "clicks"}, + {"columnName": "impressions"}, + {"columnDimensionName": "customDimension", "platformSource": "floodlight"}, + {"columnMetricName": "customMetric", "platformSource": "floodlight"}, + ] + assert SA360Client.generate_columns(standard, custom_dimensions, custom_metrics) == expected diff --git a/tests/readers/test_sa360_reader.py b/tests/readers/test_sa360_reader.py new file mode 100644 index 00000000..42f52292 --- /dev/null +++ b/tests/readers/test_sa360_reader.py @@ -0,0 +1,49 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from unittest import TestCase, mock +import logging + +from nck.readers.sa360_reader import SA360Reader + +logger = logging.getLogger("SA360_reader_test") + + +class SA360ReaderTest(TestCase): + def mock_sa360_reader(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + kwargs = {"all_columns": ["impressions", "clicks"]} + + @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) + def test_empty_data(self): + reader = SA360Reader(**self.kwargs) + input_report = (row for row in [b"Just Headers in this empty report"]) + if len(list(reader.format_response(input_report))) > 0: + assert False, "Data is not empty" + + @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) + def test_format_data(self): + reader = SA360Reader(**self.kwargs) + input_report = (row for row in [b"impressions,clicks", b"1,2", b"3,4"]) + expected = [{"impressions": "1", "clicks": "2"}, {"impressions": "3", "clicks": "4"}] + input_list = list(reader.format_response(input_report)) + assert len(input_list) == len(expected) + + for input_row, output in zip(input_list, expected): + assert input_row == output From 9ccc907f74cf6ccbe67caeca29c288a8b6a2ad23 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Fri, 27 Mar 2020 17:38:31 +0100 Subject: [PATCH 57/66] add documentation --- nck/readers/README.md | 57 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/nck/readers/README.md b/nck/readers/README.md index b34fc950..52b308c4 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -155,6 +155,63 @@ python nck/entrypoint.py read_search_console --search-console-client-id and + +- A refresh token, created with the email address able to access to all the Search Ads 360 Account you will be calling + +See the [documentation here](https://developers.google.com/search-ads/v2/authorizing "SA360 
Authentication") +to set-up your OAuth2 credentials and refresh token specifically for Searc hAds 360 Reporting. + + +#### Which Reports and Metrics are available in the API + +The list of available reports for the API, and the associated metrics, can be [found here](https://developers.google.com/search-ads/v2/report-types "Report Types") + +#### Simple API call example + +- Call Example + + +The following command retrieves insights about the Ads in the Search Ads 360 Account from the agency thanks to +your , and with the necessary permissions to access your Accounts. + +``` +python nck/entrypoint.py read_sa360 --sa360-client-id --sa360-client-secret --sa360-refresh-token --sa360-agency-id --sa360-agency-id --sa360-report-type keyword --sa360-column date --sa360-column impr --sa360-column clicks --sa360-start-date 2020-01-01 --sa360-end-date 2020-01-01 +``` + +*If it doesn't work, try to* `export PYTHONPATH="."` *in the nautilus-connector-kit folder (to be sure Python is reading correctly)* +*If you want the output to be printed in your console, add* `write_console` *at the end of your command (see writers for more details)* + + +- Parameters of the SA360 Reader + +| CLI option | Documentation | +| ---------- | ------------- | +|`--sa360-access-token` | (Optional) Access token | +|`--sa360-client-id` | OAuth2 ID | +|`--sa360-client-secret` | OAuth2 ID Secret | +|`--sa360-refresh-token` | Refresh token | +|`--sa360-agency-id` | Agency ID to request in SA360 | +|`--sa360-advertiser-id` | (Optional) Advertiser ids to request. If not provided, every advertiser of the agency will be requested| +|`--sa360-report-name` | (Optional) Name of the output report | +|`--sa360-report-type` | Type of the report to request. List [here](https://developers.google.com/search-ads/v2/report-types)| +|`--sa360-column` | Dimensions and metrics to request in the report | +|`--sa360-custom-dimension` | (Optional) Custom dimensions to report | +|`--sa360-custom-metric` | (Optional) Custom metrics to report | +|`--sa360-start-date` | Start date of the period to request | +|`--sa360-end-date` | End date of the period to request | + +See the documents below for a better understanding of the parameters: +- [SA360 Reporting](https://developers.google.com/search-ads/v2/how-tos/reporting) + + ## Yandex readers For now, there is only one Yandex API you can access through Nautilus connectors: [Direct API](https://tech.yandex.com/direct/). 
From 0378dd37fc35bf540e8b86620202115f134fa96e Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Wed, 8 Apr 2020 16:42:37 +0200 Subject: [PATCH 58/66] add saved_columns + fix typo --- nck/clients/sa360_client.py | 19 +++++++------------ nck/readers/README.md | 4 ++-- nck/readers/sa360_reader.py | 23 +++++++---------------- 3 files changed, 16 insertions(+), 30 deletions(-) diff --git a/nck/clients/sa360_client.py b/nck/clients/sa360_client.py index 4b71d2e6..fae755ab 100644 --- a/nck/clients/sa360_client.py +++ b/nck/clients/sa360_client.py @@ -60,10 +60,8 @@ def get_all_advertisers_of_agency(self, agency_id): return advertiser_ids @staticmethod - def generate_report_body( - agency_id, advertiser_id, report_type, columns, start_date, end_date, custom_metrics, custom_dimensions - ): - all_columns = SA360Client.generate_columns(columns, custom_metrics, custom_dimensions) + def generate_report_body(agency_id, advertiser_id, report_type, columns, start_date, end_date, saved_columns): + all_columns = SA360Client.generate_columns(columns, saved_columns) body = { "reportScope": {"agencyId": agency_id, "advertiserId": advertiser_id}, "reportType": report_type, @@ -74,7 +72,6 @@ def generate_report_body( "statisticsCurrency": "usd", } logger.info("Report Body Generated") - return body def request_report_id(self, body): @@ -100,7 +97,7 @@ def assert_report_file_ready(self, report_id): # know the report ID and the index of a file fragment. return report_data else: - logger.info("Report is not ready.") + logger.info("Report is not ready. Retrying...") raise FileNotFoundError def download_report_files(self, json_data, report_id): @@ -113,8 +110,7 @@ def download_fragment(self, report_id, fragment): Args: report_id: The ID SA360 has assigned to a report. - report_fragment: The 0-based index of the file fragment from the files array. - currency_code: the currency code of the report + fragment: The 0-based index of the file fragment from the files array. """ request = self._service.reports().getFile(reportId=report_id, reportFragment=fragment) headers = request.headers @@ -136,12 +132,11 @@ def direct_report_download(self, report_id, file_id): yield from r.iter_lines() @staticmethod - def generate_columns(columns, custom_dimensions, custom_metrics): + def generate_columns(columns, saved_columns): standard = [{"columnName": column} for column in columns] - dimensions = [{"columnDimensionName": column, "platformSource": "floodlight"} for column in custom_dimensions] - metrics = [{"columnMetricName": column, "platformSource": "floodlight"} for column in custom_metrics] + saved = [{"savedColumnName": column} for column in saved_columns] - return standard + dimensions + metrics + return standard + saved @staticmethod def get_date_range(start_date, end_date): diff --git a/nck/readers/README.md b/nck/readers/README.md index 52b308c4..04768304 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -167,7 +167,7 @@ Using the Search Ads API requires two things: - A refresh token, created with the email address able to access to all the Search Ads 360 Account you will be calling See the [documentation here](https://developers.google.com/search-ads/v2/authorizing "SA360 Authentication") -to set-up your OAuth2 credentials and refresh token specifically for Searc hAds 360 Reporting. +to set-up your OAuth2 credentials and refresh token specifically for Search Ads 360 Reporting. 
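On the date options: `--sa360-start-date` and `--sa360-end-date` are declared with `click.DateTime()`, so they reach the client as `datetime` objects, and `get_date_range` flattens them into the string form expected by the timeRange block. A minimal sketch of that conversion:

```
from datetime import datetime


def get_date_range(start_date, end_date):
    # The Reporting API expects ISO-formatted day strings in the timeRange block.
    return {"startDate": start_date.strftime("%Y-%m-%d"), "endDate": end_date.strftime("%Y-%m-%d")}


print(get_date_range(datetime(2020, 1, 1), datetime(2020, 1, 31)))
# {'startDate': '2020-01-01', 'endDate': '2020-01-31'}
```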
#### Which Reports and Metrics are available in the API @@ -183,7 +183,7 @@ The following command retrieves insights about the Ads in the Search Ads 360 Acc your , and with the necessary permissions to access your Accounts. ``` -python nck/entrypoint.py read_sa360 --sa360-client-id --sa360-client-secret --sa360-refresh-token --sa360-agency-id --sa360-agency-id --sa360-report-type keyword --sa360-column date --sa360-column impr --sa360-column clicks --sa360-start-date 2020-01-01 --sa360-end-date 2020-01-01 +python nck/entrypoint.py read_sa360 --sa360-client-id --sa360-client-secret --sa360-refresh-token --sa360-agency-id --sa360-advertiser-id --sa360-report-type keyword --sa360-column date --sa360-column impr --sa360-column clicks --sa360-start-date 2020-01-01 --sa360-end-date 2020-01-01 ``` *If it doesn't work, try to* `export PYTHONPATH="."` *in the nautilus-connector-kit folder (to be sure Python is reading correctly)* diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py index 5a9eea27..9ab61827 100644 --- a/nck/readers/sa360_reader.py +++ b/nck/readers/sa360_reader.py @@ -49,16 +49,10 @@ "--sa360-column", "sa360_columns", multiple=True, help="https://developers.google.com/search-ads/v2/report-types" ) @click.option( - "--sa360-custom-dimension", - "sa360_custom_dimensions", + "--sa360-saved-column", + "sa360_saved_columns", multiple=True, - help="https://developers.google.com/search-ads/v2/how-tos/reporting/custom-metrics-dimensions", -) -@click.option( - "--sa360-custom-metric", - "sa360_custom_metrics", - multiple=True, - help="https://developers.google.com/search-ads/v2/how-tos/reporting/custom-metrics-dimensions", + help="https://developers.google.com/search-ads/v2/how-tos/reporting/saved-columns", ) @click.option("--sa360-start-date", type=click.DateTime(), required=True) @click.option("--sa360-end-date", type=click.DateTime(), required=True) @@ -79,8 +73,7 @@ def __init__( report_name, report_type, columns, - custom_metrics, - custom_dimensions, + saved_columns, start_date, end_date, ): @@ -90,9 +83,8 @@ def __init__( self.report_name = report_name self.report_type = report_type self.columns = list(columns) - self.custom_metrics = list(custom_metrics) - self.custom_dimensions = list(custom_dimensions) - self.all_columns = self.columns + self.custom_dimensions + self.custom_metrics + self.saved_columns = list(saved_columns) + self.all_columns = self.columns + self.saved_columns self.start_date = start_date self.end_date = end_date @@ -113,8 +105,7 @@ def result_generator(self): self.columns, self.start_date, self.end_date, - self.custom_dimensions, - self.custom_metrics, + self.saved_columns, ) report_id = self.sa360_client.request_report_id(body) From f41460cf015df0bb9f717b649e7820464b05f293 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Fri, 10 Apr 2020 17:40:47 +0200 Subject: [PATCH 59/66] change tests --- tests/clients/test_sa360_client.py | 12 +++--------- tests/readers/test_sa360_reader.py | 8 ++------ 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/tests/clients/test_sa360_client.py b/tests/clients/test_sa360_client.py index 9c30e8ff..87f606f3 100644 --- a/tests/clients/test_sa360_client.py +++ b/tests/clients/test_sa360_client.py @@ -22,12 +22,6 @@ class SA360ClientTest(TestCase): def test_generate_all_columns(self): standard = ["clicks", "impressions"] - custom_dimensions = ["customDimension"] - custom_metrics = ["customMetric"] - expected = [ - {"columnName": "clicks"}, - {"columnName": "impressions"}, - {"columnDimensionName": 
"customDimension", "platformSource": "floodlight"}, - {"columnMetricName": "customMetric", "platformSource": "floodlight"}, - ] - assert SA360Client.generate_columns(standard, custom_dimensions, custom_metrics) == expected + saved = ["savedColumn"] + expected = [{"columnName": "clicks"}, {"columnName": "impressions"}, {"savedColumnName": "savedColumn"}] + self.assertEqual(SA360Client.generate_columns(standard, saved), expected) diff --git a/tests/readers/test_sa360_reader.py b/tests/readers/test_sa360_reader.py index 42f52292..1f37a1e4 100644 --- a/tests/readers/test_sa360_reader.py +++ b/tests/readers/test_sa360_reader.py @@ -34,8 +34,7 @@ def mock_sa360_reader(self, **kwargs): def test_empty_data(self): reader = SA360Reader(**self.kwargs) input_report = (row for row in [b"Just Headers in this empty report"]) - if len(list(reader.format_response(input_report))) > 0: - assert False, "Data is not empty" + self.assertFalse(next(reader.format_response(input_report), False), "Data is not empty") @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) def test_format_data(self): @@ -43,7 +42,4 @@ def test_format_data(self): input_report = (row for row in [b"impressions,clicks", b"1,2", b"3,4"]) expected = [{"impressions": "1", "clicks": "2"}, {"impressions": "3", "clicks": "4"}] input_list = list(reader.format_response(input_report)) - assert len(input_list) == len(expected) - - for input_row, output in zip(input_list, expected): - assert input_row == output + self.assertListEqual(input_list, expected) From 5aa28f973aea2f334a5f842fd681a480a8122bc5 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Fri, 10 Apr 2020 19:54:48 +0200 Subject: [PATCH 60/66] generate_dict_from_csv --- nck/readers/sa360_reader.py | 16 +- nck/utils/text.py | 16 +- tests/readers/test_sa360_reader.py | 45 ------ tests/utils/test_text_utils.py | 242 ++++++++++++++--------------- 4 files changed, 122 insertions(+), 197 deletions(-) delete mode 100644 tests/readers/test_sa360_reader.py diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py index 9ab61827..5f2528a5 100644 --- a/nck/readers/sa360_reader.py +++ b/nck/readers/sa360_reader.py @@ -15,17 +15,15 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-import csv import click -from io import StringIO - from nck.commands.command import processor from nck.readers.reader import Reader -from nck.utils.args import extract_args from nck.streams.normalized_json_stream import NormalizedJSONStream from nck.clients.sa360_client import SA360Client from nck.helpers.sa360_helper import REPORT_TYPES +from nck.utils.args import extract_args +from nck.utils.text import get_generator_dict_from_str_csv DATEFORMAT = "%Y-%m-%d" ENCODING = "utf-8" @@ -88,14 +86,6 @@ def __init__( self.start_date = start_date self.end_date = end_date - def format_response(self, report_generator): - # skip headers in the CSV output - next(report_generator) - for row in report_generator: - decoded_row = row.decode(ENCODING) - csv_reader = csv.DictReader(StringIO(decoded_row), self.all_columns) - yield next(csv_reader) - def result_generator(self): for advertiser_id in self.advertiser_ids: body = self.sa360_client.generate_report_body( @@ -113,7 +103,7 @@ def result_generator(self): report_data = self.sa360_client.assert_report_file_ready(report_id) for report_generator in self.sa360_client.download_report_files(report_data, report_id): - yield from self.format_response(report_generator) + yield from get_generator_dict_from_str_csv(report_generator) def read(self): if not self.advertiser_ids: diff --git a/nck/utils/text.py b/nck/utils/text.py index b6ff82f1..08170d10 100644 --- a/nck/utils/text.py +++ b/nck/utils/text.py @@ -39,10 +39,7 @@ def add_column_value_to_csv_line_iterator(line_iterator, columname, value): def get_generator_dict_from_str_csv( - line_iterator: Generator[Union[bytes, str], None, None], - add_date=False, - day_range=None, - date_format="%Y-%m-%d" + line_iterator: Generator[Union[bytes, str], None, None], add_date=False, day_range=None, date_format="%Y-%m-%d" ) -> Generator[Dict[str, str], None, None]: first_line = next(line_iterator) headers = ( @@ -58,7 +55,7 @@ def get_generator_dict_from_str_csv( line = line.decode("utf-8") except UnicodeDecodeError as err: logging.warning( - "An error has occured while parsing the file. " + "An error has occurred while parsing the file. " "The line could not be decoded in %s." 
"Invalid input that the codec failed on: %s", err.encoding, @@ -77,8 +74,7 @@ def get_generator_dict_from_str_csv( def get_generator_dict_from_str_tsv( - line_iterator: Generator[Union[bytes, str], None, None], - skip_first_row=False + line_iterator: Generator[Union[bytes, str], None, None], skip_first_row=False ) -> Generator[Dict[str, str], None, None]: if skip_first_row: next(line_iterator) @@ -108,11 +104,7 @@ def get_generator_dict_from_str_tsv( def parse_decoded_line(line: str, delimiter=",", quotechar='"') -> List[str]: line_as_file = StringIO(line) reader = csv.reader( - line_as_file, - delimiter=delimiter, - quotechar=quotechar, - quoting=csv.QUOTE_ALL, - skipinitialspace=True, + line_as_file, delimiter=delimiter, quotechar=quotechar, quoting=csv.QUOTE_ALL, skipinitialspace=True ) return next(reader) diff --git a/tests/readers/test_sa360_reader.py b/tests/readers/test_sa360_reader.py deleted file mode 100644 index 1f37a1e4..00000000 --- a/tests/readers/test_sa360_reader.py +++ /dev/null @@ -1,45 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from unittest import TestCase, mock -import logging - -from nck.readers.sa360_reader import SA360Reader - -logger = logging.getLogger("SA360_reader_test") - - -class SA360ReaderTest(TestCase): - def mock_sa360_reader(self, **kwargs): - for param, value in kwargs.items(): - setattr(self, param, value) - - kwargs = {"all_columns": ["impressions", "clicks"]} - - @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) - def test_empty_data(self): - reader = SA360Reader(**self.kwargs) - input_report = (row for row in [b"Just Headers in this empty report"]) - self.assertFalse(next(reader.format_response(input_report), False), "Data is not empty") - - @mock.patch.object(SA360Reader, "__init__", mock_sa360_reader) - def test_format_data(self): - reader = SA360Reader(**self.kwargs) - input_report = (row for row in [b"impressions,clicks", b"1,2", b"3,4"]) - expected = [{"impressions": "1", "clicks": "2"}, {"impressions": "3", "clicks": "4"}] - input_list = list(reader.format_response(input_report)) - self.assertListEqual(input_list, expected) diff --git a/tests/utils/test_text_utils.py b/tests/utils/test_text_utils.py index 0a9202f0..7c6f6a6e 100644 --- a/tests/utils/test_text_utils.py +++ b/tests/utils/test_text_utils.py @@ -16,33 +16,32 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
import logging -import unittest +from unittest import TestCase from datetime import date from unittest.mock import patch from parameterized import parameterized -from nck.utils.text import (get_generator_dict_from_str_csv, - get_generator_dict_from_str_tsv, - parse_decoded_line) +from nck.utils.text import get_generator_dict_from_str_csv, get_generator_dict_from_str_tsv, parse_decoded_line -class TestTextUtilsMethod(unittest.TestCase): - +class TestTextUtilsMethod(TestCase): def test_multiple_encodings(self): test_string_to_encode = ( - 'BR,test_partner,123,Active,test_advertiser,123,' - '0,,test_io,123,Active,,test_line_item' + "BR,test_partner,123,Active,test_advertiser,123," + "0,,test_io,123,Active,,test_line_item" ',123,0,,"",0.00,41' ) lines = [ - (b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - b" ID,Advertiser Status,Advertiser Integration Code,Insertion" - b" Order,Insertion Order ID,Insertion Order Status,Insertion" - b" Order Integration Code,Line Item,Line Item ID,Line Item" - b" Status,Line Item Integration Code,Targeted Data Providers," - b"Cookie Reach: Average Impression Frequency,Cookie Reach: " - b"Impression Reach"), + ( + b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" + b" ID,Advertiser Status,Advertiser Integration Code,Insertion" + b" Order,Insertion Order ID,Insertion Order Status,Insertion" + b" Order Integration Code,Line Item,Line Item ID,Line Item" + b" Status,Line Item Integration Code,Targeted Data Providers," + b"Cookie Reach: Average Impression Frequency,Cookie Reach: " + b"Impression Reach" + ), test_string_to_encode.encode("utf-8"), test_string_to_encode.encode("ascii"), test_string_to_encode.encode("windows-1252"), @@ -66,36 +65,37 @@ def test_multiple_encodings(self): "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", - "Targeted Data Providers": '', + "Targeted Data Providers": "", "Cookie Reach: Average Impression Frequency": "0.00", "Cookie Reach: Impression Reach": "41", } - for yielded_dict in get_generator_dict_from_str_csv( - line_iterator_multiple_encodings - ): + for yielded_dict in get_generator_dict_from_str_csv(line_iterator_multiple_encodings): self.assertDictEqual(yielded_dict, expected_dict) def test_blank_line(self): lines = [ - (b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - b" ID,Advertiser Status,Advertiser Integration Code,Insertion" - b" Order,Insertion Order ID,Insertion Order Status,Insertion" - b" Order Integration Code,Line Item,Line Item ID,Line Item" - b" Status,Line Item Integration Code,Targeted Data Providers," - b"Cookie Reach: Average Impression Frequency,Cookie Reach: " - b"Impression Reach"), - "" + ( + b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" + b" ID,Advertiser Status,Advertiser Integration Code,Insertion" + b" Order,Insertion Order ID,Insertion Order Status,Insertion" + b" Order Integration Code,Line Item,Line Item ID,Line Item" + b" Status,Line Item Integration Code,Targeted Data Providers," + b"Cookie Reach: Average Impression Frequency,Cookie Reach: " + b"Impression Reach" + ), + "", ] line_iterator_with_blank_line = (line for line in lines) - self.assertTrue(get_generator_dict_from_str_csv( - line_iterator_with_blank_line - )) + self.assertTrue(get_generator_dict_from_str_csv(line_iterator_with_blank_line)) lines.insert( 1, - (b'BR,test_partner,123,Active,test_advertiser,123,' - b'0,,test_io,123,Active,,test_line_item' - b',123,0,,"",0.00,41')) + ( + 
b"BR,test_partner,123,Active,test_advertiser,123," + b"0,,test_io,123,Active,,test_line_item" + b',123,0,,"",0.00,41' + ), + ) expected_dict = { "Country": "BR", "Partner": "test_partner", @@ -113,37 +113,37 @@ def test_blank_line(self): "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", - "Targeted Data Providers": '', + "Targeted Data Providers": "", "Cookie Reach: Average Impression Frequency": "0.00", "Cookie Reach: Impression Reach": "41", } line_iterator_with_blank_line = (line for line in lines) - for dic in get_generator_dict_from_str_csv( - line_iterator_with_blank_line - ): + for dic in get_generator_dict_from_str_csv(line_iterator_with_blank_line): self.assertDictEqual(dic, expected_dict) lines.append("This is something that should not be here.") line_iterator_with_blank_line = (line for line in lines) - test_result = get_generator_dict_from_str_csv( - line_iterator_with_blank_line - ) + test_result = get_generator_dict_from_str_csv(line_iterator_with_blank_line) self.assertEqual(len(list(test_result)), 1) for dic in test_result: self.assertEqual(dic, expected_dict) def test_invalid_byte(self): lines = [ - (b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - b" ID,Advertiser Status,Advertiser Integration Code,Insertion" - b" Order,Insertion Order ID,Insertion Order Status,Insertion" - b" Order Integration Code,Line Item,Line Item ID,Line Item" - b" Status,Line Item Integration Code,Targeted Data Providers," - b"Cookie Reach: Average Impression Frequency,Cookie Reach: " - b"Impression Reach"), - (b'BR,test_partner,123,Active,test_advertiser,123,' - b'0,,test_io,123,Active,,test_line_item' - b',123,0,," \x91\xea\xd0$",0.00,41'), + ( + b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" + b" ID,Advertiser Status,Advertiser Integration Code,Insertion" + b" Order,Insertion Order ID,Insertion Order Status,Insertion" + b" Order Integration Code,Line Item,Line Item ID,Line Item" + b" Status,Line Item Integration Code,Targeted Data Providers," + b"Cookie Reach: Average Impression Frequency,Cookie Reach: " + b"Impression Reach" + ), + ( + b"BR,test_partner,123,Active,test_advertiser,123," + b"0,,test_io,123,Active,,test_line_item" + b',123,0,," \x91\xea\xd0$",0.00,41' + ), ] line_iterator_invalid_byte = (line for line in lines) expected_dict = { @@ -163,34 +163,38 @@ def test_invalid_byte(self): "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", - "Targeted Data Providers": ' $', + "Targeted Data Providers": " $", "Cookie Reach: Average Impression Frequency": "0.00", "Cookie Reach: Impression Reach": "41", } with self.assertLogs(level=logging.INFO) as cm: - for yielded_dict in get_generator_dict_from_str_csv( - line_iterator_invalid_byte - ): + for yielded_dict in get_generator_dict_from_str_csv(line_iterator_invalid_byte): self.assertDictEqual(yielded_dict, expected_dict) self.assertEqual( cm.output, - ["WARNING:root:An error has occured while parsing the file. " - "The line could not be decoded in utf-8." - "Invalid input that the codec failed on: b'\\x91'"] + [ + "WARNING:root:An error has occurred while parsing the file. " + "The line could not be decoded in utf-8." 
+ "Invalid input that the codec failed on: b'\\x91'" + ], ) def test_response_not_binary(self): lines = [ - ("Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - " ID,Advertiser Status,Advertiser Integration Code,Insertion" - " Order,Insertion Order ID,Insertion Order Status,Insertion" - " Order Integration Code,Line Item,Line Item ID,Line Item" - " Status,Line Item Integration Code,Targeted Data Providers," - "Cookie Reach: Average Impression Frequency,Cookie Reach: " - "Impression Reach"), - ('BR,test_partner,123,Active,test_advertiser,123,' - '0,,test_io,123,Active,,test_line_item' - ',123,0,,"",0.00,41') + ( + "Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" + " ID,Advertiser Status,Advertiser Integration Code,Insertion" + " Order,Insertion Order ID,Insertion Order Status,Insertion" + " Order Integration Code,Line Item,Line Item ID,Line Item" + " Status,Line Item Integration Code,Targeted Data Providers," + "Cookie Reach: Average Impression Frequency,Cookie Reach: " + "Impression Reach" + ), + ( + "BR,test_partner,123,Active,test_advertiser,123," + "0,,test_io,123,Active,,test_line_item" + ',123,0,,"",0.00,41' + ), ] expected_dict = { "Country": "BR", @@ -209,45 +213,40 @@ def test_response_not_binary(self): "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", - "Targeted Data Providers": '', + "Targeted Data Providers": "", "Cookie Reach: Average Impression Frequency": "0.00", "Cookie Reach: Impression Reach": "41", } line_iterator_with_blank_line = (line for line in lines) - for dic in get_generator_dict_from_str_csv( - line_iterator_with_blank_line - ): + for dic in get_generator_dict_from_str_csv(line_iterator_with_blank_line): self.assertEqual(dic, expected_dict) def test_line_parsing(self): - input_lines = [ - 'abc, 1, 0.0, 4, "a,b,c", abc', - '"abc", 1, 0.0, 4, "a,b,c", abc', - 'abc, 1, 0.0, 4, abc, abc' - ] + input_lines = ['abc, 1, 0.0, 4, "a,b,c", abc', '"abc", 1, 0.0, 4, "a,b,c", abc', "abc, 1, 0.0, 4, abc, abc"] expected_outputs = [ - ['abc', '1', '0.0', '4', 'a,b,c', 'abc'], - ['abc', '1', '0.0', '4', 'a,b,c', 'abc'], - ['abc', '1', '0.0', '4', 'abc', 'abc'] + ["abc", "1", "0.0", "4", "a,b,c", "abc"], + ["abc", "1", "0.0", "4", "a,b,c", "abc"], + ["abc", "1", "0.0", "4", "abc", "abc"], ] for index in range(len(input_lines)): - self.assertEqual( - parse_decoded_line(input_lines[index]), - expected_outputs[index] - ) + self.assertEqual(parse_decoded_line(input_lines[index]), expected_outputs[index]) def test_response_not_binary_with_date(self): lines = [ - ("Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - " ID,Advertiser Status,Advertiser Integration Code,Insertion" - " Order,Insertion Order ID,Insertion Order Status,Insertion" - " Order Integration Code,Line Item,Line Item ID,Line Item" - " Status,Line Item Integration Code,Targeted Data Providers," - "Cookie Reach: Average Impression Frequency,Cookie Reach: " - "Impression Reach"), - ('BR,test_partner,123,Active,test_advertiser,123,' - '0,,test_io,123,Active,,test_line_item' - ',123,0,,"",0.00,41') + ( + "Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" + " ID,Advertiser Status,Advertiser Integration Code,Insertion" + " Order,Insertion Order ID,Insertion Order Status,Insertion" + " Order Integration Code,Line Item,Line Item ID,Line Item" + " Status,Line Item Integration Code,Targeted Data Providers," + "Cookie Reach: Average Impression Frequency,Cookie Reach: " + "Impression Reach" + ), + ( + 
"BR,test_partner,123,Active,test_advertiser,123," + "0,,test_io,123,Active,,test_line_item" + ',123,0,,"",0.00,41' + ), ] expected_dict = { "Country": "BR", @@ -266,52 +265,41 @@ def test_response_not_binary_with_date(self): "Line Item ID": "123", "Line Item Status": "0", "Line Item Integration Code": "", - "Targeted Data Providers": '', + "Targeted Data Providers": "", "Cookie Reach: Average Impression Frequency": "0.00", "Cookie Reach: Impression Reach": "41", "date_start": "2020/01/01", - "date_stop": "2020/01/31" + "date_stop": "2020/01/31", } line_iterator_with_blank_line = (line for line in lines) with patch("nck.utils.date_handler.date") as mock_date: mock_date.today.return_value = date(2020, 2, 1) mock_date.side_effect = lambda *args, **kw: date(*args, **kw) for dic in get_generator_dict_from_str_csv( - line_iterator_with_blank_line, - add_date=True, - day_range="PREVIOUS_MONTH", - date_format="%Y/%m/%d" + line_iterator_with_blank_line, add_date=True, day_range="PREVIOUS_MONTH", date_format="%Y/%m/%d" ): self.assertEqual(dic, expected_dict) - @parameterized.expand([ - ( - True, - [ - b'"Perf report (2017-03-01 - 2020-03-25)"', - b'AdFormat\tAdGroupId\tAdGroupName', - b'IMAGE\t123\tAdGroup', - b'IMAGE\t123\tAdGroup', - ] - ), - ( - False, - [ - b'AdFormat\tAdGroupId\tAdGroupName', - b'IMAGE\t123\tAdGroup', - b'IMAGE\t123\tAdGroup', - ] - ) - ]) + def test_csv_with_headers_only(self): + input_report = (row for row in [b"Just,Headers,in,this,empty,report"]) + self.assertFalse(next(get_generator_dict_from_str_csv(input_report), False), "Data is not empty") + + @parameterized.expand( + [ + ( + True, + [ + b'"Perf report (2017-03-01 - 2020-03-25)"', + b"AdFormat\tAdGroupId\tAdGroupName", + b"IMAGE\t123\tAdGroup", + b"IMAGE\t123\tAdGroup", + ], + ), + (False, [b"AdFormat\tAdGroupId\tAdGroupName", b"IMAGE\t123\tAdGroup", b"IMAGE\t123\tAdGroup"]), + ] + ) def test_parse_tsv_with_first_row_skipped(self, skip_first_row, lines): - expected_dict = { - "AdFormat": "IMAGE", - "AdGroupId": "123", - "AdGroupName": "AdGroup" - } + expected_dict = {"AdFormat": "IMAGE", "AdGroupId": "123", "AdGroupName": "AdGroup"} line_iterator = (line for line in lines) - for dic in get_generator_dict_from_str_tsv( - line_iterator, - skip_first_row=skip_first_row - ): + for dic in get_generator_dict_from_str_tsv(line_iterator, skip_first_row=skip_first_row): self.assertEqual(dic, expected_dict) From fdbc3699437b343cd6a4684c5a61ff28237daab3 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Fri, 24 Apr 2020 10:24:25 +0200 Subject: [PATCH 61/66] Fix: handling empty reports --- nck/readers/adobe_reader.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/nck/readers/adobe_reader.py b/nck/readers/adobe_reader.py index aae07c03..6ff11b63 100644 --- a/nck/readers/adobe_reader.py +++ b/nck/readers/adobe_reader.py @@ -174,13 +174,14 @@ def download_report(self, rep_id): Parses reportResponses and iterates over report pages. 
""" raw_response = self.get_report(rep_id, page_number=1) - all_responses = [parse(raw_response)] - if "totalPages" in raw_response["report"]: - all_responses = all_responses + [ - parse(self.get_report(rep_id, page_number=np)) - for np in range(2, raw_response["report"]["totalPages"] + 1) - ] - return chain(*all_responses) + if raw_response.get("error") != "no_warehouse_data": + all_responses = [parse(raw_response)] + if "totalPages" in raw_response["report"]: + all_responses = all_responses + [ + parse(self.get_report(rep_id, page_number=np)) + for np in range(2, raw_response["report"]["totalPages"] + 1) + ] + return chain(*all_responses) def read(self): if self.kwargs.get("list_report_suite", False): @@ -194,7 +195,11 @@ def read(self): idf = "report_" + str(rep_id) def result_generator(): - for record in data: - yield record + if data: + for record in data: + yield record + # Returning an empty generator if report is empty + else: + yield from () yield JSONStream("results_" + idf, result_generator()) From 1adb3169a9c1549eb4805cd1fc728cfb1b2889ea Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Fri, 24 Apr 2020 14:59:39 +0200 Subject: [PATCH 62/66] Simplifying result_generator --- nck/readers/adobe_reader.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/nck/readers/adobe_reader.py b/nck/readers/adobe_reader.py index 6ff11b63..5957a88a 100644 --- a/nck/readers/adobe_reader.py +++ b/nck/readers/adobe_reader.py @@ -196,10 +196,6 @@ def read(self): def result_generator(): if data: - for record in data: - yield record - # Returning an empty generator if report is empty - else: - yield from () + yield from data yield JSONStream("results_" + idf, result_generator()) From 3b7f730bcb9195105536e465f6ca0863a5b5ee3c Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Fri, 24 Apr 2020 18:37:52 +0200 Subject: [PATCH 63/66] fix doc --- nck/readers/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index 04768304..fd730835 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -203,8 +203,7 @@ python nck/entrypoint.py read_sa360 --sa360-client-id --sa360-client |`--sa360-report-name` | (Optional) Name of the output report | |`--sa360-report-type` | Type of the report to request. List [here](https://developers.google.com/search-ads/v2/report-types)| |`--sa360-column` | Dimensions and metrics to request in the report | -|`--sa360-custom-dimension` | (Optional) Custom dimensions to report | -|`--sa360-custom-metric` | (Optional) Custom metrics to report | +|`--sa360-saved-column` | (Optional) Saved columns to report. 
See [documentation](https://developers.google.com/search-ads/v2/how-tos/reporting/saved-columns)| |`--sa360-start-date` | Start date of the period to request | |`--sa360-end-date` | End date of the period to request | From 650d7490e257b932cfdc6979a56798973bb3f6a6 Mon Sep 17 00:00:00 2001 From: Vincent Viers <30295971+vviers@users.noreply.github.com> Date: Thu, 14 May 2020 10:55:34 +0200 Subject: [PATCH 64/66] Update project config (#11) * (config) add Pipenv files to gitignore * (config) make requirements-dev recursively install requirements * (config) update Makefile requirements command into a one-liner --- .gitignore | 5 ++++- Makefile | 1 - requirements-dev.txt | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 57f9190c..b678a637 100644 --- a/.gitignore +++ b/.gitignore @@ -113,4 +113,7 @@ tmp/ credentials/* .nck.egg-info -.DS_Store \ No newline at end of file +.DS_Store + +# Pipenv +Pipfile* \ No newline at end of file diff --git a/Makefile b/Makefile index 2d0a2405..eb6d2af5 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,6 @@ clean_pyc: .PHONY: requirements requirements: - pip install -r requirements.txt pip install -r requirements-dev.txt .PHONY: dist diff --git a/requirements-dev.txt b/requirements-dev.txt index 561d0b7c..f0b0c979 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,4 @@ +-r requirements.txt ipdb flake8 nose From 15bfdebc25ce133a0b64d95d02e7f757e3d05d1c Mon Sep 17 00:00:00 2001 From: vivienmorlet Date: Fri, 15 May 2020 17:44:49 +0200 Subject: [PATCH 65/66] don't process total line in csv files (#14) * don't process total line in csv files * hotfix lint adobe_reader * fix stopIteration * handle sa360 csv * try fix local test_main_method * Fix: remove trailing blank space in Google ads report Co-authored-by: Vivien MORLET Co-authored-by: benoitgoujon --- nck/readers/adobe_reader.py | 4 ++-- nck/readers/googleads_reader.py | 2 +- nck/readers/sa360_reader.py | 2 +- nck/utils/text.py | 24 ++++++++++++++++-------- tests/test_main_method.py | 7 +++---- tests/utils/test_text_utils.py | 5 ++++- 6 files changed, 27 insertions(+), 17 deletions(-) diff --git a/nck/readers/adobe_reader.py b/nck/readers/adobe_reader.py index 5957a88a..6c0c4b5b 100644 --- a/nck/readers/adobe_reader.py +++ b/nck/readers/adobe_reader.py @@ -99,7 +99,7 @@ def build_report_description(self): "source": "warehouse", "reportSuiteID": self.kwargs.get("report_suite_id"), "elements": [{"id": el} for el in self.kwargs.get("report_element_id", [])], - "metrics": [{"id": mt} for mt in self.kwargs.get("report_metric_id", [])] + "metrics": [{"id": mt} for mt in self.kwargs.get("report_metric_id", [])], } } self.set_date_gran_report_desc(report_description) @@ -164,7 +164,7 @@ def get_report(self, report_id, page_number=1): logging.info(f"waiting {idx} s for report to be ready") sleep(idx + 1) if idx + 1 > MAX_WAIT_REPORT_DELAY: - raise ReportNotReadyError(f"waited too long for report to be ready") + raise ReportNotReadyError("waited too long for report to be ready") idx = idx * 2 response = request_f() return response diff --git a/nck/readers/googleads_reader.py b/nck/readers/googleads_reader.py index 29e2f832..5693fe71 100644 --- a/nck/readers/googleads_reader.py +++ b/nck/readers/googleads_reader.py @@ -59,7 +59,7 @@ "This field is ignored if manager_id is specified (replaced by the accounts linked to the MCC)", ) @click.option( - "--googleads-report-name", default="Custom Report", help="Name given to your Report" + "--googleads-report-name", 
default="CustomReport", help="Name given to your Report" ) @click.option( "--googleads-report-type", diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py index 5f2528a5..1d7b1e30 100644 --- a/nck/readers/sa360_reader.py +++ b/nck/readers/sa360_reader.py @@ -103,7 +103,7 @@ def result_generator(self): report_data = self.sa360_client.assert_report_file_ready(report_id) for report_generator in self.sa360_client.download_report_files(report_data, report_id): - yield from get_generator_dict_from_str_csv(report_generator) + yield from get_generator_dict_from_str_csv(report_generator, skip_last_row=False) def read(self): if not self.advertiser_ids: diff --git a/nck/utils/text.py b/nck/utils/text.py index 08170d10..2813966d 100644 --- a/nck/utils/text.py +++ b/nck/utils/text.py @@ -39,7 +39,11 @@ def add_column_value_to_csv_line_iterator(line_iterator, columname, value): def get_generator_dict_from_str_csv( - line_iterator: Generator[Union[bytes, str], None, None], add_date=False, day_range=None, date_format="%Y-%m-%d" + line_iterator: Generator[Union[bytes, str], None, None], + add_date=False, + day_range=None, + date_format="%Y-%m-%d", + skip_last_row=True, ) -> Generator[Dict[str, str], None, None]: first_line = next(line_iterator) headers = ( @@ -49,10 +53,13 @@ def get_generator_dict_from_str_csv( ) if add_date: headers.extend(["date_start", "date_stop"]) - for line in line_iterator: - if isinstance(line, bytes): + + next_line = next(line_iterator, None) + while next_line is not None: + current_line = next_line + if isinstance(current_line, bytes): try: - line = line.decode("utf-8") + current_line = current_line.decode("utf-8") except UnicodeDecodeError as err: logging.warning( "An error has occurred while parsing the file. " @@ -61,16 +68,17 @@ def get_generator_dict_from_str_csv( err.encoding, err.object[err.start : err.end], ) - line = line.decode("utf-8", errors="ignore") + current_line = current_line.decode("utf-8", errors="ignore") - if line == "": + next_line = next(line_iterator, "") + if len(current_line) == 0 or (skip_last_row and len(next_line) == 0): break if add_date: start, end = get_date_start_and_date_stop_from_range(day_range) - line += f",{start.strftime(date_format)},{end.strftime(date_format)}" + current_line += f",{start.strftime(date_format)},{end.strftime(date_format)}" - yield dict(zip(headers, parse_decoded_line(line))) + yield dict(zip(headers, parse_decoded_line(current_line))) def get_generator_dict_from_str_tsv( diff --git a/tests/test_main_method.py b/tests/test_main_method.py index 9f30b753..41b38fc8 100644 --- a/tests/test_main_method.py +++ b/tests/test_main_method.py @@ -21,11 +21,10 @@ def mock_generator(): def mock_read(): yield JSONStream("plop", Test_Normalize_Option.mock_generator()) - @mock.patch.object(nck.readers.reader.Reader, 'read', mock_read) - @mock.patch('nck.writers.writer.Writer.write') + @mock.patch.object(nck.readers.reader.Reader, "read", mock_read) + @mock.patch("nck.writers.writer.Writer.write") def test_normalize_behaviour(self, mock_write): r = Reader w = Writer nck.entrypoint.run([r, w], None, None, None, True) - - assert mock_write.call_args.args[0].__class__ == NormalizedJSONStream + self.assertEqual(mock_write.call_args[0][0].__class__, NormalizedJSONStream) diff --git a/tests/utils/test_text_utils.py b/tests/utils/test_text_utils.py index 7c6f6a6e..a12a167d 100644 --- a/tests/utils/test_text_utils.py +++ b/tests/utils/test_text_utils.py @@ -83,6 +83,7 @@ def test_blank_line(self): b"Cookie Reach: Average 
Impression Frequency,Cookie Reach: " b"Impression Reach" ), + b"(Not desired last line) Total line: ,,,,,,,,,,100,100,100,100,100", "", ] line_iterator_with_blank_line = (line for line in lines) @@ -282,7 +283,9 @@ def test_response_not_binary_with_date(self): def test_csv_with_headers_only(self): input_report = (row for row in [b"Just,Headers,in,this,empty,report"]) - self.assertFalse(next(get_generator_dict_from_str_csv(input_report), False), "Data is not empty") + self.assertFalse( + next(get_generator_dict_from_str_csv(input_report, skip_last_row=False), False), "Data is not empty" + ) @parameterized.expand( [ From 2e4aabd73b007169bbb6db16d154945fee5c4b47 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 May 2020 19:45:41 +0200 Subject: [PATCH 66/66] Build(deps): Bump httplib2 from 0.14.0 to 0.18.0 (#16) Bumps [httplib2](https://github.com/httplib2/httplib2) from 0.14.0 to 0.18.0. - [Release notes](https://github.com/httplib2/httplib2/releases) - [Changelog](https://github.com/httplib2/httplib2/blob/master/CHANGELOG) - [Commits](https://github.com/httplib2/httplib2/compare/v0.14.0...v0.18.0) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 71a60889..9c7782e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ googleanalytics==0.26.0 googleapis-common-protos==1.6.0 gspread==3.1.0 hiredis==1.0.1 -httplib2==0.14.0 +httplib2==0.18.0 idna==2.8 inspect-it==0.3.2 Jinja2==2.10.3
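Taken together, the text-helper patches above leave `get_generator_dict_from_str_csv` dropping a trailing summary row by default, while callers such as the SA360 reader opt out with `skip_last_row=False`. Below is a minimal sketch of both modes, with hypothetical report content:

```
from nck.utils.text import get_generator_dict_from_str_csv

report = [b"impressions,clicks", b"1,2", b"3,4", b"Total,7"]

# Default behavior: the last line is treated as a totals row and skipped.
list(get_generator_dict_from_str_csv(iter(report)))
# -> [{"impressions": "1", "clicks": "2"}, {"impressions": "3", "clicks": "4"}]

# skip_last_row=False keeps every line, which is what the SA360 reader now passes explicitly.
list(get_generator_dict_from_str_csv(iter(report), skip_last_row=False))
# -> [{"impressions": "1", "clicks": "2"}, {"impressions": "3", "clicks": "4"},
#     {"impressions": "Total", "clicks": "7"}]
```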