don't process total line in csv files (#14)
* don't process total line in csv files

* hotfix lint adobe_reader

* fix stopIteration

* handle sa360 csv

* try fix local test_main_method

* Fix: remove trailing blank space in Google ads report

Co-authored-by: Vivien MORLET <[email protected]>
Co-authored-by: benoitgoujon <[email protected]>
3 people authored May 15, 2020
1 parent 650d749 commit 15bfdeb
Showing 6 changed files with 27 additions and 17 deletions.
4 changes: 2 additions & 2 deletions nck/readers/adobe_reader.py
@@ -99,7 +99,7 @@ def build_report_description(self):
"source": "warehouse",
"reportSuiteID": self.kwargs.get("report_suite_id"),
"elements": [{"id": el} for el in self.kwargs.get("report_element_id", [])],
"metrics": [{"id": mt} for mt in self.kwargs.get("report_metric_id", [])]
"metrics": [{"id": mt} for mt in self.kwargs.get("report_metric_id", [])],
}
}
self.set_date_gran_report_desc(report_description)
@@ -164,7 +164,7 @@ def get_report(self, report_id, page_number=1):
logging.info(f"waiting {idx} s for report to be ready")
sleep(idx + 1)
if idx + 1 > MAX_WAIT_REPORT_DELAY:
-raise ReportNotReadyError(f"waited too long for report to be ready")
+raise ReportNotReadyError("waited too long for report to be ready")
idx = idx * 2
response = request_f()
return response
2 changes: 1 addition & 1 deletion nck/readers/googleads_reader.py
@@ -59,7 +59,7 @@
"This field is ignored if manager_id is specified (replaced by the accounts linked to the MCC)",
)
@click.option(
"--googleads-report-name", default="Custom Report", help="Name given to your Report"
"--googleads-report-name", default="CustomReport", help="Name given to your Report"
)
@click.option(
"--googleads-report-type",
2 changes: 1 addition & 1 deletion nck/readers/sa360_reader.py
@@ -103,7 +103,7 @@ def result_generator(self):
report_data = self.sa360_client.assert_report_file_ready(report_id)

for report_generator in self.sa360_client.download_report_files(report_data, report_id):
-yield from get_generator_dict_from_str_csv(report_generator)
+yield from get_generator_dict_from_str_csv(report_generator, skip_last_row=False)

def read(self):
if not self.advertiser_ids:
24 changes: 16 additions & 8 deletions nck/utils/text.py
@@ -39,7 +39,11 @@ def add_column_value_to_csv_line_iterator(line_iterator, columname, value):


def get_generator_dict_from_str_csv(
-line_iterator: Generator[Union[bytes, str], None, None], add_date=False, day_range=None, date_format="%Y-%m-%d"
+line_iterator: Generator[Union[bytes, str], None, None],
+add_date=False,
+day_range=None,
+date_format="%Y-%m-%d",
+skip_last_row=True,
) -> Generator[Dict[str, str], None, None]:
first_line = next(line_iterator)
headers = (
@@ -49,10 +53,13 @@ def get_generator_dict_from_str_csv(
)
if add_date:
headers.extend(["date_start", "date_stop"])
-for line in line_iterator:
-if isinstance(line, bytes):
+
+next_line = next(line_iterator, None)
+while next_line is not None:
+current_line = next_line
+if isinstance(current_line, bytes):
try:
line = line.decode("utf-8")
current_line = current_line.decode("utf-8")
except UnicodeDecodeError as err:
logging.warning(
"An error has occurred while parsing the file. "
@@ -61,16 +68,17 @@
err.encoding,
err.object[err.start : err.end],
)
line = line.decode("utf-8", errors="ignore")
current_line = current_line.decode("utf-8", errors="ignore")

if line == "":
next_line = next(line_iterator, "")
if len(current_line) == 0 or (skip_last_row and len(next_line) == 0):
break

if add_date:
start, end = get_date_start_and_date_stop_from_range(day_range)
line += f",{start.strftime(date_format)},{end.strftime(date_format)}"
current_line += f",{start.strftime(date_format)},{end.strftime(date_format)}"

-yield dict(zip(headers, parse_decoded_line(line)))
+yield dict(zip(headers, parse_decoded_line(current_line)))


def get_generator_dict_from_str_tsv(
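The new look-ahead loop in get_generator_dict_from_str_csv only drops the final row when skip_last_row is left at its default of True, which is why the sa360_reader change above passes skip_last_row=False: presumably SA360 exports end with a real data row rather than a platform "Total" line. A minimal usage sketch of the default behaviour follows; the report lines are made up for illustration, and it assumes the nck package is importable.

from nck.utils.text import get_generator_dict_from_str_csv

# Fictional DCM/DBM-style export: data rows, then a totals row, then a blank line.
report_lines = iter([
    b"campaign,impressions,clicks",
    b"brand,1000,10",
    b"generic,2500,25",
    b"Total,3500,35",
    b"",
])

# With the default skip_last_row=True, the look-ahead sees that the line after
# "Total,3500,35" is empty and breaks before yielding the totals row.
for record in get_generator_dict_from_str_csv(report_lines):
    print(record)
# Expected output: only the two real data rows, e.g.
# {'campaign': 'brand', 'impressions': '1000', 'clicks': '10'}
# {'campaign': 'generic', 'impressions': '2500', 'clicks': '25'}

With skip_last_row=False, the same loop would also yield the "Total" row and only stop on an empty line or when the iterator is exhausted, which is the behaviour the SA360 reader relies on to keep its genuine last record.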
7 changes: 3 additions & 4 deletions tests/test_main_method.py
@@ -21,11 +21,10 @@ def mock_generator():
def mock_read():
yield JSONStream("plop", Test_Normalize_Option.mock_generator())

-@mock.patch.object(nck.readers.reader.Reader, 'read', mock_read)
-@mock.patch('nck.writers.writer.Writer.write')
+@mock.patch.object(nck.readers.reader.Reader, "read", mock_read)
+@mock.patch("nck.writers.writer.Writer.write")
def test_normalize_behaviour(self, mock_write):
r = Reader
w = Writer
nck.entrypoint.run([r, w], None, None, None, True)

-assert mock_write.call_args.args[0].__class__ == NormalizedJSONStream
+self.assertEqual(mock_write.call_args[0][0].__class__, NormalizedJSONStream)
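The rewritten assertion presumably avoids mock.call_args.args, which only exists on Python 3.8+, in favour of tuple indexing that also works on older interpreters (consistent with the "try fix local test_main_method" note in the commit message). A small standalone sketch of the equivalence, independent of nck:

from unittest import mock

m = mock.Mock()
m("payload", flag=True)

# call_args behaves like an (args, kwargs) tuple, so indexing works on any Python 3.x:
assert m.call_args[0][0] == "payload"      # first positional argument
assert m.call_args[1] == {"flag": True}    # keyword arguments
# m.call_args.args / m.call_args.kwargs are the 3.8+ attribute spellings of the same data.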
5 changes: 4 additions & 1 deletion tests/utils/test_text_utils.py
@@ -83,6 +83,7 @@ def test_blank_line(self):
b"Cookie Reach: Average Impression Frequency,Cookie Reach: "
b"Impression Reach"
),
b"(Not desired last line) Total line: ,,,,,,,,,,100,100,100,100,100",
"",
]
line_iterator_with_blank_line = (line for line in lines)
@@ -282,7 +283,9 @@ def test_response_not_binary_with_date(self):

def test_csv_with_headers_only(self):
input_report = (row for row in [b"Just,Headers,in,this,empty,report"])
-self.assertFalse(next(get_generator_dict_from_str_csv(input_report), False), "Data is not empty")
+self.assertFalse(
+next(get_generator_dict_from_str_csv(input_report, skip_last_row=False), False), "Data is not empty"
+)

@parameterized.expand(
[
