don't process total line in csv files (#14)
* don't process total line in csv files

* hotfix lint adobe_reader

* fix stopIteration

* handle sa360 csv

* try fix local test_main_method

* Fix: remove trailing blank space in Google ads report

Co-authored-by: Vivien MORLET <[email protected]>
Co-authored-by: benoitgoujon <[email protected]>
3 people authored May 15, 2020
1 parent 650d749 commit 15bfdeb
Showing 6 changed files with 27 additions and 17 deletions.
4 changes: 2 additions & 2 deletions nck/readers/adobe_reader.py
@@ -99,7 +99,7 @@ def build_report_description(self):
"source": "warehouse",
"reportSuiteID": self.kwargs.get("report_suite_id"),
"elements": [{"id": el} for el in self.kwargs.get("report_element_id", [])],
"metrics": [{"id": mt} for mt in self.kwargs.get("report_metric_id", [])]
"metrics": [{"id": mt} for mt in self.kwargs.get("report_metric_id", [])],
}
}
self.set_date_gran_report_desc(report_description)
@@ -164,7 +164,7 @@ def get_report(self, report_id, page_number=1):
logging.info(f"waiting {idx} s for report to be ready")
sleep(idx + 1)
if idx + 1 > MAX_WAIT_REPORT_DELAY:
-raise ReportNotReadyError(f"waited too long for report to be ready")
+raise ReportNotReadyError("waited too long for report to be ready")
idx = idx * 2
response = request_f()
return response
2 changes: 1 addition & 1 deletion nck/readers/googleads_reader.py
@@ -59,7 +59,7 @@
"This field is ignored if manager_id is specified (replaced by the accounts linked to the MCC)",
)
@click.option(
"--googleads-report-name", default="Custom Report", help="Name given to your Report"
"--googleads-report-name", default="CustomReport", help="Name given to your Report"
)
@click.option(
"--googleads-report-type",
2 changes: 1 addition & 1 deletion nck/readers/sa360_reader.py
@@ -103,7 +103,7 @@ def result_generator(self):
report_data = self.sa360_client.assert_report_file_ready(report_id)

for report_generator in self.sa360_client.download_report_files(report_data, report_id):
-yield from get_generator_dict_from_str_csv(report_generator)
+yield from get_generator_dict_from_str_csv(report_generator, skip_last_row=False)

def read(self):
if not self.advertiser_ids:
24 changes: 16 additions & 8 deletions nck/utils/text.py
@@ -39,7 +39,11 @@ def add_column_value_to_csv_line_iterator(line_iterator, columname, value):


def get_generator_dict_from_str_csv(
-line_iterator: Generator[Union[bytes, str], None, None], add_date=False, day_range=None, date_format="%Y-%m-%d"
+line_iterator: Generator[Union[bytes, str], None, None],
+add_date=False,
+day_range=None,
+date_format="%Y-%m-%d",
+skip_last_row=True,
) -> Generator[Dict[str, str], None, None]:
first_line = next(line_iterator)
headers = (
@@ -49,10 +53,13 @@ def get_generator_dict_from_str_csv(
)
if add_date:
headers.extend(["date_start", "date_stop"])
-for line in line_iterator:
-if isinstance(line, bytes):
+
+next_line = next(line_iterator, None)
+while next_line is not None:
+current_line = next_line
+if isinstance(current_line, bytes):
try:
line = line.decode("utf-8")
current_line = current_line.decode("utf-8")
except UnicodeDecodeError as err:
logging.warning(
"An error has occurred while parsing the file. "
@@ -61,16 +68,17 @@
err.encoding,
err.object[err.start : err.end],
)
line = line.decode("utf-8", errors="ignore")
current_line = current_line.decode("utf-8", errors="ignore")

if line == "":
next_line = next(line_iterator, "")
if len(current_line) == 0 or (skip_last_row and len(next_line) == 0):
break

if add_date:
start, end = get_date_start_and_date_stop_from_range(day_range)
line += f",{start.strftime(date_format)},{end.strftime(date_format)}"
current_line += f",{start.strftime(date_format)},{end.strftime(date_format)}"

-yield dict(zip(headers, parse_decoded_line(line)))
+yield dict(zip(headers, parse_decoded_line(current_line)))


def get_generator_dict_from_str_tsv(
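The new look-ahead loop in get_generator_dict_from_str_csv only drops the final row when skip_last_row is left at its default of True, which is why the sa360_reader change above passes skip_last_row=False: presumably SA360 exports end with a real data row rather than a platform "Total" line. A minimal usage sketch of the default behaviour follows; the report lines are made up for illustration, and it assumes the nck package is importable.

from nck.utils.text import get_generator_dict_from_str_csv

# Fictional DCM/DBM-style export: data rows, then a totals row, then a blank line.
report_lines = iter([
    b"campaign,impressions,clicks",
    b"brand,1000,10",
    b"generic,2500,25",
    b"Total,3500,35",
    b"",
])

# With the default skip_last_row=True, the look-ahead sees that the line after
# "Total,3500,35" is empty and breaks before yielding the totals row.
for record in get_generator_dict_from_str_csv(report_lines):
    print(record)
# Expected output: only the two real data rows, e.g.
# {'campaign': 'brand', 'impressions': '1000', 'clicks': '10'}
# {'campaign': 'generic', 'impressions': '2500', 'clicks': '25'}

With skip_last_row=False, the same loop would also yield the "Total" row and only stop on an empty line or when the iterator is exhausted, which is the behaviour the SA360 reader relies on to keep its genuine last record.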
7 changes: 3 additions & 4 deletions tests/test_main_method.py
@@ -21,11 +21,10 @@ def mock_generator():
def mock_read():
yield JSONStream("plop", Test_Normalize_Option.mock_generator())

-@mock.patch.object(nck.readers.reader.Reader, 'read', mock_read)
-@mock.patch('nck.writers.writer.Writer.write')
+@mock.patch.object(nck.readers.reader.Reader, "read", mock_read)
+@mock.patch("nck.writers.writer.Writer.write")
def test_normalize_behaviour(self, mock_write):
r = Reader
w = Writer
nck.entrypoint.run([r, w], None, None, None, True)

-assert mock_write.call_args.args[0].__class__ == NormalizedJSONStream
+self.assertEqual(mock_write.call_args[0][0].__class__, NormalizedJSONStream)
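The rewritten assertion presumably avoids mock.call_args.args, which only exists on Python 3.8+, in favour of tuple indexing that also works on older interpreters (consistent with the "try fix local test_main_method" note in the commit message). A small standalone sketch of the equivalence, independent of nck:

from unittest import mock

m = mock.Mock()
m("payload", flag=True)

# call_args behaves like an (args, kwargs) tuple, so indexing works on any Python 3.x:
assert m.call_args[0][0] == "payload"      # first positional argument
assert m.call_args[1] == {"flag": True}    # keyword arguments
# m.call_args.args / m.call_args.kwargs are the 3.8+ attribute spellings of the same data.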
5 changes: 4 additions & 1 deletion tests/utils/test_text_utils.py
@@ -83,6 +83,7 @@ def test_blank_line(self):
b"Cookie Reach: Average Impression Frequency,Cookie Reach: "
b"Impression Reach"
),
b"(Not desired last line) Total line: ,,,,,,,,,,100,100,100,100,100",
"",
]
line_iterator_with_blank_line = (line for line in lines)
@@ -282,7 +283,9 @@ def test_response_not_binary_with_date(self):

def test_csv_with_headers_only(self):
input_report = (row for row in [b"Just,Headers,in,this,empty,report"])
-self.assertFalse(next(get_generator_dict_from_str_csv(input_report), False), "Data is not empty")
+self.assertFalse(
+next(get_generator_dict_from_str_csv(input_report, skip_last_row=False), False), "Data is not empty"
+)

@parameterized.expand(
[
