Skip to content

Commit

Permalink
Revert "Include no quotes in files of cleaned data"
Browse files Browse the repository at this point in the history
This reverts commit 9e1fff0.
  • Loading branch information
krysal committed May 8, 2024
1 parent 989784d commit 0c3eaa5
Showing 1 changed file with 1 addition and 12 deletions.
13 changes: 1 addition & 12 deletions ingestion_server/ingestion_server/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def _upload_to_s3(self, field: str):
s3_file_name = f"{self.s3_path}/{self.date}_{field}_{part_number}.tsv"
tsv_file = f"{field}.tsv"
with open(tsv_file, "w") as f:
csv_writer = csv.writer(f, delimiter="\t", quoting=csv.QUOTE_NONE)
csv_writer = csv.writer(f, delimiter="\t")
csv_writer.writerows(self.buffer[field].rows)
try:
self.s3_bucket.upload_file(tsv_file, s3_file_name)
Expand All @@ -334,23 +334,12 @@ def _upload_to_s3(self, field: str):
self.buffer[field].part += 1
self.buffer[field].rows = []

@staticmethod
def _trim_quotes(value: str) -> str:
    """Strip one leading and one trailing quote character from ``value``.

    The value is trimmed only when it both starts and ends with a single
    (``'``) or double (``"``) quote character; the two quotes are not
    required to be of the same kind. Any other value is returned unchanged.

    Note that a value consisting of a lone quote character satisfies both
    checks and is therefore reduced to an empty string.

    :param value: the string to inspect.
    :return: ``value`` without its first and last character when both are
        quote characters, otherwise ``value`` itself.
    """
    quote_chars = ("'", '"')
    if value.startswith(quote_chars) and value.endswith(quote_chars):
        trimmed = value[1:-1]
        # Hand the values to the logger as arguments instead of building an
        # f-string: the message is then only formatted when debug logging
        # is actually enabled, which matters because this runs once per row.
        log.debug("Trimmed quotes from %s returning %s", value, trimmed)
        return trimmed
    return value

def save(self, result: dict) -> dict[str, int]:
for field, cleaned_items in result.items():
if not cleaned_items or not self.s3_bucket:
continue

for i, (identifier, value) in enumerate(cleaned_items):
cleaned_items[i] = (identifier, self._trim_quotes(value))

self.buffer[field].rows += cleaned_items

if len(self.buffer[field].rows) >= self.buffer_size:
self._upload_to_s3(field)

Expand Down

0 comments on commit 0c3eaa5

Please sign in to comment.