Skip to content

Commit

Permalink
Add batching to avoid going over the 10 MB size limit
Browse files Browse the repository at this point in the history
  • Loading branch information
chmielsen committed Jul 23, 2024
1 parent e1738d4 commit a04d61c
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
26 changes: 23 additions & 3 deletions plugin_scripts/insert_rows.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
import json
import logging
import sys
from itertools import islice

from google.cloud import bigquery

from .config import Config, read_config

sys.tracebacklimit = 0

BATCH_SIZE = 20000


def batched(iterable, n):
    """Yield successive tuples of at most ``n`` items taken from ``iterable``.

    Example: ``batched('ABCDEFG', 3)`` yields ``('A', 'B', 'C')``,
    ``('D', 'E', 'F')`` and finally ``('G',)``.

    Args:
        iterable: Any iterable; it is consumed lazily, one batch at a time.
        n: Maximum number of items per batch; must be at least 1.

    Yields:
        Tuples of up to ``n`` consecutive items. Only the last tuple may be
        shorter than ``n``; an empty input yields nothing.

    Raises:
        ValueError: If ``n`` is less than 1. Because this is a generator, the
            error surfaces when iteration starts, not when ``batched`` is
            called.
    """
    if n < 1:
        raise ValueError("n must be at least one")
    iterator = iter(iterable)
    # Once the iterator is exhausted, islice produces nothing and the empty
    # tuple is falsy, which terminates the loop.
    while batch := tuple(islice(iterator, n)):
        yield batch


def insert_rows(config: Config) -> None:
"""
Expand All @@ -31,14 +43,22 @@ def insert_rows(config: Config) -> None:
with open(config.bq_rows_as_json_path, "r") as row_file:
rows = json.load(row_file)

logging.info(f"Loaded {len(rows)} rows. Inserting...")
if not isinstance(rows, list):
raise ValueError(f"Expected JSON file to be a list of rows, was: {type(rows)}")

logging.info(f"Loaded {len(rows)} rows. Inserting in batches {BATCH_SIZE}...")

total_errors = []
for batch in batched(rows, BATCH_SIZE):
errors = client.insert_rows_json(table_ref, batch)
total_errors.extend(errors)

errors = client.insert_rows_json(table_ref, rows)

logging.info(f"Inserted rows with {len(errors)} errors")
for e in errors:
for e in total_errors:
logging.error(e)
if len(errors) > 0:
if len(total_errors) > 0:
raise Exception("Got exceptions on returning rows, see above.")


Expand Down
2 changes: 1 addition & 1 deletion tests/test_insert_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def test__main_true(
):
mocker.patch("json.loads")
mocker.patch("plugin_scripts.insert_rows.bigquery")
mocker.patch("json.load")
mocker.patch("json.load", return_value=[{"a": 1}, {"b": 2}])
mocker.patch("builtins.open")
insert_rows.main()

Expand Down

0 comments on commit a04d61c

Please sign in to comment.