diff --git a/docs/cli.rst b/docs/cli.rst index cb9574c21..6597fb6dc 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -187,8 +187,8 @@ The ``--nl``, ``--csv`` and ``--table`` options are all available. .. _cli_inserting_data: -Inserting data -============== +Inserting JSON data +=================== If you have data as JSON, you can use ``sqlite-utils insert tablename`` to insert it into a database. The table will be created with the correct (automatically detected) columns if it does not already exist. @@ -249,6 +249,17 @@ This also means you pipe ``sqlite-utils`` together to easily create a new SQLite 207368,920 Kirkham St,37.760210314285,-122.47073935813 188702,1501 Evans Ave,37.7422086702947,-122.387293152263 +Inserting CSV or TSV data +========================= + +If your data is in CSV format, you can insert it using the ``--csv`` option:: + + $ sqlite-utils insert dogs.db dogs dogs.csv --csv + +For tab-delimited data, use ``--tsv``:: + + $ sqlite-utils insert dogs.db dogs dogs.tsv --tsv + Upserting data ============== diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index d53a77c69..7b4098dac 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -307,6 +307,7 @@ def insert_upsert_options(fn): ), click.option("--nl", is_flag=True, help="Expect newline-delimited JSON"), click.option("-c", "--csv", is_flag=True, help="Expect CSV"), + click.option("--tsv", is_flag=True, help="Expect TSV"), click.option( "--batch-size", type=int, default=100, help="Commit every X records" ), @@ -339,6 +340,7 @@ def insert_upsert_implementation( pk, nl, csv, + tsv, batch_size, alter, upsert, @@ -347,13 +349,13 @@ def insert_upsert_implementation( default=None, ): db = sqlite_utils.Database(path) - if nl and csv: - click.echo("Use just one of --nl and --csv", err=True) - return + if (nl + csv + tsv) >= 2: + raise click.ClickException("Use just one of --nl, --csv or --tsv") if pk and len(pk) == 1: pk = pk[0] - if csv: - reader = csv_std.reader(json_file) + if csv or tsv: 
+ dialect = "excel-tab" if tsv else "excel" + reader = csv_std.reader(json_file, dialect=dialect) headers = next(reader) docs = (dict(zip(headers, row)) for row in reader) elif nl: @@ -381,7 +383,18 @@ def insert_upsert_implementation( "--ignore", is_flag=True, default=False, help="Ignore records if pk already exists" ) def insert( - path, table, json_file, pk, nl, csv, batch_size, alter, ignore, not_null, default + path, + table, + json_file, + pk, + nl, + csv, + tsv, + batch_size, + alter, + ignore, + not_null, + default, ): """ Insert records from JSON file into a table, creating the table if it @@ -396,6 +409,7 @@ def insert( pk, nl, csv, + tsv, batch_size, alter=alter, upsert=False, @@ -407,7 +421,9 @@ def insert( @cli.command() @insert_upsert_options -def upsert(path, table, json_file, pk, nl, csv, batch_size, alter, not_null, default): +def upsert( + path, table, json_file, pk, nl, csv, tsv, batch_size, alter, not_null, default +): """ Upsert records based on their primary key. Works like 'insert' but if an incoming record has a primary key that matches an existing record @@ -420,6 +436,7 @@ def upsert(path, table, json_file, pk, nl, csv, batch_size, alter, not_null, def pk, nl, csv, + tsv, batch_size, alter=alter, upsert=True, diff --git a/tests/test_cli.py b/tests/test_cli.py index 0e01814f7..2fb1d04b5 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -511,6 +511,38 @@ def test_insert_ignore(db_path, tmpdir): ) +@pytest.mark.parametrize( + "content,option", + (("foo\tbar\tbaz\n1\t2\t3", "--tsv"), ("foo,bar,baz\n1,2,3", "--csv")), +) +def test_insert_csv_tsv(content, option, db_path, tmpdir): + db = Database(db_path) + file_path = str(tmpdir / "insert.csv-tsv") + open(file_path, "w").write(content) + result = CliRunner().invoke(cli.cli, ["insert", db_path, "data", file_path, option]) + assert 0 == result.exit_code + assert [{"foo": "1", "bar": "2", "baz": "3"}] == list(db["data"].rows) + + +@pytest.mark.parametrize( + "options", + ( + ["--tsv", 
"--nl"], + ["--tsv", "--csv"], + ["--csv", "--nl"], + ["--csv", "--nl", "--tsv"], + ), +) +def test_only_allow_one_of_nl_tsv_csv(options, db_path, tmpdir): + file_path = str(tmpdir / "insert.csv-tsv") + open(file_path, "w").write("foo") + result = CliRunner().invoke( + cli.cli, ["insert", db_path, "data", file_path] + options + ) + assert 0 != result.exit_code + assert "Error: Use just one of --nl, --csv or --tsv" == result.output.strip() + + def test_upsert(db_path, tmpdir): test_insert_multiple_with_primary_key(db_path, tmpdir) json_path = str(tmpdir / "upsert.json")