Skip to content

Commit

Permalink
add truncate
Browse files Browse the repository at this point in the history
  • Loading branch information
kindly committed Jun 18, 2024
1 parent 830c6ac commit 91dfbe5
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 14 deletions.
14 changes: 7 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "flatterer"
version = "0.19.16"
version = "0.19.17"
authors = ["David Raznick <[email protected]>"]
edition = "2021"
license = "MIT"
Expand All @@ -14,9 +14,9 @@ serde_json = { version = "1.0.83", features = ["preserve_order"] }
pyo3 = { version = "0.18.3", features = ["extension-module", "eyre"] }
eyre = "0.6.8"
#libflatterer={path = "../libflatterer"}
libflatterer = "0.19.14"
libflatterer = "0.19.16"

flatterer-web = "0.19.14"
flatterer-web = "0.19.16"
#flatterer-web={path = "../flatterer-web"}

env_logger = "0.10.1"
Expand Down
8 changes: 8 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.

and this project adheres to [Semantic Versioning](http://semver.org/).

## [0.19.17] - 2024-06-18

### New
- truncate postgres

### Fixed
- timezone date types now accepted in postgres

## [0.19.15] - 2024-05-09

### Fixed
Expand Down
22 changes: 22 additions & 0 deletions docs/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ Options:
tables to fit data
--drop When loading to postgres or sqlite, drop table
if already exists.
--truncate When loading to postgres or sqlite, truncate table
if already exists.
--id-prefix TEXT Prefix for all `_link` id fields
--stats Produce stats about the data in the
datapackage.json file
Expand Down Expand Up @@ -475,6 +477,26 @@ import flatterer
flatterer.flatten('inputfile.json', 'ouput_dir', postgres='postgres://user:pass@host/dbname', drop=True)
```

## Truncate Tables

**Warning: this could mean you lose data**

For postgres and sqlite: truncate the existing table if it exists. This is useful if you want to load the data into a database with the schema pre-defined.

### CLI Usage

```bash
flatterer --postgres='postgres://user:pass@host/dbname' --sqlite-path=sqlite.db INPUT_FILE OUTPUT_DIRECTORY --truncate
```

### Python Usage

```python
import flatterer

flatterer.flatten('inputfile.json', 'ouput_dir', postgres='postgres://user:pass@host/dbname', truncate=True)
```

## Fields File

Path to fields CSV file. The fields file can be used for:
Expand Down
9 changes: 7 additions & 2 deletions flatterer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def flatten(
gzip_input=False,
json_path="",
arrays_new_table=False,
truncate=False,
):
global LOGGING_SETUP
if not LOGGING_SETUP:
Expand Down Expand Up @@ -144,7 +145,8 @@ def flatten(
table_prefix, id_prefix, emit_obj, force,
schema, schema_titles, path, json_stream, ndjson,
sqlite_path, threads, log_error, postgres, postgres_schema,
drop, pushdown, sql_scripts, evolve, no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table)
drop, pushdown, sql_scripts, evolve, no_link, stats, low_disk, low_memory,
gzip_input, json_path, arrays_new_table, truncate)
elif method == 'iter':
if path:
raise AttributeError("path not allowed when supplying an iterator")
Expand All @@ -157,7 +159,7 @@ def flatten(
table_prefix, id_prefix, emit_obj, force,
schema, schema_titles, sqlite_path, threads, log_error,
postgres, postgres_schema, drop, pushdown, sql_scripts, evolve,
no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table)
no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table, truncate)
else:
raise AttributeError("input needs to be a string or a generator of strings, dicts or bytes")

Expand Down Expand Up @@ -241,6 +243,7 @@ def iterator_flatten(*args, **kw):
@click.option('--postgres-schema', default="", help='When loading to postgres, put all tables into this schema.')
@click.option('--evolve', is_flag=True, default=False, help='When loading to postgres or sqlite, evolve tables to fit data')
@click.option('--drop', is_flag=True, default=False, help='When loading to postgres or sqlite, drop table if already exists.')
@click.option('--truncate', is_flag=True, default=False, help='When loading to postgres or sqlite, truncate the table if it alraedy exists.')
@click.option('--id-prefix', default="", help='Prefix for all `_link` id fields')
@click.option('--stats', is_flag=True, default=False, help='Produce stats about the data in the datapackage.json file')
@click.argument('inputs', required=False, nargs=-1)
Expand Down Expand Up @@ -280,6 +283,7 @@ def cli(
stats=False,
json_path="",
arrays_new_table=False,
truncate=False
):
if web:
import pathlib
Expand Down Expand Up @@ -347,6 +351,7 @@ def cli(
stats=stats,
json_path=json_path,
arrays_new_table=arrays_new_table,
truncate=truncate,
)
except IOError:
pass
8 changes: 6 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
low_memory:bool,
gzip_input:bool,
json_path_selector: String,
arrays_new_table: bool
arrays_new_table: bool,
truncate: bool,
) -> Result<()> {

let mut op = Options::default();
Expand Down Expand Up @@ -118,6 +119,7 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
op.gzip_input = gzip_input;
op.json_path_selector = json_path_selector;
op.arrays_new_table = arrays_new_table;
op.truncate = truncate;


if let Err(err) = flatten_all(input_files, output_dir, op) {
Expand Down Expand Up @@ -169,7 +171,8 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
low_memory:bool,
gzip_input:bool,
json_path_selector: String,
arrays_new_table: bool
arrays_new_table: bool,
truncate: bool,
) -> Result<()> {
let mut options = Options::default();

Expand Down Expand Up @@ -206,6 +209,7 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
options.gzip_input = gzip_input;
options.json_path_selector = json_path_selector;
options.arrays_new_table = arrays_new_table;
options.truncate = truncate;

let final_output_path = PathBuf::from(output_dir);
let parts_path = final_output_path.join("parts");
Expand Down

0 comments on commit 91dfbe5

Please sign in to comment.