Skip to content

Commit

Permalink
tidied geojson structure
Browse files Browse the repository at this point in the history
  • Loading branch information
jjcfrancisco committed Aug 7, 2024
1 parent 09561a3 commit 2e34e8b
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 124 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Below are the available commands and flags for Popgis.

`srid`: choose either 4326 (WGS84) or 3857 (Web Mercator). **Optional**. *Default is 4326.*

`mode` (short: `-m`): choose either **overwrite**, **append** or **fail** modes. Read more [here](#modes).
`mode` (short: `-m`): choose either **overwrite** or **fail** modes. Read more [here](#modes).

#### Examples
```bash
Expand All @@ -55,7 +55,7 @@ popgis -i water_polygons.shp \
```

#### Modes
The **overwrite** mode will delete existing table if name of schema/table is the same and will write into the new table. The **append** mode only inserts new data into the existing table. The **fail** mode, it ensures that if the table already exists in the database, the job will fail to prevent data loss.
The **overwrite** mode deletes the existing table if one with the same schema/table name already exists, and then writes the data into a new table. The **fail** mode ensures that the job fails if the table already exists in the database, preventing data loss.

## Benchmarks
Although not extensive, the benchmarks show that **Popgis is twice as fast as ogr2ogr**. This is most noticeable with large files.
Expand Down
3 changes: 2 additions & 1 deletion justfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
cd ./target/release/ && ./popgis --input ../../examples/geojson/spain.geojson \
--uri postgresql://pio:password@localhost:25432/popgis \
--schema geojson \
--table spain
--table spain \
--mode=overwrite

@set-tags:
1 change: 0 additions & 1 deletion src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ pub enum Error {

// -- pg
TableExists(String),
CannotAppend(String),

// -- file_types
UnsupportedFileExtension(String),
Expand Down
6 changes: 4 additions & 2 deletions src/file_types/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ use std::path::Path;
use crate::pg::binary_copy::Wkb;

// Struct to hold column name and data type
pub struct NewTableTypes {
pub column_name: String,
// Display
#[derive(Debug)]
pub struct NameAndType {
pub name: String,
pub data_type: Type,
}

Expand Down
63 changes: 31 additions & 32 deletions src/file_types/geojson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,29 @@ use serde_json;
use std::collections::HashMap;
use wkb::geom_to_wkb;

use crate::pg::binary_copy::{infer_geom_type, insert_row};
use crate::pg::crud::{get_stmt, prepare_postgis};
use crate::pg::binary_copy::{infer_geometry_type, insert_row};
use crate::pg::ops::prepare_postgis;
use crate::utils::cli::Cli;

use super::common::{AcceptedTypes, NewTableTypes};
use super::common::{AcceptedTypes, NameAndType};

pub fn insert_data(args: Cli) -> Result<()> {
// Reads through the geojson file and determines the data types
// Fix - it should only read one time
//
// Example:
//
// let data_types, geojson_str = read_geojson(&args.uri)?;
let data_types = determine_data_types(&args.input)?;
// Determine data types of the input file
let file_data_types = determine_file_data_types(&args.input)?;
// Prepare database
prepare_postgis(&args, &file_data_types)?;

// Creates the necessary schema and table in PostGIS
prepare_postgis(&args, &data_types)?;

// Infer the geometry type
let stmt = get_stmt(&args.table, &args.schema, &args.uri)?;
let geom_type = infer_geom_type(stmt)?;

// Prepare types for binary copy
// This is unnecessary -> refactor soon
// Get data types
let mut types: Vec<Type> = Vec::new();
for column in data_types.iter() {
for column in file_data_types.iter() {
types.push(column.data_type.clone());
}
// Get geometry type
let geom_type = infer_geometry_type(&args.table, &args.schema, &args.uri)?;
// Add geometry type to types
types.push(geom_type);

// Read geojson file
let geojson = read_geojson(&args.input)?;
match geojson {
GeoJson::FeatureCollection(fc) => {
Expand Down Expand Up @@ -68,7 +61,7 @@ pub fn insert_data(args: Cli) -> Result<()> {
.expect("Failed to convert geojson::Geometry to geo::Geometry ✘");
let wkb = geom_to_wkb(&geom).expect("Could not convert geometry to WKB ✘");
row.push(AcceptedTypes::Geometry(Some(Wkb { geometry: wkb })));
insert_row(row, &data_types, &types, &args)?;
insert_row(row, &file_data_types, &types, &args)?;
}
println!("Data sucessfully inserted into database ✓");
}
Expand All @@ -78,7 +71,7 @@ pub fn insert_data(args: Cli) -> Result<()> {
Ok(())
}

pub fn determine_data_types(file_path: &str) -> Result<Vec<NewTableTypes>> {
pub fn determine_file_data_types(file_path: &str) -> Result<Vec<NameAndType>> {
let mut table_config: HashMap<String, Type> = HashMap::new();
let geojson_str = std::fs::read_to_string(file_path)?;
let geojson = geojson_str.parse::<GeoJson>().unwrap();
Expand Down Expand Up @@ -151,15 +144,15 @@ pub fn determine_data_types(file_path: &str) -> Result<Vec<NewTableTypes>> {
_ => println!("Not a feature collection ✘"),
}

let mut data_types: Vec<NewTableTypes> = Vec::new();
for (column_name, data_type) in table_config {
data_types.push(NewTableTypes {
column_name,
data_type,
let mut names_and_types: Vec<NameAndType> = Vec::new();
for (name, data_type) in table_config.iter() {
names_and_types.push(NameAndType {
name: name.to_string(),
data_type: data_type.clone(),
});
}

Ok(data_types)
Ok(names_and_types)
}

pub fn read_geojson(file_path: &str) -> Result<GeoJson> {
Expand All @@ -173,12 +166,12 @@ mod tests {
use super::*;

#[test]
fn test_determine_data_types() {
fn test_determine_file_data_types() {
let file_path = "examples/geojson/spain.geojson";
let data_types = determine_data_types(file_path).unwrap();
let data_types = determine_file_data_types(file_path).unwrap();
assert_eq!(data_types.len(), 3);
for data_type in data_types {
match data_type.column_name.as_str() {
match data_type.name.as_str() {
"source" => assert_eq!(data_type.data_type, Type::TEXT),
"id" => assert_eq!(data_type.data_type, Type::TEXT),
"name" => assert_eq!(data_type.data_type, Type::TEXT),
Expand All @@ -191,6 +184,12 @@ mod tests {
fn test_read_geojson() {
let file_path = "examples/geojson/spain.geojson";
let rows = read_geojson(file_path).unwrap();
assert_eq!(rows.row.len(), 19);
match rows {
GeoJson::FeatureCollection(fc) => {
let features = fc.features;
assert_eq!(features.len(), 19);
}
_ => (),
}
}
}
10 changes: 5 additions & 5 deletions src/file_types/shapefile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ use std::collections::HashMap;
use postgres::types::Type;
use shapefile::dbase::FieldValue;

use crate::file_types::common::{AcceptedTypes, NewTableTypes, Row, Rows};
use crate::file_types::common::{AcceptedTypes, NameAndType, Row, Rows};
use crate::file_types::geo::to_geo;
use crate::pg::binary_copy::Wkb;
use wkb::geom_to_wkb;

pub fn determine_data_types(file_path: &str) -> Result<Vec<NewTableTypes>> {
pub fn determine_data_types(file_path: &str) -> Result<Vec<NameAndType>> {
let mut table_config: HashMap<String, Type> = HashMap::new();
let mut reader = shapefile::Reader::from_path(file_path)?;
for shape_record in reader.iter_shapes_and_records() {
Expand Down Expand Up @@ -99,10 +99,10 @@ pub fn determine_data_types(file_path: &str) -> Result<Vec<NewTableTypes>> {
}
}

let mut data_types: Vec<NewTableTypes> = Vec::new();
let mut data_types: Vec<NameAndType> = Vec::new();
for (column_name, data_type) in table_config.iter() {
data_types.push(NewTableTypes {
column_name: column_name.clone(),
data_types.push(NameAndType {
name: column_name.clone(),
data_type: data_type.clone(),
});
}
Expand Down
16 changes: 11 additions & 5 deletions src/pg/binary_copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use std::error::Error;
use postgres::binary_copy::BinaryCopyInWriter;
use postgres::CopyInWriter;

use crate::pg::crud::create_connection;
use crate::file_types::common::{AcceptedTypes, NewTableTypes};
use crate::pg::ops::create_connection;
use crate::file_types::common::{AcceptedTypes, NameAndType};
use crate::utils::cli::Cli;

#[derive(Debug)]
Expand All @@ -34,14 +34,20 @@ impl ToSql for Wkb {
to_sql_checked!();
}

pub fn infer_geom_type(stmt: Statement) -> Result<Type> {
pub fn infer_geometry_type(table_name: &str, schema_name: &Option<String>, uri: &str) -> Result<Type> {
let mut client = create_connection(uri)?;
let stmt = if let Some(schema) = schema_name {
client.prepare(&format!("SELECT geom FROM {}.{}", schema, table_name))?
} else {
client.prepare(&format!("SELECT geom FROM {}", table_name))?
};
let column = stmt.columns().first().expect("Failed to get columns ✘");
Ok(column.type_().clone())
}

pub fn insert_row(
row: Vec<AcceptedTypes>,
config: &[NewTableTypes],
config: &[NameAndType],
types: &Vec<Type>,
args: &Cli,
) -> Result<()> {
Expand All @@ -59,7 +65,7 @@ pub fn insert_row(
}
query.push_str(" (");
for column in config.iter() {
query.push_str(&format!("{},", column.column_name));
query.push_str(&format!("{},", column.name));
}
query.push_str("geom) FROM stdin BINARY");
let writer: CopyInWriter = client.copy_in(&query)?;
Expand Down
2 changes: 1 addition & 1 deletion src/pg/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
pub mod binary_copy;
pub mod crud;
pub mod ops;
53 changes: 8 additions & 45 deletions src/pg/crud.rs → src/pg/ops.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
use crate::{Error, Result};
use crate::{file_types::common::NameAndType, Error, Result};
use postgres::types::Type;
use postgres::Statement;

use crate::utils::cli::Cli;
use postgres::{Client, NoTls};

use crate::file_types::common::NewTableTypes;

pub fn prepare_postgis(args: &Cli, config: &[NewTableTypes]) -> Result<()> {

pub fn prepare_postgis(args: &Cli, config: &[NameAndType]) -> Result<()> {
// If schema present, create schema
if let Some(schema) = &args.schema {
let schema_exists = create_schema(schema, &args.uri)?;
Expand Down Expand Up @@ -42,20 +38,10 @@ pub fn create_schema(schema_name: &str, uri: &str) -> Result<bool> {
}
}

/// Prepares a `SELECT geom` statement against the target table.
///
/// The table is qualified with `schema_name` when one is given; otherwise the bare
/// table name is used. A new connection is opened from `uri` to prepare the statement.
///
/// # Errors
///
/// Propagates any error from opening the connection or preparing the statement.
pub fn get_stmt(table_name: &str, schema_name: &Option<String>, uri: &str) -> Result<Statement> {
    let mut client = create_connection(uri)?;
    // Build the query text first so the statement is prepared in a single place.
    let query = match schema_name {
        Some(schema) => format!("SELECT geom FROM {}.{}", schema, table_name),
        None => format!("SELECT geom FROM {}", table_name),
    };
    let prepared = client.prepare(&query)?;
    Ok(prepared)
}

pub fn create_table(
table_name: &str,
schema_name: &Option<String>,
config: &[NewTableTypes],
config: &[NameAndType],
uri: &str,
srid: &Option<usize>,
) -> Result<()> {
Expand All @@ -69,20 +55,21 @@ pub fn create_table(
for column in config.iter() {
match column.data_type {
Type::INT8 => {
query.push_str(&format!("{} INT,", column.column_name));
query.push_str(&format!("{} INT,", column.name));
}
Type::FLOAT8 => {
query.push_str(&format!("{} DOUBLE PRECISION,", column.column_name));
query.push_str(&format!("{} DOUBLE PRECISION,", column.name));
}
Type::TEXT => {
query.push_str(&format!("{} TEXT,", column.column_name));
query.push_str(&format!("{} TEXT,", column.name));
}
Type::BOOL => {
query.push_str(&format!("{} BOOL,", column.column_name));
query.push_str(&format!("{} BOOL,", column.name));
}
_ => println!("Type currently not supported ✘"),
}
}

// If no srid, default to 4326
if let Some(srid) = srid {
query.push_str(&format!("geom Geometry(Geometry, {})", srid));
Expand All @@ -102,30 +89,6 @@ pub fn create_table(
Ok(())
}

pub fn can_append(table_name: &str, schema_name: &Option<String>, uri: &str) -> Result<()> {
let mut client = create_connection(uri)?;
let query = if let Some(schema) = schema_name {
format!(
"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_schema = '{}' AND table_name = '{}')",
schema, table_name
)
} else {
format!(
"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = '{}')",
table_name
)
};
let exists: bool = client.query_one(&query, &[])?.get(0);
// If exists, return Ok
if exists {
return Ok(());
} else {
return Err(Error::CannotAppend(
"Cannot append to a table that does NOT exist ✘".into(),
));
}
}

pub fn check_table_exists(table_name: &str, schema_name: &Option<String>, uri: &str) -> Result<()> {
let mut client = create_connection(uri)?;
let query = if let Some(schema) = schema_name {
Expand Down
Loading

0 comments on commit 2e34e8b

Please sign in to comment.