Skip to content

Commit

Permalink
feat: add json and jsonl drivers
Browse files Browse the repository at this point in the history
  • Loading branch information
brianheineman committed Dec 15, 2024
1 parent 59cd808 commit f776261
Show file tree
Hide file tree
Showing 15 changed files with 306 additions and 23 deletions.
26 changes: 14 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,18 @@ visit the [rsql](https://theseus-rs.github.io/rsql/rsql_cli/) site.

## Features

| Feature | |
|-----------------------|--------------------------------------------------------------------------------------------------------------------------------|
| Databases | CockroachDB, CSV, Delimited, DuckDB, LibSQL (Turso), MariaDB, MySQL, PostgreSQL, Redshift, Snowflake, SQLite3, SQL Server, TSV |
| Syntax Highlighting ||
| Result Highlighting ||
| Query Auto-completion ||
| History ||
| SQL File Execution ||
| Embedded PostgreSQL ||
| Output Formats | ascii, csv, expanded, html, json, jsonl, markdown, plain, psql, sqlite, tsv, unicode, xml, yaml |
| Localized Interface | 40+ languages¹ |
| Key Bindings | emacs, vi |
| Feature | |
|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------------|
| Databases | CockroachDB, CSV, Delimited, DuckDB, JSON, JSONL, LibSQL (Turso), MariaDB, MySQL, PostgreSQL, Redshift, Snowflake, SQLite3, SQL Server, TSV |
| Syntax Highlighting | |
| Result Highlighting | |
| Query Auto-completion | |
| History | |
| SQL File Execution | |
| Embedded PostgreSQL | |
| Output Formats | ascii, csv, expanded, html, json, jsonl, markdown, plain, psql, sqlite, tsv, unicode, xml, yaml |
| Localized Interface | 40+ languages¹ |
| Key Bindings | emacs, vi |

¹ Computer translations; human translations welcome

Expand All @@ -73,6 +73,8 @@ rsql --url "<url>" -- "<query>"
| csv (polars) | `csv://?file=<file>[&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| delimited (polars) | `delimited://?file=<file>[&separator=<char>][&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| duckdb | `duckdb://?<memory=true>[&file=<database_file>]` |
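| json (polars) | `json://?file=<file>[&ignore_errors=<true/false>][&infer_schema_length=<n>]` |
| jsonl (polars) | `jsonl://?file=<file>[&ignore_errors=<true/false>][&infer_schema_length=<n>]` |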
| libsql¹ | `libsql://<host>?[<memory=true>][&file=<database_file>][&auth_token=<token>]` |
| mariadb (sqlx) | `mariadb://<user>[:<password>]@<host>[:<port>]/<database>` |
| mysql (sqlx) | `mysql://<user>[:<password>]@<host>[:<port>]/<database>` |
Expand Down
10 changes: 10 additions & 0 deletions datasets/users.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
{
"id": 1,
"name": "John Doe"
},
{
"id": 2,
"name": "Jane Smith"
}
]
3 changes: 3 additions & 0 deletions datasets/users.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"id": 1,"name": "John Doe"}
{"id": 2,"name": "Jane Smith"}

2 changes: 2 additions & 0 deletions rsql_cli/docs/src/chapter2/drivers/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ The drivers command displays the available database drivers.
| `csv` | Comma Separated Value (CSV) file driver provided by [Polars](https://github.com/pola-rs/polars) | `csv://?file=<file>[&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| `delimited` | Delimited file driver provided by [Polars](https://github.com/pola-rs/polars) | `delimited://?file=<file>[&separator=<char>][&has_header=<true/false>][&quote=<char>][&skip_rows=<n>]` |
| `duckdb` | DuckDB provided by [DuckDB](https://duckdb.org/) | `duckdb://?<memory=true>[&file=<database_file>]` |
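| `json` | JSON file driver provided by [Polars](https://github.com/pola-rs/polars) | `json://?file=<file>[&ignore_errors=<true/false>][&infer_schema_length=<n>]` |
| `jsonl` | JSON Lines (JSONL) file driver provided by [Polars](https://github.com/pola-rs/polars) | `jsonl://?file=<file>[&ignore_errors=<true/false>][&infer_schema_length=<n>]` |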
| `libsql` | LibSQL provided by [Turso](https://github.com/tursodatabase/libsql) | `libsql://<host>?[<memory=true>][&file=<database_file>][&auth_token=<token>]` |
| `mariadb` | MariaDB provided by [SQLx](https://github.com/launchbadge/sqlx) | `mariadb://<user>[:<password>]@<host>[:<port>]/<database>` |
| `mysql` | MySQL provided by [SQLx](https://github.com/launchbadge/sqlx) | `mysql://<user>[:<password>]@<host>[:<port>]/<database>` |
Expand Down
4 changes: 4 additions & 0 deletions rsql_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ all-drivers = [
"driver-csv",
"driver-delimited",
"driver-duckdb",
"driver-json",
"driver-jsonl",
"driver-mariadb",
"driver-mysql",
"driver-postgres",
Expand All @@ -77,6 +79,8 @@ driver-cockroachdb = ["rsql_drivers/cockroachdb"]
driver-csv = ["rsql_drivers/csv"]
driver-delimited = ["rsql_drivers/delimited"]
driver-duckdb = ["rsql_drivers/duckdb"]
driver-json = ["rsql_drivers/json"]
driver-jsonl = ["rsql_drivers/jsonl"]
driver-libsql = ["rsql_drivers/libsql"]
driver-mariadb = ["rsql_drivers/mariadb"]
driver-mysql = ["rsql_drivers/mysql"]
Expand Down
4 changes: 4 additions & 0 deletions rsql_core/src/commands/drivers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ mod tests {
"delimited",
#[cfg(feature = "driver-duckdb")]
"duckdb",
#[cfg(feature = "driver-json")]
"json",
#[cfg(feature = "driver-jsonl")]
"jsonl",
#[cfg(feature = "driver-libsql")]
"libsql",
#[cfg(feature = "driver-mariadb")]
Expand Down
10 changes: 10 additions & 0 deletions rsql_drivers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ all = [
"csv",
"delimited",
"duckdb",
"json",
"jsonl",
"mariadb",
"mysql",
"postgres",
Expand All @@ -96,6 +98,14 @@ delimited = [
duckdb = [
"dep:duckdb"
]
json = [
"dep:polars",
"dep:polars-sql",
]
jsonl = [
"dep:polars",
"dep:polars-sql",
]
libsql = [
"dep:libsql"
]
Expand Down
12 changes: 2 additions & 10 deletions rsql_drivers/src/delimited/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use polars_sql::SQLContext;
use std::collections::HashMap;
use std::fmt::Debug;
use std::fs::File;
use std::path::Path;
use url::Url;

#[derive(Debug)]
Expand Down Expand Up @@ -94,16 +93,9 @@ impl crate::Driver for Driver {
.into_reader_with_file_handle(file)
.finish()?;

// Use the file name prefix as the table name
let file_name = Path::new(file_name)
.file_name()
.ok_or(InvalidUrl("Invalid file name".to_string()))?
.to_str()
.ok_or(InvalidUrl("Invalid file name".to_string()))?;
let table_name = file_name.split('.').next().unwrap_or(file_name);

let table_name = crate::polars::driver::get_table_name(file_name)?;
let mut context = SQLContext::new();
context.register(table_name, data_frame.lazy());
context.register(table_name.as_str(), data_frame.lazy());

let connection = Connection::new(url, context).await?;
Ok(Box::new(connection))
Expand Down
10 changes: 10 additions & 0 deletions rsql_drivers/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ impl Default for DriverManager {
drivers.add(Box::new(crate::delimited::Driver));
#[cfg(feature = "duckdb")]
drivers.add(Box::new(crate::duckdb::Driver));
#[cfg(feature = "json")]
drivers.add(Box::new(crate::json::Driver));
#[cfg(feature = "jsonl")]
drivers.add(Box::new(crate::jsonl::Driver));
#[cfg(feature = "libsql")]
drivers.add(Box::new(crate::libsql::Driver));
#[cfg(feature = "mariadb")]
Expand Down Expand Up @@ -157,6 +161,12 @@ mod tests {
#[cfg(feature = "duckdb")]
let driver_count = driver_count + 1;

#[cfg(feature = "json")]
let driver_count = driver_count + 1;

#[cfg(feature = "jsonl")]
let driver_count = driver_count + 1;

#[cfg(feature = "libsql")]
let driver_count = driver_count + 1;

Expand Down
112 changes: 112 additions & 0 deletions rsql_drivers/src/json/driver.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
use crate::error::Result;
use crate::polars::Connection;
use crate::Error::{ConversionError, InvalidUrl};
use async_trait::async_trait;
use polars::io::SerReader;
use polars::prelude::{IntoLazy, JsonReader};
use polars_sql::SQLContext;
use std::collections::HashMap;
use std::fs::File;
use std::num::NonZeroUsize;
use url::Url;

/// Driver that loads a JSON file with the Polars `JsonReader` and makes it
/// queryable through a Polars `SQLContext`.
#[derive(Debug)]
pub struct Driver;

#[async_trait]
impl crate::Driver for Driver {
    /// Returns the identifier of this driver: `json`.
    fn identifier(&self) -> &'static str {
        "json"
    }

    /// Reads the JSON file named in the URL into a data frame and registers it
    /// as a table in a new SQL context.
    ///
    /// Query parameters read from `url`:
    /// - `file` (required): path of the JSON file to open.
    /// - `ignore_errors` (optional): enabled only when the value is exactly
    ///   `true`; any other value, or its absence, leaves it disabled.
    /// - `infer_schema_length` (optional): a `usize` forwarded to the reader's
    ///   schema inference. Defaults to `100`; `0` is forwarded as `None`.
    ///
    /// The table name is derived from the file name by
    /// `crate::polars::driver::get_table_name`.
    ///
    /// # Errors
    /// Returns an error when the URL cannot be parsed, the `file` parameter is
    /// missing (`InvalidUrl`), the file cannot be opened,
    /// `infer_schema_length` is not a valid `usize` (`ConversionError`), the
    /// reader fails, the table name cannot be derived, or the connection
    /// cannot be created.
    async fn connect(
        &self,
        url: String,
        _password: Option<String>,
    ) -> Result<Box<dyn crate::Connection>> {
        let parsed_url = Url::parse(url.as_str())?;
        let query_parameters: HashMap<String, String> =
            parsed_url.query_pairs().into_owned().collect();

        // Read Options
        // `ok_or_else` builds the error string only when the parameter is absent.
        let file_name = query_parameters
            .get("file")
            .ok_or_else(|| InvalidUrl("Missing file parameter".to_string()))?;
        let file = File::open(file_name)?;
        let ignore_errors = query_parameters
            .get("ignore_errors")
            .is_some_and(|value| value == "true");
        let infer_schema_length = match query_parameters.get("infer_schema_length") {
            Some(value) => {
                let length = value
                    .parse::<usize>()
                    .map_err(|error| ConversionError(error.to_string()))?;
                // `NonZeroUsize::new(0)` is `None`, so zero needs no special case.
                // NOTE(review): `None` presumably tells Polars not to cap the number
                // of records used for schema inference; confirm against Polars docs.
                NonZeroUsize::new(length)
            }
            None => NonZeroUsize::new(100),
        };

        let data_frame = JsonReader::new(file)
            .infer_schema_len(infer_schema_length)
            .set_rechunk(true)
            .with_ignore_errors(ignore_errors)
            .finish()?;

        let table_name = crate::polars::driver::get_table_name(file_name)?;
        let mut context = SQLContext::new();
        context.register(table_name.as_str(), data_frame.lazy());

        let connection = Connection::new(url, context).await?;
        Ok(Box::new(connection))
    }
}

#[cfg(test)]
mod test {
    use crate::{DriverManager, Value};

    const CRATE_DIRECTORY: &str = env!("CARGO_MANIFEST_DIR");

    /// Connection URL pointing at the `users.json` fixture, located relative
    /// to this crate's manifest directory.
    fn database_url() -> String {
        format!("json://?file={CRATE_DIRECTORY}/../datasets/users.json")
    }

    /// Connecting through the default driver manager yields a connection that
    /// reports the URL it was opened with.
    #[tokio::test]
    async fn test_driver_connect() -> anyhow::Result<()> {
        let url = database_url();
        let manager = DriverManager::default();
        let mut connection = manager.connect(&url).await?;
        assert_eq!(&url, connection.url());
        connection.close().await?;
        Ok(())
    }

    /// Querying the fixture returns the expected columns and both user rows,
    /// in `id` order, followed by the end of the result set.
    #[tokio::test]
    async fn test_connection_interface() -> anyhow::Result<()> {
        let url = database_url();
        let manager = DriverManager::default();
        let mut connection = manager.connect(&url).await?;

        let sql = "SELECT id, name FROM users ORDER BY id";
        let mut query_result = connection.query(sql).await?;

        assert_eq!(query_result.columns().await, vec!["id", "name"]);

        let expected_rows = vec![
            vec![Value::I64(1), Value::String("John Doe".to_string())],
            vec![Value::I64(2), Value::String("Jane Smith".to_string())],
        ];
        for expected_row in expected_rows {
            assert_eq!(query_result.next().await, Some(expected_row));
        }
        assert!(query_result.next().await.is_none());

        connection.close().await?;
        Ok(())
    }
}
3 changes: 3 additions & 0 deletions rsql_drivers/src/json/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub mod driver;

pub use driver::Driver;
112 changes: 112 additions & 0 deletions rsql_drivers/src/jsonl/driver.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
use crate::error::Result;
use crate::polars::Connection;
use crate::Error::{ConversionError, InvalidUrl};
use async_trait::async_trait;
use polars::io::SerReader;
use polars::prelude::{IntoLazy, JsonLineReader};
use polars_sql::SQLContext;
use std::collections::HashMap;
use std::fs::File;
use std::num::NonZeroUsize;
use url::Url;

/// Driver that loads a JSON Lines file with the Polars `JsonLineReader` and
/// makes it queryable through a Polars `SQLContext`.
#[derive(Debug)]
pub struct Driver;

#[async_trait]
impl crate::Driver for Driver {
    /// Returns the identifier of this driver: `jsonl`.
    fn identifier(&self) -> &'static str {
        "jsonl"
    }

    /// Reads the JSON Lines file named in the URL into a data frame and
    /// registers it as a table in a new SQL context.
    ///
    /// Query parameters read from `url`:
    /// - `file` (required): path of the JSON Lines file to open.
    /// - `ignore_errors` (optional): enabled only when the value is exactly
    ///   `true`; any other value, or its absence, leaves it disabled.
    /// - `infer_schema_length` (optional): a `usize` forwarded to the reader's
    ///   schema inference. Defaults to `100`; `0` is forwarded as `None`.
    ///
    /// The table name is derived from the file name by
    /// `crate::polars::driver::get_table_name`.
    ///
    /// # Errors
    /// Returns an error when the URL cannot be parsed, the `file` parameter is
    /// missing (`InvalidUrl`), the file cannot be opened,
    /// `infer_schema_length` is not a valid `usize` (`ConversionError`), the
    /// reader fails, the table name cannot be derived, or the connection
    /// cannot be created.
    async fn connect(
        &self,
        url: String,
        _password: Option<String>,
    ) -> Result<Box<dyn crate::Connection>> {
        let parsed_url = Url::parse(url.as_str())?;
        let query_parameters: HashMap<String, String> =
            parsed_url.query_pairs().into_owned().collect();

        // Read Options
        // `ok_or_else` builds the error string only when the parameter is absent.
        let file_name = query_parameters
            .get("file")
            .ok_or_else(|| InvalidUrl("Missing file parameter".to_string()))?;
        let file = File::open(file_name)?;
        let ignore_errors = query_parameters
            .get("ignore_errors")
            .is_some_and(|value| value == "true");
        let infer_schema_length = match query_parameters.get("infer_schema_length") {
            Some(value) => {
                let length = value
                    .parse::<usize>()
                    .map_err(|error| ConversionError(error.to_string()))?;
                // `NonZeroUsize::new(0)` is `None`, so zero needs no special case.
                // NOTE(review): `None` presumably tells Polars to use every line
                // for schema inference; confirm against Polars docs.
                NonZeroUsize::new(length)
            }
            None => NonZeroUsize::new(100),
        };

        let data_frame = JsonLineReader::new(file)
            .infer_schema_len(infer_schema_length)
            .set_rechunk(true)
            .with_ignore_errors(ignore_errors)
            .finish()?;

        let table_name = crate::polars::driver::get_table_name(file_name)?;
        let mut context = SQLContext::new();
        context.register(table_name.as_str(), data_frame.lazy());

        let connection = Connection::new(url, context).await?;
        Ok(Box::new(connection))
    }
}

#[cfg(test)]
mod test {
    use crate::{DriverManager, Value};

    const CRATE_DIRECTORY: &str = env!("CARGO_MANIFEST_DIR");

    /// Connection URL pointing at the `users.jsonl` fixture, located relative
    /// to this crate's manifest directory.
    fn database_url() -> String {
        format!("jsonl://?file={CRATE_DIRECTORY}/../datasets/users.jsonl")
    }

    /// Connecting through the default driver manager yields a connection that
    /// reports the URL it was opened with.
    #[tokio::test]
    async fn test_driver_connect() -> anyhow::Result<()> {
        let url = database_url();
        let manager = DriverManager::default();
        let mut connection = manager.connect(&url).await?;
        assert_eq!(&url, connection.url());
        connection.close().await?;
        Ok(())
    }

    /// Querying the fixture returns the expected columns and both user rows,
    /// in `id` order, followed by the end of the result set.
    #[tokio::test]
    async fn test_connection_interface() -> anyhow::Result<()> {
        let url = database_url();
        let manager = DriverManager::default();
        let mut connection = manager.connect(&url).await?;

        let sql = "SELECT id, name FROM users ORDER BY id";
        let mut query_result = connection.query(sql).await?;

        assert_eq!(query_result.columns().await, vec!["id", "name"]);

        let expected_rows = vec![
            vec![Value::I64(1), Value::String("John Doe".to_string())],
            vec![Value::I64(2), Value::String("Jane Smith".to_string())],
        ];
        for expected_row in expected_rows {
            assert_eq!(query_result.next().await, Some(expected_row));
        }
        assert!(query_result.next().await.is_none());

        connection.close().await?;
        Ok(())
    }
}
Loading

0 comments on commit f776261

Please sign in to comment.