Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into cleanup-gcp-crede…
Browse files Browse the repository at this point in the history
…ntials
  • Loading branch information
tustvold committed Oct 16, 2023
2 parents 07f1899 + 31bc84c commit 19dac61
Show file tree
Hide file tree
Showing 41 changed files with 1,722 additions and 724 deletions.
134 changes: 79 additions & 55 deletions arrow-csv/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,53 +70,36 @@ use csv::ByteRecord;
use std::io::Write;

use crate::map_csv_error;

const DEFAULT_DATE_FORMAT: &str = "%F";
const DEFAULT_TIME_FORMAT: &str = "%T";
const DEFAULT_TIMESTAMP_FORMAT: &str = "%FT%H:%M:%S.%9f";
const DEFAULT_TIMESTAMP_TZ_FORMAT: &str = "%FT%H:%M:%S.%9f%:z";
const DEFAULT_NULL_VALUE: &str = "";

/// A CSV writer
#[derive(Debug)]
pub struct Writer<W: Write> {
/// The object to write to
writer: csv::Writer<W>,
/// Whether file should be written with headers. Defaults to `true`
/// Whether file should be written with headers, defaults to `true`
has_headers: bool,
/// The date format for date arrays
/// The date format for date arrays, defaults to RFC3339
date_format: Option<String>,
/// The datetime format for datetime arrays
/// The datetime format for datetime arrays, defaults to RFC3339
datetime_format: Option<String>,
/// The timestamp format for timestamp arrays
/// The timestamp format for timestamp arrays, defaults to RFC3339
timestamp_format: Option<String>,
/// The timestamp format for timestamp (with timezone) arrays
/// The timestamp format for timestamp (with timezone) arrays, defaults to RFC3339
timestamp_tz_format: Option<String>,
/// The time format for time arrays
/// The time format for time arrays, defaults to RFC3339
time_format: Option<String>,
/// Is the beginning-of-writer
beginning: bool,
/// The value to represent null entries
null_value: String,
/// The value to represent null entries, defaults to [`DEFAULT_NULL_VALUE`]
null_value: Option<String>,
}

impl<W: Write> Writer<W> {
/// Create a new CsvWriter from a writable object, with default options
pub fn new(writer: W) -> Self {
let delimiter = b',';
let mut builder = csv::WriterBuilder::new();
let writer = builder.delimiter(delimiter).from_writer(writer);
Writer {
writer,
has_headers: true,
date_format: Some(DEFAULT_DATE_FORMAT.to_string()),
datetime_format: Some(DEFAULT_TIMESTAMP_FORMAT.to_string()),
time_format: Some(DEFAULT_TIME_FORMAT.to_string()),
timestamp_format: Some(DEFAULT_TIMESTAMP_FORMAT.to_string()),
timestamp_tz_format: Some(DEFAULT_TIMESTAMP_TZ_FORMAT.to_string()),
beginning: true,
null_value: DEFAULT_NULL_VALUE.to_string(),
}
WriterBuilder::new().with_delimiter(delimiter).build(writer)
}

/// Write a vector of record batches to a writable object
Expand All @@ -138,7 +121,7 @@ impl<W: Write> Writer<W> {
}

let options = FormatOptions::default()
.with_null(&self.null_value)
.with_null(self.null_value.as_deref().unwrap_or(DEFAULT_NULL_VALUE))
.with_date_format(self.date_format.as_deref())
.with_datetime_format(self.datetime_format.as_deref())
.with_timestamp_format(self.timestamp_format.as_deref())
Expand Down Expand Up @@ -207,9 +190,9 @@ impl<W: Write> RecordBatchWriter for Writer<W> {
#[derive(Clone, Debug)]
pub struct WriterBuilder {
/// Column delimiter. Defaults to `b','`
delimiter: Option<u8>,
delimiter: u8,
/// Whether to write column names as file headers. Defaults to `true`
has_headers: bool,
has_header: bool,
/// Optional date format for date arrays
date_format: Option<String>,
/// Optional datetime format for datetime arrays
Expand All @@ -227,14 +210,14 @@ pub struct WriterBuilder {
impl Default for WriterBuilder {
fn default() -> Self {
Self {
has_headers: true,
delimiter: None,
date_format: Some(DEFAULT_DATE_FORMAT.to_string()),
datetime_format: Some(DEFAULT_TIMESTAMP_FORMAT.to_string()),
time_format: Some(DEFAULT_TIME_FORMAT.to_string()),
timestamp_format: Some(DEFAULT_TIMESTAMP_FORMAT.to_string()),
timestamp_tz_format: Some(DEFAULT_TIMESTAMP_TZ_FORMAT.to_string()),
null_value: Some(DEFAULT_NULL_VALUE.to_string()),
has_header: true,
delimiter: b',',
date_format: None,
datetime_format: None,
time_format: None,
timestamp_format: None,
timestamp_tz_format: None,
null_value: None,
}
}
}
Expand All @@ -254,7 +237,7 @@ impl WriterBuilder {
/// let file = File::create("target/out.csv").unwrap();
///
/// // create a builder that doesn't write headers
/// let builder = WriterBuilder::new().has_headers(false);
/// let builder = WriterBuilder::new().with_header(false);
/// let writer = builder.build(file);
///
/// writer
Expand All @@ -265,48 +248,92 @@ impl WriterBuilder {
}

/// Set whether to write headers
#[deprecated(note = "Use Self::with_header")]
#[doc(hidden)]
pub fn has_headers(mut self, has_headers: bool) -> Self {
self.has_headers = has_headers;
self.has_header = has_headers;
self
}

/// Set whether to write the CSV file with a header
pub fn with_header(mut self, header: bool) -> Self {
self.has_header = header;
self
}

/// Returns `true` if this writer is configured to write a header
pub fn header(&self) -> bool {
self.has_header
}

/// Set the CSV file's column delimiter as a byte character
pub fn with_delimiter(mut self, delimiter: u8) -> Self {
self.delimiter = Some(delimiter);
self.delimiter = delimiter;
self
}

/// Get the CSV file's column delimiter as a byte character
pub fn delimiter(&self) -> u8 {
self.delimiter
}

/// Set the CSV file's date format
pub fn with_date_format(mut self, format: String) -> Self {
self.date_format = Some(format);
self
}

/// Get the CSV file's date format if set, defaults to RFC3339
pub fn date_format(&self) -> Option<&str> {
self.date_format.as_deref()
}

/// Set the CSV file's datetime format
pub fn with_datetime_format(mut self, format: String) -> Self {
self.datetime_format = Some(format);
self
}

/// Get the CSV file's datetime format if set, defaults to RFC3339
pub fn datetime_format(&self) -> Option<&str> {
self.datetime_format.as_deref()
}

/// Set the CSV file's time format
pub fn with_time_format(mut self, format: String) -> Self {
self.time_format = Some(format);
self
}

/// Get the CSV file's time format if set, defaults to RFC3339
pub fn time_format(&self) -> Option<&str> {
self.time_format.as_deref()
}

/// Set the CSV file's timestamp format
pub fn with_timestamp_format(mut self, format: String) -> Self {
self.timestamp_format = Some(format);
self
}

/// Get the CSV file's timestamp format if set, defaults to RFC3339
pub fn timestamp_format(&self) -> Option<&str> {
self.timestamp_format.as_deref()
}

/// Set the value to represent null in output
pub fn with_null(mut self, null_value: String) -> Self {
self.null_value = Some(null_value);
self
}

/// Use RFC3339 format for date/time/timestamps
/// Get the value to represent null in output
pub fn null(&self) -> &str {
self.null_value.as_deref().unwrap_or(DEFAULT_NULL_VALUE)
}

/// Use RFC3339 format for date/time/timestamps (default)
#[deprecated(note = "Use WriterBuilder::default()")]
pub fn with_rfc3339(mut self) -> Self {
self.date_format = None;
self.datetime_format = None;
Expand All @@ -318,21 +345,18 @@ impl WriterBuilder {

/// Create a new `Writer`
pub fn build<W: Write>(self, writer: W) -> Writer<W> {
let delimiter = self.delimiter.unwrap_or(b',');
let mut builder = csv::WriterBuilder::new();
let writer = builder.delimiter(delimiter).from_writer(writer);
let writer = builder.delimiter(self.delimiter).from_writer(writer);
Writer {
writer,
has_headers: self.has_headers,
beginning: true,
has_headers: self.has_header,
date_format: self.date_format,
datetime_format: self.datetime_format,
time_format: self.time_format,
timestamp_format: self.timestamp_format,
timestamp_tz_format: self.timestamp_tz_format,
beginning: true,
null_value: self
.null_value
.unwrap_or_else(|| DEFAULT_NULL_VALUE.to_string()),
null_value: self.null_value,
}
}
}
Expand Down Expand Up @@ -411,11 +435,11 @@ mod tests {

let expected = r#"c1,c2,c3,c4,c5,c6,c7
Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes
consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,cupcakes
sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378,06:51:20,cupcakes
sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes
consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,cupcakes
sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378,06:51:20,cupcakes
sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
"#;
assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap());
}
Expand Down Expand Up @@ -512,7 +536,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
let mut file = tempfile::tempfile().unwrap();

let builder = WriterBuilder::new()
.has_headers(false)
.with_header(false)
.with_delimiter(b'|')
.with_null("NULL".to_string())
.with_time_format("%r".to_string());
Expand Down Expand Up @@ -560,7 +584,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
)
.unwrap();

let builder = WriterBuilder::new().has_headers(false);
let builder = WriterBuilder::new().with_header(false);

let mut buf: Cursor<Vec<u8>> = Default::default();
// drop the writer early to release the borrow.
Expand Down Expand Up @@ -652,7 +676,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo

let mut file = tempfile::tempfile().unwrap();

let builder = WriterBuilder::new().with_rfc3339();
let builder = WriterBuilder::new();
let mut writer = builder.build(&mut file);
let batches = vec![&batch];
for batch in batches {
Expand Down
2 changes: 1 addition & 1 deletion arrow-flight/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ tonic = { version = "0.10.0", default-features = false, features = ["transport",

# CLI-related dependencies
anyhow = { version = "1.0", optional = true }
clap = { version = "4.1", default-features = false, features = ["std", "derive", "env", "help", "error-context", "usage"], optional = true }
clap = { version = "4.4.6", default-features = false, features = ["std", "derive", "env", "help", "error-context", "usage", "wrap_help", "color", "suggestions"], optional = true }
tracing-log = { version = "0.1", optional = true }
tracing-subscriber = { version = "0.3.1", default-features = false, features = ["ansi", "env-filter", "fmt"], optional = true }

Expand Down
32 changes: 30 additions & 2 deletions arrow-flight/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,33 @@ that demonstrate how to build a Flight server implemented with [tonic](https://d
## Feature Flags

- `flight-sql-experimental`: Enables experimental support for
[Apache Arrow FlightSQL](https://arrow.apache.org/docs/format/FlightSql.html),
a protocol for interacting with SQL databases.
[Apache Arrow FlightSQL], a protocol for interacting with SQL databases.

## CLI

This crate offers a basic [Apache Arrow FlightSQL] command line interface.

The client can be installed from the repository:

```console
$ cargo install --features=cli,flight-sql-experimental,tls --bin=flight_sql_client --path=. --locked
```

The client comes with extensive help text:

```console
$ flight_sql_client help
```

A query can be executed using:

```console
$ flight_sql_client --host example.com statement-query "SELECT 1;"
+----------+
| Int64(1) |
+----------+
| 1 |
+----------+
```

[apache arrow flightsql]: https://arrow.apache.org/docs/format/FlightSql.html
Loading

0 comments on commit 19dac61

Please sign in to comment.