Commit b20abad

Merge branch 'main' into perf/dynamic-dory-gpu

jacobtrombetta authored Oct 17, 2024
2 parents 412e359 + e98d9fc commit b20abad
Showing 33 changed files with 201 additions and 115 deletions.
4 changes: 4 additions & 0 deletions CONTRIBUTING.md
@@ -65,6 +65,10 @@ To contribute to this project, you'll need to have Rust installed on your machine
rustup component add rustfmt
rustup component add clippy
```
+- You will also need to install [lld](https://lld.llvm.org/), which is required for running tests. For example, on Debian-based systems you can install it using the package manager:
+```bash
+apt install lld
+```
If you run into any issues, please refer to the [official Rust documentation](https://www.rust-lang.org/learn/get-started) for troubleshooting and more detailed installation instructions.
3 changes: 3 additions & 0 deletions Cargo.toml
@@ -101,3 +101,6 @@ match_wildcard_for_single_variants = "deny"
match_bool = "deny"
manual_assert = "deny"
trivially_copy_pass_by_ref = "deny"
+unnecessary_wraps = "deny"
+should_panic_without_expect = "deny"
+needless_pass_by_value = "deny"
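For context, a minimal sketch — hypothetical code, not from this repository — of the pattern each newly denied lint rejects; the rest of this diff is largely the mechanical cleanup these lints demand (tests gain `expected` messages, helpers switch to borrowed arguments):

```rust
// Hypothetical snippets showing what each newly denied lint rejects.

// `unnecessary_wraps`: the function can never fail, so the `Result`
// wrapper only forces callers to unwrap.
fn always_ok() -> Result<i32, String> {
    Ok(42)
}

// `should_panic_without_expect`: a bare `#[should_panic]` passes on *any*
// panic; `expected = "..."` pins the test to the intended failure.
#[test]
#[should_panic] // lint fires here: no `expected = "..."`
fn overflow_panics() {
    let _ = i32::MAX.checked_add(1).unwrap();
}

// `needless_pass_by_value`: the argument is only read, so taking it by
// value forces callers to clone or surrender ownership for no benefit.
fn total(values: Vec<i64>) -> i64 {
    values.iter().sum()
}
```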
4 changes: 2 additions & 2 deletions crates/proof-of-sql-parser/src/identifier.rs
@@ -262,13 +262,13 @@ mod tests {
}

#[test]
-#[should_panic]
+#[should_panic(expected = "Identifier too long: CapacityError: insufficient capacity")]
fn long_names_panic() {
Identifier::new("t".repeat(65));
}

#[test]
-#[should_panic]
+#[should_panic(expected = "Identifier too long: CapacityError: insufficient capacity")]
fn long_unicode_names_panic() {
Identifier::new("茶".repeat(22));
}
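Note that `expected` matches by substring — the test passes as long as the panic message contains the given string, so a distinctive fragment is enough. A standalone illustration (hypothetical test, not from this crate):

```rust
#[test]
#[should_panic(expected = "insufficient capacity")]
fn expected_only_needs_a_fragment_of_the_message() {
    // Passes: the panic message contains the `expected` string.
    panic!("Identifier too long: CapacityError: insufficient capacity");
}
```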
1 change: 1 addition & 0 deletions crates/proof-of-sql-parser/src/intermediate_ast_tests.rs
@@ -779,6 +779,7 @@ fn we_can_parse_multiple_order_by() {
// TODO: we should be able to pass this test.
// But due to some lalrpop restriction, we aren't.
// This problem will be addressed in a future PR.
+#[allow(clippy::should_panic_without_expect)]
#[test]
#[should_panic]
fn we_cannot_parse_order_by_referencing_reserved_keywords_yet() {
@@ -146,7 +146,7 @@ mod tests {
use itertools::Itertools;

fn metadata_map_from_owned_table(
-table: OwnedTable<Curve25519Scalar>,
+table: &OwnedTable<Curve25519Scalar>,
) -> ColumnCommitmentMetadataMap {
let (identifiers, columns): (Vec<&Identifier>, Vec<CommittableColumn>) = table
.inner_table()
@@ -171,7 +171,7 @@
scalar("scalar_column", [1000, 2000, -1000, 0]),
]);

-let metadata_map = metadata_map_from_owned_table(table);
+let metadata_map = metadata_map_from_owned_table(&table);

assert_eq!(metadata_map.len(), 4);

@@ -214,23 +214,23 @@
varchar("varchar_column", ["Lorem", "ipsum"]),
scalar("scalar_column", [1000, 2000]),
]);
-let metadata_a = metadata_map_from_owned_table(table_a);
+let metadata_a = metadata_map_from_owned_table(&table_a);

let table_b = owned_table([
bigint("bigint_column", [-5, 0, 10]),
int128("int128_column", [300, 400, 500]),
varchar("varchar_column", ["dolor", "sit", "amet"]),
scalar("scalar_column", [-1000, 0, -2000]),
]);
-let metadata_b = metadata_map_from_owned_table(table_b);
+let metadata_b = metadata_map_from_owned_table(&table_b);

let table_c = owned_table([
bigint("bigint_column", [1, 5, -5, 0, 10]),
int128("int128_column", [100, 200, 300, 400, 500]),
varchar("varchar_column", ["Lorem", "ipsum", "dolor", "sit", "amet"]),
scalar("scalar_column", [1000, 2000, -1000, 0, -2000]),
]);
-let metadata_c = metadata_map_from_owned_table(table_c);
+let metadata_c = metadata_map_from_owned_table(&table_c);

assert_eq!(metadata_a.try_union(metadata_b).unwrap(), metadata_c);
}
@@ -242,15 +242,15 @@
varchar("varchar_column", ["Lorem", "ipsum"]),
scalar("scalar_column", [1000, 2000]),
]);
-let metadata_a = metadata_map_from_owned_table(table_a);
+let metadata_a = metadata_map_from_owned_table(&table_a);

let table_b = owned_table([
bigint("bigint_column", [1, 5, -5, 0, 10]),
int128("int128_column", [100, 200, 300, 400, 500]),
varchar("varchar_column", ["Lorem", "ipsum", "dolor", "sit", "amet"]),
scalar("scalar_column", [1000, 2000, -1000, 0, -2000]),
]);
-let metadata_b = metadata_map_from_owned_table(table_b);
+let metadata_b = metadata_map_from_owned_table(&table_b);

let b_difference_a = metadata_b.try_difference(metadata_a.clone()).unwrap();

@@ -297,13 +297,13 @@
varchar("varchar_column", ["Lorem", "ipsum", "dolor", "sit", "amet"]),
scalar("scalar_column", [1000, 2000, -1000, 0, -2000]),
]);
-let metadata_a = metadata_map_from_owned_table(table_a);
+let metadata_a = metadata_map_from_owned_table(&table_a);

let table_b = owned_table([
bigint("bigint_column", [1, 5, -5, 0, 10]),
varchar("varchar_column", ["Lorem", "ipsum", "dolor", "sit", "amet"]),
]);
-let metadata_b = metadata_map_from_owned_table(table_b);
+let metadata_b = metadata_map_from_owned_table(&table_b);

assert!(matches!(
metadata_a.clone().try_union(metadata_b.clone()),
@@ -337,25 +337,25 @@
let strings = ["Lorem", "ipsum", "dolor", "sit"];

let ab_ii_metadata =
-metadata_map_from_owned_table(owned_table([bigint(id_a, ints), bigint(id_b, ints)]));
+metadata_map_from_owned_table(&owned_table([bigint(id_a, ints), bigint(id_b, ints)]));

-let ab_iv_metadata = metadata_map_from_owned_table(owned_table([
+let ab_iv_metadata = metadata_map_from_owned_table(&owned_table([
bigint(id_a, ints),
varchar(id_b, strings),
]));

-let ab_vi_metadata = metadata_map_from_owned_table(owned_table([
+let ab_vi_metadata = metadata_map_from_owned_table(&owned_table([
varchar(id_a, strings),
bigint(id_b, ints),
]));

let ad_ii_metadata =
-metadata_map_from_owned_table(owned_table([bigint(id_a, ints), bigint(id_d, ints)]));
+metadata_map_from_owned_table(&owned_table([bigint(id_a, ints), bigint(id_d, ints)]));

let cb_ii_metadata =
-metadata_map_from_owned_table(owned_table([bigint(id_c, ints), bigint(id_b, ints)]));
+metadata_map_from_owned_table(&owned_table([bigint(id_c, ints), bigint(id_b, ints)]));

-let cd_vv_metadata = metadata_map_from_owned_table(owned_table([
+let cd_vv_metadata = metadata_map_from_owned_table(&owned_table([
varchar(id_c, strings),
varchar(id_d, strings),
]));
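These helper changes are the `needless_pass_by_value` fix in action: borrowing lets one fixture feed several calls without being moved or cloned at each call site. A self-contained sketch of the same pattern, with a hypothetical round-trip helper standing in for `metadata_map_from_owned_table`:

```rust
// Hypothetical round-trip helper that borrows its fixture instead of
// consuming it (what `needless_pass_by_value` pushes you toward).
fn roundtrip_impl(original: &[u8], hex: &str) {
    let encoded: String = original.iter().map(|b| format!("{b:02x}")).collect();
    let decoded: Vec<u8> = (0..hex.len())
        .step_by(2)
        .map(|i| u8::from_str_radix(&hex[i..i + 2], 16).unwrap())
        .collect();
    assert_eq!(encoded, hex);
    // Dereference the borrow to compare against the owned result.
    assert_eq!(decoded, *original);
}

#[test]
fn roundtrip_works() {
    let bytes = vec![0x00, 0xff, 0x10];
    roundtrip_impl(&bytes, "00ff10");
    // `bytes` is still usable here because the helper only borrowed it.
    assert_eq!(bytes.len(), 3);
}
```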
50 changes: 43 additions & 7 deletions crates/proof-of-sql/src/base/database/column_operation.rs
@@ -2,7 +2,7 @@
use super::{ColumnOperationError, ColumnOperationResult};
use crate::base::{
database::ColumnType,
-math::decimal::{scale_scalar, DecimalError, Precision, MAX_SUPPORTED_PRECISION},
+math::decimal::{scale_scalar, DecimalError, Precision},
scalar::Scalar,
};
use alloc::{format, string::ToString, vec::Vec};
@@ -535,7 +535,13 @@ where
// If scale difference is above max decimal precision values
// are equal if they are both zero and unequal otherwise
let upscale = max_scale - lhs_scale;
-if upscale > MAX_SUPPORTED_PRECISION as i8 {
+if i8::try_from(
+    right_column_type
+        .precision_value()
+        .expect("Decimal types have scale"),
+)
+.is_ok_and(|precision| upscale > precision)
+{
lhs.iter()
.zip(rhs.iter())
.map(|(l, r)| -> bool { l.is_zero() && *r == S::ZERO })
@@ -550,7 +556,13 @@
}
} else if rhs_scale < max_scale {
let upscale = max_scale - rhs_scale;
-if upscale > MAX_SUPPORTED_PRECISION as i8 {
+if i8::try_from(
+    left_column_type
+        .precision_value()
+        .expect("Numeric types have scale"),
+)
+.is_ok_and(|precision| upscale > precision)
+{
lhs.iter()
.zip(rhs.iter())
.map(|(l, r)| -> bool { l.is_zero() && *r == S::ZERO })
@@ -596,7 +608,13 @@ where
// always have larger absolute value than the other one as long as it is nonzero
// Hence a (extremely upscaled) <= b if and only if a < 0 or (a == 0 and b >= 0)
let upscale = max_scale - lhs_scale;
-if upscale > MAX_SUPPORTED_PRECISION as i8 {
+if i8::try_from(
+    right_column_type
+        .precision_value()
+        .expect("Decimal types have scale"),
+)
+.is_ok_and(|precision| upscale > precision)
+{
lhs.iter()
.zip(rhs.iter())
.map(|(l, r)| -> bool {
@@ -616,7 +634,13 @@
}
} else if rhs_scale < max_scale {
let upscale = max_scale - rhs_scale;
-if upscale > MAX_SUPPORTED_PRECISION as i8 {
+if i8::try_from(
+    left_column_type
+        .precision_value()
+        .expect("Numeric types have scale"),
+)
+.is_ok_and(|precision| upscale > precision)
+{
// Similarly with extreme scaling we have
// a <= (extremely upscaled) b if and only if a < 0 or (a == 0 and b >= 0)
lhs.iter()
@@ -669,7 +693,13 @@ where
// always have larger absolute value than the other one as long as it is nonzero
// Hence a (extremely upscaled) >= b if and only if a > 0 or (a == 0 and b <= 0)
let upscale = max_scale - lhs_scale;
-if upscale > MAX_SUPPORTED_PRECISION as i8 {
+if i8::try_from(
+    right_column_type
+        .precision_value()
+        .expect("Decimal types have scale"),
+)
+.is_ok_and(|precision| upscale > precision)
+{
lhs.iter()
.zip(rhs.iter())
.map(|(l, r)| -> bool {
@@ -689,7 +719,13 @@
}
} else if rhs_scale < max_scale {
let upscale = max_scale - rhs_scale;
-if upscale > MAX_SUPPORTED_PRECISION as i8 {
+if i8::try_from(
+    left_column_type
+        .precision_value()
+        .expect("Numeric types have scale"),
+)
+.is_ok_and(|precision| upscale > precision)
+{
// Similarly with extreme scaling we have
// a >= (extremely upscaled) b if and only if b < 0 or (a >= 0 and b == 0)
lhs.iter()
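The recurring edit above swaps the old global `MAX_SUPPORTED_PRECISION` bound for the other operand's own precision when deciding whether the scale gap is "extreme". The intuition: if `upscale` exceeds that precision, a nonzero value scaled by `10^upscale` must exceed anything the other column can represent, so equality reduces to "both sides are zero" (and the orderings reduce to sign checks). A standalone model with plain integers — assumed names, not this crate's `Scalar` API:

```rust
/// Hypothetical model of the extreme-upscale equality shortcut.
/// `precision` bounds the right-hand column: every |r| < 10^precision.
fn eq_with_extreme_upscale(lhs: &[i128], rhs: &[i128], upscale: u32, precision: u32) -> Vec<bool> {
    assert!(
        upscale > precision,
        "shortcut only applies when the scale gap exceeds the precision"
    );
    // For nonzero l: |l| * 10^upscale >= 10^upscale > 10^precision > |r|,
    // so the upscaled left side can never equal the right side.
    lhs.iter()
        .zip(rhs.iter())
        .map(|(l, r)| *l == 0 && *r == 0)
        .collect()
}
```

With `upscale = 40` and `precision = 38`, say, `eq_with_extreme_upscale(&[0, 7], &[0, 3], 40, 38)` yields `[true, false]` without ever computing the scaled values.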
@@ -15,30 +15,30 @@ use arrow::{
};

fn we_can_convert_between_owned_column_and_array_ref_impl(
-owned_column: OwnedColumn<Curve25519Scalar>,
+owned_column: &OwnedColumn<Curve25519Scalar>,
array_ref: ArrayRef,
) {
let ic_to_ar = ArrayRef::from(owned_column.clone());
let ar_to_ic = OwnedColumn::try_from(array_ref.clone()).unwrap();

assert!(ic_to_ar == array_ref);
-assert_eq!(owned_column, ar_to_ic);
+assert_eq!(*owned_column, ar_to_ic);
}
fn we_can_convert_between_boolean_owned_column_and_array_ref_impl(data: Vec<bool>) {
we_can_convert_between_owned_column_and_array_ref_impl(
-OwnedColumn::<Curve25519Scalar>::Boolean(data.clone()),
+&OwnedColumn::<Curve25519Scalar>::Boolean(data.clone()),
Arc::new(BooleanArray::from(data)),
);
}
fn we_can_convert_between_bigint_owned_column_and_array_ref_impl(data: Vec<i64>) {
we_can_convert_between_owned_column_and_array_ref_impl(
-OwnedColumn::<Curve25519Scalar>::BigInt(data.clone()),
+&OwnedColumn::<Curve25519Scalar>::BigInt(data.clone()),
Arc::new(Int64Array::from(data)),
);
}
fn we_can_convert_between_int128_owned_column_and_array_ref_impl(data: Vec<i128>) {
we_can_convert_between_owned_column_and_array_ref_impl(
-OwnedColumn::<Curve25519Scalar>::Int128(data.clone()),
+&OwnedColumn::<Curve25519Scalar>::Int128(data.clone()),
Arc::new(
Decimal128Array::from(data)
.with_precision_and_scale(38, 0)
@@ -48,7 +48,7 @@ fn we_can_convert_between_int128_owned_column_and_array_ref_impl(data: Vec<i128>
}
fn we_can_convert_between_varchar_owned_column_and_array_ref_impl(data: Vec<String>) {
we_can_convert_between_owned_column_and_array_ref_impl(
-OwnedColumn::<Curve25519Scalar>::VarChar(data.clone()),
+&OwnedColumn::<Curve25519Scalar>::VarChar(data.clone()),
Arc::new(StringArray::from(data)),
);
}
@@ -81,37 +81,37 @@ fn we_get_an_unsupported_type_error_when_trying_to_convert_from_a_float32_array_
}

fn we_can_convert_between_owned_table_and_record_batch_impl(
-owned_table: OwnedTable<Curve25519Scalar>,
-record_batch: RecordBatch,
+owned_table: &OwnedTable<Curve25519Scalar>,
+record_batch: &RecordBatch,
) {
let it_to_rb = RecordBatch::try_from(owned_table.clone()).unwrap();
let rb_to_it = OwnedTable::try_from(record_batch.clone()).unwrap();

-assert_eq!(it_to_rb, record_batch);
-assert_eq!(rb_to_it, owned_table);
+assert_eq!(it_to_rb, *record_batch);
+assert_eq!(rb_to_it, *owned_table);
}
#[test]
fn we_can_convert_between_owned_table_and_record_batch() {
we_can_convert_between_owned_table_and_record_batch_impl(
-OwnedTable::<Curve25519Scalar>::try_new(IndexMap::default()).unwrap(),
-RecordBatch::new_empty(Arc::new(Schema::empty())),
+&OwnedTable::<Curve25519Scalar>::try_new(IndexMap::default()).unwrap(),
+&RecordBatch::new_empty(Arc::new(Schema::empty())),
);
we_can_convert_between_owned_table_and_record_batch_impl(
-owned_table([
+&owned_table([
bigint("int64", [0; 0]),
int128("int128", [0; 0]),
varchar("string", ["0"; 0]),
boolean("boolean", [true; 0]),
]),
-record_batch!(
+&record_batch!(
"int64" => [0_i64; 0],
"int128" => [0_i128; 0],
"string" => ["0"; 0],
"boolean" => [true; 0],
),
);
we_can_convert_between_owned_table_and_record_batch_impl(
-owned_table([
+&owned_table([
bigint("int64", [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX]),
int128("int128", [0, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX]),
varchar("string", ["0", "1", "2", "3", "4", "5", "6", "7", "8"]),
Expand All @@ -120,7 +120,7 @@ fn we_can_convert_between_owned_table_and_record_batch() {
[true, false, true, false, true, false, true, false, true],
),
]),
-record_batch!(
+&record_batch!(
"int64" => [0_i64, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX],
"int128" => [0_i128, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX],
"string" => ["0", "1", "2", "3", "4", "5", "6", "7", "8"],
Expand All @@ -142,7 +142,7 @@ fn we_cannot_convert_a_record_batch_if_it_has_repeated_column_names() {
}

#[test]
-#[should_panic]
+#[should_panic(expected = "not implemented: Cannot convert Scalar type to arrow type")]
fn we_panic_when_converting_an_owned_table_with_a_scalar_column() {
let owned_table = owned_table::<Curve25519Scalar>([scalar("a", [0; 0])]);
let _ = RecordBatch::try_from(owned_table);