From c72c899fb329f7e39f7d19d412d39525f0cb3829 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 8 Oct 2024 16:21:39 +0800 Subject: [PATCH] Feat: Support json array functions --- src/functions.rs | 354 ++++++++++++++++++++++++++++++++++++++++++ src/jentry.rs | 2 +- tests/it/functions.rs | 212 ++++++++++++++++++++++++- 3 files changed, 561 insertions(+), 7 deletions(-) diff --git a/src/functions.rs b/src/functions.rs index cae96cf..208d267 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -16,6 +16,7 @@ use core::convert::TryInto; use std::borrow::Cow; use std::cmp::Ordering; use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::collections::VecDeque; use std::str::from_utf8; use std::str::from_utf8_unchecked; @@ -2486,6 +2487,359 @@ fn delete_jsonb_by_index(value: &[u8], index: i32, buf: &mut Vec) -> Result< Ok(()) } +/// Insert a new value into a JSONB array value by the specified position. +pub fn array_insert( + value: &[u8], + pos: i32, + new_value: &[u8], + buf: &mut Vec, +) -> Result<(), Error> { + if !is_jsonb(value) { + let value = parse_value(value)?; + let mut val_buf = Vec::new(); + value.write_to_vec(&mut val_buf); + if !is_jsonb(new_value) { + let new_value = parse_value(new_value)?; + let mut new_val_buf = Vec::new(); + new_value.write_to_vec(&mut new_val_buf); + return array_insert_jsonb(&val_buf, pos, &new_val_buf, buf); + } + return array_insert_jsonb(&val_buf, pos, new_value, buf); + } + array_insert_jsonb(value, pos, new_value, buf) +} + +fn array_insert_jsonb( + value: &[u8], + pos: i32, + new_value: &[u8], + buf: &mut Vec, +) -> Result<(), Error> { + let header = read_u32(value, 0)?; + let len = if header & CONTAINER_HEADER_TYPE_MASK == ARRAY_CONTAINER_TAG { + (header & CONTAINER_HEADER_LEN_MASK) as i32 + } else { + 1 + }; + + let idx = if pos < 0 { len - pos.abs() } else { pos }; + let idx = if idx < 0 { + 0 + } else if idx > len { + len + } else { + idx + } as usize; + let len = len as usize; + + let mut items = VecDeque::with_capacity(len); + match header & CONTAINER_HEADER_TYPE_MASK { + ARRAY_CONTAINER_TAG => { + for (jentry, item) in iterate_array(value, header) { + items.push_back((jentry, item)); + } + } + OBJECT_CONTAINER_TAG => { + let jentry = JEntry::make_container_jentry(value.len()); + items.push_back((jentry, value)); + } + _ => { + let encoded = read_u32(value, 4)?; + let jentry = JEntry::decode_jentry(encoded); + items.push_back((jentry, &value[8..])); + } + } + + let mut builder = ArrayBuilder::new(len + 1); + if idx > 0 { + let mut i = 0; + while let Some((jentry, item)) = items.pop_front() { + builder.push_raw(jentry, item); + i += 1; + if i >= idx { + break; + } + } + } + + let new_header = read_u32(new_value, 0)?; + match new_header & CONTAINER_HEADER_TYPE_MASK { + ARRAY_CONTAINER_TAG | OBJECT_CONTAINER_TAG => { + let new_jentry = JEntry::make_container_jentry(new_value.len()); + builder.push_raw(new_jentry, new_value); + } + _ => { + let encoded = read_u32(new_value, 4)?; + let new_jentry = JEntry::decode_jentry(encoded); + builder.push_raw(new_jentry, &new_value[8..]); + } + } + + while let Some((jentry, item)) = items.pop_front() { + builder.push_raw(jentry, item); + } + builder.build_into(buf); + + Ok(()) +} + +/// Return a JSONB Array that contains only the distinct elements from the input JSONB Array. +pub fn array_distinct(value: &[u8], buf: &mut Vec) -> Result<(), Error> { + if !is_jsonb(value) { + let value = parse_value(value)?; + let mut val_buf = Vec::new(); + value.write_to_vec(&mut val_buf); + return array_distinct_jsonb(&val_buf, buf); + } + array_distinct_jsonb(value, buf) +} + +fn array_distinct_jsonb(value: &[u8], buf: &mut Vec) -> Result<(), Error> { + let header = read_u32(value, 0)?; + let mut builder = ArrayBuilder::new(0); + match header & CONTAINER_HEADER_TYPE_MASK { + ARRAY_CONTAINER_TAG => { + let mut item_set = BTreeSet::new(); + for (jentry, item) in iterate_array(value, header) { + if !item_set.contains(&(jentry.clone(), item)) { + item_set.insert((jentry.clone(), item)); + builder.push_raw(jentry, item); + } + } + } + OBJECT_CONTAINER_TAG => { + let jentry = JEntry::make_container_jentry(value.len()); + builder.push_raw(jentry, value); + } + _ => { + let encoded = read_u32(value, 4)?; + let jentry = JEntry::decode_jentry(encoded); + builder.push_raw(jentry, &value[8..]); + } + } + builder.build_into(buf); + + Ok(()) +} + +/// Return a JSONB Array that contains the matching elements in the two input JSONB Arrays. +pub fn array_intersection(value1: &[u8], value2: &[u8], buf: &mut Vec) -> Result<(), Error> { + if !is_jsonb(value1) { + let value1 = parse_value(value1)?; + let mut val_buf1 = Vec::new(); + value1.write_to_vec(&mut val_buf1); + if !is_jsonb(value2) { + let value2 = parse_value(value2)?; + let mut val_buf2 = Vec::new(); + value2.write_to_vec(&mut val_buf2); + return array_intersection_jsonb(&val_buf1, &val_buf2, buf); + } + return array_intersection_jsonb(&val_buf1, value2, buf); + } + array_intersection_jsonb(value1, value2, buf) +} + +fn array_intersection_jsonb(value1: &[u8], value2: &[u8], buf: &mut Vec) -> Result<(), Error> { + let header1 = read_u32(value1, 0)?; + let header2 = read_u32(value2, 0)?; + + let mut item_map = BTreeMap::new(); + match header2 & CONTAINER_HEADER_TYPE_MASK { + ARRAY_CONTAINER_TAG => { + for (jentry2, item2) in iterate_array(value2, header2) { + if let Some(cnt) = item_map.get_mut(&(jentry2.clone(), item2)) { + *cnt += 1; + } else { + item_map.insert((jentry2, item2), 1); + } + } + } + OBJECT_CONTAINER_TAG => { + let jentry2 = JEntry::make_container_jentry(value2.len()); + item_map.insert((jentry2, value2), 1); + } + _ => { + let encoded = read_u32(value2, 4)?; + let jentry2 = JEntry::decode_jentry(encoded); + item_map.insert((jentry2, &value2[8..]), 1); + } + } + + let mut builder = ArrayBuilder::new(0); + match header1 & CONTAINER_HEADER_TYPE_MASK { + ARRAY_CONTAINER_TAG => { + for (jentry1, item1) in iterate_array(value1, header1) { + if let Some(cnt) = item_map.get_mut(&(jentry1.clone(), item1)) { + if *cnt > 0 { + *cnt -= 1; + builder.push_raw(jentry1, item1); + } + } + } + } + OBJECT_CONTAINER_TAG => { + let jentry1 = JEntry::make_container_jentry(value1.len()); + if item_map.contains_key(&(jentry1.clone(), value1)) { + builder.push_raw(jentry1, value1); + } + } + _ => { + let encoded = read_u32(value1, 4)?; + let jentry1 = JEntry::decode_jentry(encoded); + if item_map.contains_key(&(jentry1.clone(), &value1[8..])) { + builder.push_raw(jentry1, &value1[8..]); + } + } + } + builder.build_into(buf); + + Ok(()) +} + +/// Return a JSONB Array that contains the elements from one input JSONB Array +/// that are not in another input JSONB Array. +pub fn array_except(value1: &[u8], value2: &[u8], buf: &mut Vec) -> Result<(), Error> { + if !is_jsonb(value1) { + let value1 = parse_value(value1)?; + let mut val_buf1 = Vec::new(); + value1.write_to_vec(&mut val_buf1); + if !is_jsonb(value2) { + let value2 = parse_value(value2)?; + let mut val_buf2 = Vec::new(); + value2.write_to_vec(&mut val_buf2); + return array_except_jsonb(&val_buf1, &val_buf2, buf); + } + return array_except_jsonb(&val_buf1, value2, buf); + } + array_except_jsonb(value1, value2, buf) +} + +fn array_except_jsonb(value1: &[u8], value2: &[u8], buf: &mut Vec) -> Result<(), Error> { + let header1 = read_u32(value1, 0)?; + let header2 = read_u32(value2, 0)?; + + let mut item_map = BTreeMap::new(); + match header2 & CONTAINER_HEADER_TYPE_MASK { + ARRAY_CONTAINER_TAG => { + for (jentry2, item2) in iterate_array(value2, header2) { + if let Some(cnt) = item_map.get_mut(&(jentry2.clone(), item2)) { + *cnt += 1; + } else { + item_map.insert((jentry2, item2), 1); + } + } + } + OBJECT_CONTAINER_TAG => { + let jentry2 = JEntry::make_container_jentry(value2.len()); + item_map.insert((jentry2, value2), 1); + } + _ => { + let encoded = read_u32(value2, 4)?; + let jentry2 = JEntry::decode_jentry(encoded); + item_map.insert((jentry2, &value2[8..]), 1); + } + } + + let mut builder = ArrayBuilder::new(0); + match header1 & CONTAINER_HEADER_TYPE_MASK { + ARRAY_CONTAINER_TAG => { + for (jentry1, item1) in iterate_array(value1, header1) { + if let Some(cnt) = item_map.get_mut(&(jentry1.clone(), item1)) { + if *cnt > 0 { + *cnt -= 1; + continue; + } + } + builder.push_raw(jentry1, item1); + } + } + OBJECT_CONTAINER_TAG => { + let jentry1 = JEntry::make_container_jentry(value1.len()); + if !item_map.contains_key(&(jentry1.clone(), value1)) { + builder.push_raw(jentry1, value1); + } + } + _ => { + let encoded = read_u32(value1, 4)?; + let jentry1 = JEntry::decode_jentry(encoded); + if !item_map.contains_key(&(jentry1.clone(), &value1[8..])) { + builder.push_raw(jentry1, &value1[8..]); + } + } + } + builder.build_into(buf); + + Ok(()) +} + +/// Compares whether two JSONB Arrays have at least one element in common. +/// Return TRUE if there is at least one element in common; otherwise return FALSE. +pub fn array_overlap(value1: &[u8], value2: &[u8]) -> Result { + if !is_jsonb(value1) { + let value1 = parse_value(value1)?; + let mut val_buf1 = Vec::new(); + value1.write_to_vec(&mut val_buf1); + if !is_jsonb(value2) { + let value2 = parse_value(value2)?; + let mut val_buf2 = Vec::new(); + value2.write_to_vec(&mut val_buf2); + return array_overlap_jsonb(&val_buf1, &val_buf2); + } + return array_overlap_jsonb(&val_buf1, value2); + } + array_overlap_jsonb(value1, value2) +} + +fn array_overlap_jsonb(value1: &[u8], value2: &[u8]) -> Result { + let header1 = read_u32(value1, 0)?; + let header2 = read_u32(value2, 0)?; + + let mut item_set = BTreeSet::new(); + match header2 & CONTAINER_HEADER_TYPE_MASK { + ARRAY_CONTAINER_TAG => { + for (jentry2, item2) in iterate_array(value2, header2) { + if !item_set.contains(&(jentry2.clone(), item2)) { + item_set.insert((jentry2, item2)); + } + } + } + OBJECT_CONTAINER_TAG => { + let jentry2 = JEntry::make_container_jentry(value2.len()); + item_set.insert((jentry2, value2)); + } + _ => { + let encoded = read_u32(value2, 4)?; + let jentry2 = JEntry::decode_jentry(encoded); + item_set.insert((jentry2, &value2[8..])); + } + } + + match header1 & CONTAINER_HEADER_TYPE_MASK { + ARRAY_CONTAINER_TAG => { + for (jentry1, item1) in iterate_array(value1, header1) { + if item_set.contains(&(jentry1, item1)) { + return Ok(true); + } + } + } + OBJECT_CONTAINER_TAG => { + let jentry1 = JEntry::make_container_jentry(value1.len()); + if item_set.contains(&(jentry1, value1)) { + return Ok(true); + } + } + _ => { + let encoded = read_u32(value1, 4)?; + let jentry1 = JEntry::decode_jentry(encoded); + if item_set.contains(&(jentry1, &value1[8..])) { + return Ok(true); + } + } + } + + Ok(false) +} + /// Deletes all object fields that have null values from the given JSON value, recursively. /// Null values that are not object fields are untouched. pub fn strip_nulls(value: &[u8], buf: &mut Vec) -> Result<(), Error> { diff --git a/src/jentry.rs b/src/jentry.rs index 4380518..5035487 100644 --- a/src/jentry.rs +++ b/src/jentry.rs @@ -14,7 +14,7 @@ use super::constants::*; -#[derive(Debug)] +#[derive(Clone, Debug, PartialOrd, PartialEq, Eq, Ord)] pub(crate) struct JEntry { pub(crate) type_code: u32, pub(crate) length: u32, diff --git a/tests/it/functions.rs b/tests/it/functions.rs index ef85604..ddead8a 100644 --- a/tests/it/functions.rs +++ b/tests/it/functions.rs @@ -17,12 +17,13 @@ use std::cmp::Ordering; use std::collections::BTreeMap; use jsonb::{ - array_length, array_values, as_bool, as_null, as_number, as_str, build_array, build_object, - compare, concat, contains, convert_to_comparable, delete_by_index, delete_by_keypath, - delete_by_name, exists_all_keys, exists_any_keys, from_slice, get_by_index, get_by_keypath, - get_by_name, get_by_path, get_by_path_array, is_array, is_object, keypath::parse_key_paths, - object_each, object_keys, parse_value, path_exists, path_match, strip_nulls, to_bool, to_f64, - to_i64, to_pretty_string, to_serde_json, to_serde_json_object, to_str, to_string, to_u64, + array_distinct, array_except, array_insert, array_intersection, array_length, array_overlap, + array_values, as_bool, as_null, as_number, as_str, build_array, build_object, compare, concat, + contains, convert_to_comparable, delete_by_index, delete_by_keypath, delete_by_name, + exists_all_keys, exists_any_keys, from_slice, get_by_index, get_by_keypath, get_by_name, + get_by_path, get_by_path_array, is_array, is_object, keypath::parse_key_paths, object_each, + object_keys, parse_value, path_exists, path_match, strip_nulls, to_bool, to_f64, to_i64, + to_pretty_string, to_serde_json, to_serde_json_object, to_str, to_string, to_u64, traverse_check_string, type_of, Error, Number, Object, Value, }; @@ -1538,6 +1539,205 @@ fn test_delete_by_keypath() { } } +#[test] +fn test_array_insert() { + let sources = vec![ + (r#"[0,1,2,3]"#, 2, r#""hello""#, r#"[0,1,"hello",2,3]"#), + (r#"[0,1,2,3]"#, 10, r#"100"#, r#"[0,1,2,3,100]"#), + (r#"[0,1,2,3]"#, 0, r#"true"#, r#"[true,0,1,2,3]"#), + (r#"[0,1,2,3]"#, -1, r#"{"k":"v"}"#, r#"[0,1,2,{"k":"v"},3]"#), + (r#"1"#, 1, r#"{"k":"v"}"#, r#"[1,{"k":"v"}]"#), + (r#"{"k":"v"}"#, 2, r#"true"#, r#"[{"k":"v"},true]"#), + ]; + for (val, pos, new_val, result) in sources { + { + let val = val.as_bytes(); + let new_val = new_val.as_bytes(); + let mut buf = Vec::new(); + array_insert(val, pos, new_val, &mut buf).unwrap(); + let actual = from_slice(&buf).unwrap(); + let expected = parse_value(result.as_bytes()).unwrap(); + assert_eq!(actual, expected); + } + { + let val = parse_value(val.as_bytes()).unwrap().to_vec(); + let new_val = parse_value(new_val.as_bytes()).unwrap().to_vec(); + let mut buf = Vec::new(); + array_insert(&val, pos, &new_val, &mut buf).unwrap(); + let actual = from_slice(&buf).unwrap(); + let expected = parse_value(result.as_bytes()).unwrap(); + assert_eq!(actual, expected); + } + } +} + +#[test] +fn test_array_distinct() { + let sources = vec![ + (r#"[0,1,1,2,2,2,3,4]"#, r#"[0,1,2,3,4]"#), + (r#"["A","A","B","C","A","C"]"#, r#"["A","B","C"]"#), + ( + r#"["A","A",10,false,null,false,null,10]"#, + r#"["A",10,false,null]"#, + ), + (r#"[[1,2,2],3,4,[1,2,2]]"#, r#"[[1,2,2],3,4]"#), + ( + r#"[{"k":"v"},"A","A","B",{"k":"v"}]"#, + r#"[{"k":"v"},"A","B"]"#, + ), + (r#"1"#, r#"[1]"#), + (r#"{"k":"v"}"#, r#"[{"k":"v"}]"#), + ]; + for (val, result) in sources { + { + let val = val.as_bytes(); + let mut buf = Vec::new(); + array_distinct(val, &mut buf).unwrap(); + let actual = from_slice(&buf).unwrap(); + let expected = parse_value(result.as_bytes()).unwrap(); + assert_eq!(actual, expected); + } + { + let val = parse_value(val.as_bytes()).unwrap().to_vec(); + let mut buf = Vec::new(); + array_distinct(&val, &mut buf).unwrap(); + let actual = from_slice(&buf).unwrap(); + let expected = parse_value(result.as_bytes()).unwrap(); + assert_eq!(actual, expected); + } + } +} + +#[test] +fn test_array_intersection() { + let sources = vec![ + (r#"["A","B","C"]"#, r#"["B","C"]"#, r#"["B","C"]"#), + (r#"["A","B","B","B","C"]"#, r#"["B","B"]"#, r#"["B","B"]"#), + (r#"[1,2]"#, r#"[3,4]"#, r#"[]"#), + (r#"[null,102,null]"#, r#"[null,null,103]"#, r#"[null,null]"#), + ( + r#"[{"a":1,"b":2},1,2]"#, + r#"[{"a":1,"b":2},3,4]"#, + r#"[{"a":1,"b":2}]"#, + ), + (r#"[{"a":1,"b":2},1,2]"#, r#"[{"a":2,"c":3},3,4]"#, r#"[]"#), + ( + r#"[{"a":1,"b":2,"c":3}]"#, + r#"[{"c":3,"b":2,"a":1},3,4]"#, + r#"[{"a":1,"b":2,"c":3}]"#, + ), + (r#"1"#, r#"1"#, r#"[1]"#), + (r#"1"#, r#"2"#, r#"[]"#), + (r#"{"k":"v"}"#, r#"{"k":"v"}"#, r#"[{"k":"v"}]"#), + ]; + for (val1, val2, result) in sources { + { + let val1 = val1.as_bytes(); + let val2 = val2.as_bytes(); + let mut buf = Vec::new(); + array_intersection(val1, val2, &mut buf).unwrap(); + let actual = from_slice(&buf).unwrap(); + let expected = parse_value(result.as_bytes()).unwrap(); + assert_eq!(actual, expected); + } + { + let val1 = parse_value(val1.as_bytes()).unwrap().to_vec(); + let val2 = parse_value(val2.as_bytes()).unwrap().to_vec(); + let mut buf = Vec::new(); + array_intersection(&val1, &val2, &mut buf).unwrap(); + let actual = from_slice(&buf).unwrap(); + let expected = parse_value(result.as_bytes()).unwrap(); + assert_eq!(actual, expected); + } + } +} + +#[test] +fn test_array_except() { + let sources = vec![ + (r#"["A","B","C"]"#, r#"["B","C"]"#, r#"["A"]"#), + ( + r#"["A","B","B","B","C"]"#, + r#"["B","B"]"#, + r#"["A","B","C"]"#, + ), + (r#"[1,2]"#, r#"[3,4]"#, r#"[1,2]"#), + (r#"[null,102,null]"#, r#"[null,null,103]"#, r#"[102]"#), + ( + r#"[{"a":1,"b":2},1,2]"#, + r#"[{"a":1,"b":2},3,4]"#, + r#"[1,2]"#, + ), + ( + r#"[{"a":1,"b":2},1,2]"#, + r#"[{"a":2,"c":3},3,4]"#, + r#"[{"a":1,"b":2},1,2]"#, + ), + ( + r#"[{"a":1,"b":2,"c":3}]"#, + r#"[{"c":3,"b":2,"a":1},3,4]"#, + r#"[]"#, + ), + (r#"1"#, r#"1"#, r#"[]"#), + (r#"1"#, r#"2"#, r#"[1]"#), + (r#"{"k":"v"}"#, r#"{"k":"v"}"#, r#"[]"#), + ]; + for (val1, val2, result) in sources { + { + let val1 = val1.as_bytes(); + let val2 = val2.as_bytes(); + let mut buf = Vec::new(); + array_except(val1, val2, &mut buf).unwrap(); + let actual = from_slice(&buf).unwrap(); + let expected = parse_value(result.as_bytes()).unwrap(); + assert_eq!(actual, expected); + } + { + let val1 = parse_value(val1.as_bytes()).unwrap().to_vec(); + let val2 = parse_value(val2.as_bytes()).unwrap().to_vec(); + let mut buf = Vec::new(); + array_except(&val1, &val2, &mut buf).unwrap(); + let actual = from_slice(&buf).unwrap(); + let expected = parse_value(result.as_bytes()).unwrap(); + assert_eq!(actual, expected); + } + } +} + +#[test] +fn test_array_overlap() { + let sources = vec![ + (r#"["A","B","C"]"#, r#"["B","C"]"#, true), + (r#"["A","B","B","B","C"]"#, r#"["B","B"]"#, true), + (r#"[1,2]"#, r#"[3,4]"#, false), + (r#"[null,102,null]"#, r#"[null,null,103]"#, true), + (r#"[{"a":1,"b":2},1,2]"#, r#"[{"a":1,"b":2},3,4]"#, true), + (r#"[{"a":1,"b":2},1,2]"#, r#"[{"a":2,"c":3},3,4]"#, false), + ( + r#"[{"a":1,"b":2,"c":3}]"#, + r#"[{"c":3,"b":2,"a":1},3,4]"#, + true, + ), + (r#"1"#, r#"1"#, true), + (r#"1"#, r#"2"#, false), + (r#"{"k":"v"}"#, r#"{"k":"v"}"#, true), + ]; + for (val1, val2, expected) in sources { + { + let val1 = val1.as_bytes(); + let val2 = val2.as_bytes(); + let actual = array_overlap(val1, val2).unwrap(); + assert_eq!(actual, expected); + } + { + let val1 = parse_value(val1.as_bytes()).unwrap().to_vec(); + let val2 = parse_value(val2.as_bytes()).unwrap().to_vec(); + let actual = array_overlap(&val1, &val2).unwrap(); + assert_eq!(actual, expected); + } + } +} + #[test] fn test_to_serde_json() { let sources = vec![