Skip to content

Commit 48ae6a8

Browse files
Bound Expressions Serde
1 parent df01d88 commit 48ae6a8

File tree

9 files changed

+1106
-16
lines changed

9 files changed

+1106
-16
lines changed

api/src/main/java/org/apache/iceberg/expressions/Expressions.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,38 @@ public static <T> UnboundTerm<T> truncate(String name, int width) {
102102
return new UnboundTransform<>(ref(name), Transforms.truncate(width));
103103
}
104104

105+
@SuppressWarnings("unchecked")
106+
public static <S, T> UnboundTerm<T> bucket(NamedReference<S> ref, int numBuckets) {
107+
Transform<S, T> transform = (Transform<S, T>) Transforms.bucket(numBuckets);
108+
return new UnboundTransform<>(ref, transform);
109+
}
110+
111+
@SuppressWarnings("unchecked")
112+
public static <S, T> UnboundTerm<T> year(NamedReference<S> ref) {
113+
return new UnboundTransform<>(ref, (Transform<S, T>) Transforms.year());
114+
}
115+
116+
@SuppressWarnings("unchecked")
117+
public static <S, T> UnboundTerm<T> month(NamedReference<S> ref) {
118+
return new UnboundTransform<>(ref, (Transform<S, T>) Transforms.month());
119+
}
120+
121+
@SuppressWarnings("unchecked")
122+
public static <S, T> UnboundTerm<T> day(NamedReference<S> ref) {
123+
return new UnboundTransform<>(ref, (Transform<S, T>) Transforms.day());
124+
}
125+
126+
@SuppressWarnings("unchecked")
127+
public static <S, T> UnboundTerm<T> hour(NamedReference<S> ref) {
128+
return new UnboundTransform<>(ref, (Transform<S, T>) Transforms.hour());
129+
}
130+
131+
@SuppressWarnings("unchecked")
132+
public static <S, T> UnboundTerm<T> truncate(NamedReference<S> ref, int width) {
133+
Transform<S, T> transform = (Transform<S, T>) Transforms.truncate(width);
134+
return new UnboundTransform<>(ref, transform);
135+
}
136+
105137
public static <T> UnboundTerm<T> extract(String name, String path, String type) {
106138
return new UnboundExtract<>(ref(name), path, type);
107139
}
@@ -305,6 +337,21 @@ public static <T> NamedReference<T> ref(String name) {
305337
return new NamedReference<>(name);
306338
}
307339

340+
/**
341+
* Constructs a resolved reference for a given column.
342+
*
343+
* <p>The following are equivalent: equals("a", 5) and if a fieldId is 1 and equals(ref("a", 1),
344+
* 5).
345+
*
346+
* @param name a column name
347+
* @param fieldId the field ID of the column
348+
* @param <T> the Java type of this reference
349+
* @return a named reference
350+
*/
351+
static <T> IDReference<T> ref(String name, int fieldId) {
352+
return new IDReference<>(name, fieldId);
353+
}
354+
308355
/**
309356
* Constructs a transform expression for a given column.
310357
*
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.iceberg.expressions;
20+
21+
import org.apache.iceberg.Schema;
22+
import org.apache.iceberg.exceptions.ValidationException;
23+
import org.apache.iceberg.types.Types;
24+
25+
/**
26+
* A reference to a field by ID rather than name. This extends NamedReference because it is a more
27+
* specific in that the name has already been resolved to a field ID. Because the name has been
28+
* resolved, the name is informational.
29+
*/
30+
public class IDReference<T> extends NamedReference<T> {
31+
private final int id;
32+
33+
public IDReference(String name, int id) {
34+
super(name);
35+
this.id = id;
36+
}
37+
38+
public int id() {
39+
return id;
40+
}
41+
42+
@Override
43+
public BoundReference<T> bind(Types.StructType struct, boolean caseSensitive) {
44+
Schema schema = struct.asSchema();
45+
Types.NestedField field = schema.findField(id);
46+
ValidationException.check(
47+
field != null, "Cannot find field by id %s in struct: %s", id, schema.asStruct());
48+
49+
return new BoundReference<>(field, schema.accessorForField(field.fieldId()), name());
50+
}
51+
52+
@Override
53+
public String toString() {
54+
return String.format("ref(name=\"%s\", id=\"%s\")", name(), id);
55+
}
56+
}

api/src/main/java/org/apache/iceberg/expressions/UnboundTransform.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,10 @@ public BoundTransform<S, T> bind(Types.StructType struct, boolean caseSensitive)
5050
"Cannot bind: %s cannot transform %s values from '%s'",
5151
transform,
5252
boundRef.type(),
53-
ref.name());
53+
ref);
5454
} catch (IllegalArgumentException e) {
5555
throw new ValidationException(
56-
"Cannot bind: %s cannot transform %s values from '%s'",
57-
transform, boundRef.type(), ref.name());
56+
"Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref);
5857
}
5958

6059
return new BoundTransform<>(boundRef, transform);

api/src/test/java/org/apache/iceberg/expressions/TestExpressionSerialization.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,17 @@ public void testExpressions() throws Exception {
6161
Expressions.notIn("s", "abc", "xyz").bind(schema.asStruct()),
6262
Expressions.isNull("a").bind(schema.asStruct()),
6363
Expressions.startsWith("s", "abc").bind(schema.asStruct()),
64-
Expressions.notStartsWith("s", "xyz").bind(schema.asStruct())
64+
Expressions.notStartsWith("s", "xyz").bind(schema.asStruct()),
65+
// IDReference tests
66+
Expressions.equal(Expressions.ref("a", 34), 5),
67+
Expressions.in(Expressions.ref("s", 35), "abc", "xyz"),
68+
Expressions.notNull(Expressions.ref("a", 34)),
69+
Expressions.isNull(Expressions.ref("a", 34)),
70+
Expressions.startsWith(Expressions.ref("s", 35), "test"),
71+
// IDReference bound tests
72+
Expressions.equal(Expressions.ref("a", 34), 5).bind(schema.asStruct()),
73+
Expressions.in(Expressions.ref("s", 35), "abc", "xyz").bind(schema.asStruct()),
74+
Expressions.notNull(Expressions.ref("a", 34)).bind(schema.asStruct())
6575
};
6676

6777
for (Expression expression : expressions) {
@@ -196,6 +206,15 @@ private static boolean equals(Reference<?> left, Reference<?> right) {
196206
NamedReference rref = (NamedReference) right;
197207

198208
return lref.name().equals(rref.name());
209+
} else if (left instanceof IDReference) {
210+
if (!(right instanceof IDReference)) {
211+
return false;
212+
}
213+
214+
IDReference lref = (IDReference) left;
215+
IDReference rref = (IDReference) right;
216+
217+
return lref.id() == rref.id() && lref.name().equals(rref.name());
199218
} else if (left instanceof BoundReference) {
200219
if (!(right instanceof BoundReference)) {
201220
return false;
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.iceberg.expressions;
20+
21+
import static org.assertj.core.api.Assertions.assertThat;
22+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
23+
24+
import org.apache.iceberg.Schema;
25+
import org.apache.iceberg.exceptions.ValidationException;
26+
import org.apache.iceberg.types.Types;
27+
import org.junit.jupiter.api.Test;
28+
29+
public class TestIDReference {
30+
private static final Schema SCHEMA =
31+
new Schema(
32+
Types.NestedField.optional(34, "a", Types.IntegerType.get()),
33+
Types.NestedField.required(35, "s", Types.StringType.get()));
34+
35+
@Test
36+
public void testIDReferenceEquality() {
37+
IDReference<Integer> ref1 = new IDReference<>("a", 34);
38+
IDReference<Integer> ref2 = new IDReference<>("a", 34);
39+
IDReference<Integer> ref3 = new IDReference<>("b", 34);
40+
IDReference<Integer> ref4 = new IDReference<>("a", 35);
41+
42+
// Equal references
43+
assertThat(ref1.id()).isEqualTo(ref2.id());
44+
assertThat(ref1.name()).isEqualTo(ref2.name());
45+
46+
// Different names, same fieldId
47+
assertThat(ref1.id()).isEqualTo(ref3.id());
48+
assertThat(ref1.name()).isNotEqualTo(ref3.name());
49+
50+
// Same name, different fieldId
51+
assertThat(ref1).isNotEqualTo(ref4);
52+
}
53+
54+
@Test
55+
public void testIDReferenceBind() {
56+
IDReference<Integer> ref = new IDReference<>("a", 34);
57+
BoundReference<Integer> bound = ref.bind(SCHEMA.asStruct(), true);
58+
59+
assertThat(bound).isInstanceOf(BoundReference.class);
60+
assertThat(bound.fieldId()).isEqualTo(34);
61+
assertThat(bound.name()).isEqualTo("a");
62+
assertThat(bound.type()).isEqualTo(Types.IntegerType.get());
63+
}
64+
65+
@Test
66+
public void testIDReferenceBindIgnoresCaseSensitivity() {
67+
IDReference<Integer> ref = new IDReference<>("A", 34);
68+
69+
// Should work regardless of case sensitivity since we use fieldId
70+
BoundReference<Integer> bound1 = ref.bind(SCHEMA.asStruct(), true);
71+
BoundReference<Integer> bound2 = ref.bind(SCHEMA.asStruct(), false);
72+
73+
assertThat(bound1).isInstanceOf(BoundReference.class);
74+
assertThat(bound2).isInstanceOf(BoundReference.class);
75+
assertThat(bound1.fieldId()).isEqualTo(34);
76+
assertThat(bound2.fieldId()).isEqualTo(34);
77+
}
78+
79+
@Test
80+
public void testIDReferenceBindWithInvalidId() {
81+
IDReference<Integer> ref = new IDReference<>("invalid", 999);
82+
83+
assertThatThrownBy(() -> ref.bind(SCHEMA.asStruct(), true))
84+
.isInstanceOf(ValidationException.class)
85+
.hasMessageContaining(
86+
"Cannot find field by id 999 in struct: struct<34: a: optional int, 35: s: required string>");
87+
}
88+
89+
@Test
90+
public void testIDReferenceRef() {
91+
IDReference<Integer> ref = new IDReference<>("a", 34);
92+
NamedReference<?> namedRef = ref.ref();
93+
94+
assertThat(namedRef.name()).isEqualTo("a");
95+
}
96+
97+
@Test
98+
public void testResolvedReferenceToString() {
99+
IDReference<Integer> ref = new IDReference<>("a", 34);
100+
101+
assertThat(ref.toString()).isEqualTo("ref(name=\"a\", id=\"34\")");
102+
}
103+
104+
@Test
105+
public void testIDReferenceExpressionIntegration() {
106+
// Test that IDReference works in expression predicates
107+
UnboundPredicate<?> expr = Expressions.equal(Expressions.ref("a", 34), 5);
108+
assertThat(expr).isInstanceOf(UnboundPredicate.class);
109+
110+
assertThat(expr.term()).isInstanceOf(IDReference.class);
111+
112+
IDReference<?> resolvedRef = (IDReference<?>) expr.term();
113+
assertThat(resolvedRef.name()).isEqualTo("a");
114+
assertThat(resolvedRef.id()).isEqualTo(34);
115+
}
116+
117+
@Test
118+
public void testIDReferenceUnbind() {
119+
// Test that unbinding a bound reference returns a NamedReference for compatibility
120+
Expression expr = Expressions.equal(Expressions.ref("a", 34), 5);
121+
Expression boundExpr = Binder.bind(SCHEMA.asStruct(), expr, true);
122+
123+
assertThat(boundExpr).isInstanceOf(BoundPredicate.class);
124+
BoundPredicate<?> boundPred = (BoundPredicate<?>) boundExpr;
125+
126+
UnboundTerm<?> unbound = ExpressionUtil.unbind(boundPred.term());
127+
assertThat(unbound).isInstanceOf(NamedReference.class);
128+
129+
NamedReference<?> namedRef = (NamedReference<?>) unbound;
130+
assertThat(namedRef.name()).isEqualTo("a");
131+
}
132+
}

0 commit comments

Comments
 (0)