Skip to content

Commit b358475

Browse files
committed
Appender: support ENUM columns
This change allows inserting records into `ENUM` columns using the `Appender` interface. `ENUM` values are specified as `String`s; usage example: ```java stmt.execute("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');"); stmt.execute("CREATE TABLE tab1(col1 mood)"); try (DuckDBAppender appender = conn.createAppender("tab1")) { appender.beginRow().append("sad").endRow(); appender.beginRow().append("happy").endRow(); } ``` Additionally, the `append(byte[])` method can now be used to insert `VARCHAR` records, and `BLOB` columns can be used inside `LIST`s or `MAP`s. Testing: new tests added for top-level and nested (inside `LIST`s) `ENUM` usage. Fix: #416
1 parent 44efeaa commit b358475

File tree

10 files changed

+423
-6
lines changed

10 files changed

+423
-6
lines changed

duckdb_java.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ Java_org_duckdb_DuckDBBindings_duckdb_1create_1struct_1type
6060
Java_org_duckdb_DuckDBBindings_duckdb_1struct_1type_1child_1count
6161
Java_org_duckdb_DuckDBBindings_duckdb_1struct_1type_1child_1name
6262
Java_org_duckdb_DuckDBBindings_duckdb_1array_1type_1array_1size
63+
Java_org_duckdb_DuckDBBindings_duckdb_1enum_1internal_1type
64+
Java_org_duckdb_DuckDBBindings_duckdb_1enum_1dictionary_1size
65+
Java_org_duckdb_DuckDBBindings_duckdb_1enum_1dictionary_1value
6366
Java_org_duckdb_DuckDBBindings_duckdb_1destroy_1logical_1type
6467
Java_org_duckdb_DuckDBBindings_duckdb_1create_1vector
6568
Java_org_duckdb_DuckDBBindings_duckdb_1destroy_1vector

duckdb_java.exp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ _Java_org_duckdb_DuckDBBindings_duckdb_1create_1struct_1type
5757
_Java_org_duckdb_DuckDBBindings_duckdb_1struct_1type_1child_1count
5858
_Java_org_duckdb_DuckDBBindings_duckdb_1struct_1type_1child_1name
5959
_Java_org_duckdb_DuckDBBindings_duckdb_1array_1type_1array_1size
60+
_Java_org_duckdb_DuckDBBindings_duckdb_1enum_1internal_1type
61+
_Java_org_duckdb_DuckDBBindings_duckdb_1enum_1dictionary_1size
62+
_Java_org_duckdb_DuckDBBindings_duckdb_1enum_1dictionary_1value
6063
_Java_org_duckdb_DuckDBBindings_duckdb_1destroy_1logical_1type
6164
_Java_org_duckdb_DuckDBBindings_duckdb_1create_1vector
6265
_Java_org_duckdb_DuckDBBindings_duckdb_1destroy_1vector

duckdb_java.map

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ DUCKDB_JAVA {
5959
Java_org_duckdb_DuckDBBindings_duckdb_1struct_1type_1child_1count;
6060
Java_org_duckdb_DuckDBBindings_duckdb_1struct_1type_1child_1name;
6161
Java_org_duckdb_DuckDBBindings_duckdb_1array_1type_1array_1size;
62+
Java_org_duckdb_DuckDBBindings_duckdb_1enum_1internal_1type;
63+
Java_org_duckdb_DuckDBBindings_duckdb_1enum_1dictionary_1size;
64+
Java_org_duckdb_DuckDBBindings_duckdb_1enum_1dictionary_1value;
6265
Java_org_duckdb_DuckDBBindings_duckdb_1destroy_1logical_1type;
6366
Java_org_duckdb_DuckDBBindings_duckdb_1create_1vector;
6467
Java_org_duckdb_DuckDBBindings_duckdb_1destroy_1vector;

src/jni/bindings_logical_type.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,76 @@ JNIEXPORT jlong JNICALL Java_org_duckdb_DuckDBBindings_duckdb_1array_1type_1arra
291291
return static_cast<jlong>(size);
292292
}
293293

294+
/*
295+
* Class: org_duckdb_DuckDBBindings
296+
* Method: duckdb_enum_internal_type
297+
* Signature: (Ljava/nio/ByteBuffer;)I
298+
*/
299+
// JNI entry point: returns the value of duckdb_enum_internal_type() for the given logical type, cast to jint.
// Returns -1 when a Java exception is pending after the logical-type handle lookup.
JNIEXPORT jint JNICALL Java_org_duckdb_DuckDBBindings_duckdb_1enum_1internal_1type(JNIEnv *env, jclass,
300+
jobject logical_type) {
301+
302+
// Obtain the native handle from the Java-side buffer (helper is defined elsewhere in the project;
// presumably it raises a Java exception on an invalid buffer - TODO confirm).
duckdb_logical_type lt = logical_type_buf_to_logical_type(env, logical_type);
303+
// Do not touch `lt` if the helper left an exception pending.
if (env->ExceptionCheck()) {
304+
return -1;
305+
}
306+
307+
duckdb_type type_id = duckdb_enum_internal_type(lt);
308+
309+
// The Java side receives the numeric type id and maps it back to its own enum.
return static_cast<jint>(type_id);
310+
}
311+
312+
/*
313+
* Class: org_duckdb_DuckDBBindings
314+
* Method: duckdb_enum_dictionary_size
315+
* Signature: (Ljava/nio/ByteBuffer;)J
316+
*/
317+
// JNI entry point: returns the value of duckdb_enum_dictionary_size() for the given logical type, cast to jlong.
// Returns -1 when a Java exception is pending after the logical-type handle lookup.
JNIEXPORT jlong JNICALL Java_org_duckdb_DuckDBBindings_duckdb_1enum_1dictionary_1size(JNIEnv *env, jclass,
318+
jobject logical_type) {
319+
320+
// Obtain the native handle from the Java-side buffer (helper is defined elsewhere in the project).
duckdb_logical_type lt = logical_type_buf_to_logical_type(env, logical_type);
321+
// Do not touch `lt` if the helper left an exception pending.
if (env->ExceptionCheck()) {
322+
return -1;
323+
}
324+
325+
idx_t size = duckdb_enum_dictionary_size(lt);
326+
327+
return static_cast<jlong>(size);
328+
}
329+
330+
/*
331+
* Class: org_duckdb_DuckDBBindings
332+
* Method: duckdb_enum_dictionary_value
333+
* Signature: (Ljava/nio/ByteBuffer;J)[B
334+
*/
335+
// JNI entry point: returns the enum dictionary entry at `index` as a Java byte array.
// Returns nullptr when a Java exception is pending, when `index` is out of range (after throwing
// J_SQLException), or when the C API yields a null string.
JNIEXPORT jbyteArray JNICALL Java_org_duckdb_DuckDBBindings_duckdb_1enum_1dictionary_1value(JNIEnv *env, jclass,
336+
jobject logical_type,
337+
jlong index) {
338+
339+
// Obtain the native handle from the Java-side buffer (helper is defined elsewhere in the project).
duckdb_logical_type lt = logical_type_buf_to_logical_type(env, logical_type);
340+
if (env->ExceptionCheck()) {
341+
return nullptr;
342+
}
343+
// Convert the Java long to idx_t; the helper presumably rejects values that do not fit - TODO confirm.
idx_t index_idx = jlong_to_idx(env, index);
344+
if (env->ExceptionCheck()) {
345+
return nullptr;
346+
}
347+
348+
// Bounds-check explicitly before asking the C API for the value.
idx_t size = duckdb_enum_dictionary_size(lt);
349+
if (index_idx >= size) {
350+
env->ThrowNew(J_SQLException, "Invalid enum field index specified");
351+
return nullptr;
352+
}
353+
354+
// Wrap the returned C string in a smart pointer with varchar_deleter so it is released on every exit path.
auto name_ptr = varchar_ptr(duckdb_enum_dictionary_value(lt, index_idx), varchar_deleter);
355+
// NOTE(review): this path returns nullptr without raising a Java exception, so the Java caller sees a null array.
if (name_ptr.get() == nullptr) {
356+
return nullptr;
357+
}
358+
359+
// Length in bytes up to (not including) the terminating NUL.
idx_t len = static_cast<idx_t>(std::strlen(name_ptr.get()));
360+
361+
return make_jbyteArray(env, name_ptr.get(), len);
362+
}
363+
294364
/*
295365
* Class: org_duckdb_DuckDBBindings
296366
* Method: duckdb_destroy_logical_type

src/main/java/org/duckdb/DuckDBAppender.java

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public class DuckDBAppender implements AutoCloseable {
5858

5959
supportedTypes.add(DUCKDB_TYPE_STRUCT.typeId);
6060
supportedTypes.add(DUCKDB_TYPE_UNION.typeId);
61+
supportedTypes.add(DUCKDB_TYPE_ENUM.typeId);
6162
}
6263
private static final CAPIType[] int8Types = new CAPIType[] {DUCKDB_TYPE_TINYINT, DUCKDB_TYPE_UTINYINT};
6364
private static final CAPIType[] int16Types = new CAPIType[] {DUCKDB_TYPE_SMALLINT, DUCKDB_TYPE_USMALLINT};
@@ -69,6 +70,8 @@ public class DuckDBAppender implements AutoCloseable {
6970
private static final CAPIType[] timestampMicrosTypes =
7071
new CAPIType[] {DUCKDB_TYPE_TIMESTAMP, DUCKDB_TYPE_TIMESTAMP_TZ};
7172
private static final CAPIType[] collectionTypes = new CAPIType[] {DUCKDB_TYPE_ARRAY, DUCKDB_TYPE_LIST};
73+
private static final CAPIType[] varlenTypes = new CAPIType[] {DUCKDB_TYPE_VARCHAR, DUCKDB_TYPE_BLOB};
74+
private static final CAPIType[] varcharOrEnumTypes = new CAPIType[] {DUCKDB_TYPE_VARCHAR, DUCKDB_TYPE_ENUM};
7275

7376
private static final int STRING_MAX_INLINE_BYTES = 12;
7477

@@ -564,7 +567,7 @@ public DuckDBAppender appendByteArray(byte[][] values, boolean[][] nullMask) thr
564567
}
565568

566569
public DuckDBAppender append(byte[] values) throws SQLException {
567-
Column col = currentColumn(DUCKDB_TYPE_BLOB);
570+
Column col = currentColumn(varlenTypes);
568571
if (values == null) {
569572
return appendNull();
570573
}
@@ -740,13 +743,24 @@ public DuckDBAppender append(double[][] values, boolean[][] nullMask) throws SQL
740743
// append objects
741744

742745
public DuckDBAppender append(String value) throws SQLException {
743-
Column col = currentColumn(DUCKDB_TYPE_VARCHAR);
746+
Column col = currentColumn(varcharOrEnumTypes);
744747
if (value == null) {
745748
return appendNull();
746749
}
747750

748-
byte[] bytes = value.getBytes(UTF_8);
749-
putStringOrBlob(col, rowIdx, bytes);
751+
switch (col.colType) {
752+
case DUCKDB_TYPE_VARCHAR: {
753+
byte[] bytes = value.getBytes(UTF_8);
754+
putStringOrBlob(col, rowIdx, bytes);
755+
break;
756+
}
757+
case DUCKDB_TYPE_ENUM: {
758+
putEnum(col, rowIdx, value);
759+
break;
760+
}
761+
default:
762+
throw new SQLException(createErrMsg("Invalid type: " + col.colType));
763+
}
750764

751765
moveToNextColumn();
752766
return this;
@@ -1842,6 +1856,16 @@ private void putCompositeElement(Column col, long vectorIdx, Object value) throw
18421856
putStringOrBlob(col, vectorIdx, bytes);
18431857
break;
18441858
}
1859+
case DUCKDB_TYPE_ENUM: {
1860+
String st = (String) value;
1861+
putEnum(col, vectorIdx, st);
1862+
break;
1863+
}
1864+
case DUCKDB_TYPE_BLOB: {
1865+
byte[] bytes = (byte[]) (value);
1866+
putStringOrBlob(col, vectorIdx, bytes);
1867+
break;
1868+
}
18451869
case DUCKDB_TYPE_UUID: {
18461870
UUID uid = (UUID) value;
18471871
long mostSigBits = uid.getMostSignificantBits();
@@ -2058,6 +2082,31 @@ private Column putUnionTag(Column col, long vectorIdx, String tag) throws SQLExc
20582082
return col.children.get(fieldWithTag);
20592083
}
20602084

2085+
// Writes the dictionary index of the ENUM label `value` into col.data at the slot for `vectorIdx`.
// The slot width and the write method are chosen from col.enumInternalType.
// Throws SQLException when `value` is not in col.enumDict or the internal type is not one of the three handled below.
private void putEnum(Column col, long vectorIdx, String value) throws SQLException {
2086+
// Map.get returns null for an unknown label; that is reported together with the set of known labels.
Integer numValueNullable = col.enumDict.get(value);
2087+
if (null == numValueNullable) {
2088+
throw new SQLException(createErrMsg("invalid ENUM value specified: '" + value +
2089+
"', expected one of: " + col.enumDict.keySet()));
2090+
}
2091+
2092+
// Byte offset of the target slot. NOTE(review): the cast assumes the offset fits in an int - confirm against the maximum vector size.
int pos = (int) (vectorIdx * col.enumInternalType.widthBytes);
2093+
col.data.position(pos);
2094+
2095+
// byteValue()/shortValue() keep only the low-order bits of the index, which is what the narrower slot stores.
switch (col.enumInternalType) {
2096+
case DUCKDB_TYPE_UTINYINT:
2097+
col.data.put(numValueNullable.byteValue());
2098+
return;
2099+
case DUCKDB_TYPE_USMALLINT:
2100+
col.data.putShort(numValueNullable.shortValue());
2101+
return;
2102+
case DUCKDB_TYPE_UINTEGER:
2103+
col.data.putInt(numValueNullable.intValue());
2104+
return;
2105+
default:
2106+
throw new SQLException(createErrMsg("invalid ENUM internal type: " + col.enumInternalType));
2107+
}
2108+
}
2109+
20612110
// state invariants
20622111

20632112
private boolean rowBegunInvariant() {
@@ -2254,6 +2303,17 @@ private static List<Column> createTopLevelColumns(ByteBuffer chunkRef, ByteBuffe
22542303
return columns;
22552304
}
22562305

2306+
// Reads the ENUM dictionary of the given logical type through the native bindings and returns a map
// from each label to its position in the dictionary.
private static Map<String, Integer> readEnumDict(ByteBuffer colTypeRef) {
2307+
// LinkedHashMap keeps the labels in dictionary order when the key set is iterated or printed.
Map<String, Integer> dict = new LinkedHashMap<>();
2308+
long size = duckdb_enum_dictionary_size(colTypeRef);
2309+
for (long i = 0; i < size; i++) {
2310+
// The native call hands the label back as raw bytes; strFromUTF8 (defined elsewhere) turns them into a String.
byte[] nameUtf8 = duckdb_enum_dictionary_value(colTypeRef, i);
2311+
String name = strFromUTF8(nameUtf8);
2312+
// The stored number is the label's dictionary position.
dict.put(name, (int) i);
2313+
}
2314+
return dict;
2315+
}
2316+
22572317
private static class Column {
22582318
private final Column parent;
22592319
private final int idx;
@@ -2264,6 +2324,8 @@ private static class Column {
22642324
private final int decimalScale;
22652325
private final long arraySize;
22662326
private final String structFieldName;
2327+
private final Map<String, Integer> enumDict;
2328+
private final CAPIType enumInternalType;
22672329

22682330
private final ByteBuffer vectorRef;
22692331
private final List<Column> children = new ArrayList<>();
@@ -2323,8 +2385,17 @@ private Column(Column parent, int idx, ByteBuffer colTypeRef, ByteBuffer vector,
23232385
this.arraySize = duckdb_array_type_array_size(parent.colTypeRef);
23242386
}
23252387

2388+
if (colType == DUCKDB_TYPE_ENUM) {
2389+
this.enumDict = readEnumDict(this.colTypeRef);
2390+
int enumInternalTypeId = duckdb_enum_internal_type(this.colTypeRef);
2391+
this.enumInternalType = capiTypeFromTypeId(enumInternalTypeId);
2392+
} else {
2393+
this.enumDict = null;
2394+
this.enumInternalType = null;
2395+
}
2396+
23262397
long maxElems = maxElementsCount();
2327-
if (colType.widthBytes > 0 || colType == DUCKDB_TYPE_DECIMAL) {
2398+
if (colType.widthBytes > 0 || colType == DUCKDB_TYPE_DECIMAL || colType == DUCKDB_TYPE_ENUM) {
23282399
long vectorSizeBytes = maxElems * widthBytes();
23292400
this.data = duckdb_vector_get_data(vectorRef, vectorSizeBytes);
23302401
if (null == this.data) {
@@ -2423,6 +2494,8 @@ void setNullOnVectorIdx(long vectorIdx) {
24232494
long widthBytes() {
24242495
if (colType == DUCKDB_TYPE_DECIMAL) {
24252496
return decimalInternalType.widthBytes;
2497+
} else if (colType == DUCKDB_TYPE_ENUM) {
2498+
return enumInternalType.widthBytes;
24262499
} else {
24272500
return colType.widthBytes;
24282501
}

src/main/java/org/duckdb/DuckDBBindings.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ public class DuckDBBindings {
4343

4444
static native void duckdb_destroy_logical_type(ByteBuffer logical_type);
4545

46+
static native int duckdb_enum_internal_type(ByteBuffer logical_type);
47+
48+
static native long duckdb_enum_dictionary_size(ByteBuffer logical_type);
49+
50+
static native byte[] duckdb_enum_dictionary_value(ByteBuffer logical_type, long index);
51+
4652
// vector
4753

4854
static native ByteBuffer duckdb_create_vector(ByteBuffer logical_type);
@@ -163,7 +169,7 @@ enum CAPIType {
163169
// duckdb_timestamp_ns (nanoseconds)
164170
DUCKDB_TYPE_TIMESTAMP_NS(22, 8),
165171
// enum type, only useful as logical type
166-
DUCKDB_TYPE_ENUM(23),
172+
DUCKDB_TYPE_ENUM(23, 0),
167173
// list type, only useful as logical type
168174
DUCKDB_TYPE_LIST(24, 16),
169175
// struct type, only useful as logical type

src/test/java/org/duckdb/TestAppender.java

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.duckdb;
22

3+
import static java.nio.charset.StandardCharsets.UTF_8;
34
import static java.time.ZoneOffset.UTC;
45
import static org.duckdb.DuckDBHugeInt.HUGE_INT_MAX;
56
import static org.duckdb.DuckDBHugeInt.HUGE_INT_MIN;
@@ -848,4 +849,75 @@ public static void test_appender_incomplete_flush() throws Exception {
848849
}
849850
}
850851
}
852+
853+
// Checks that append(byte[]) can fill a VARCHAR column: UTF-8 bytes written through the appender
// must read back as the same strings via SQL.
public static void test_appender_varchar_as_bytes() throws Exception {
854+
try (DuckDBConnection conn = DriverManager.getConnection(JDBC_URL).unwrap(DuckDBConnection.class);
855+
Statement stmt = conn.createStatement()) {
856+
stmt.execute("CREATE TABLE tab1 (col1 INTEGER, col2 VARCHAR)");
857+
// Non-ASCII sample that includes surrogate pairs, so the round trip exercises multi-byte UTF-8 sequences.
String cjkValue = "\u4B54\uD86D\uDF7C\uD83D\uDD25\uD83D\uDE1C";
858+
859+
// Append two rows, passing the VARCHAR values as byte arrays instead of Strings.
try (DuckDBAppender appender = conn.createAppender("tab1")) {
860+
appender.beginRow()
861+
.append(41)
862+
.append("foo".getBytes(UTF_8))
863+
.endRow()
864+
.beginRow()
865+
.append(42)
866+
.append(cjkValue.getBytes(UTF_8))
867+
.endRow();
868+
}
869+
870+
// Read the rows back in insertion order (col1 ascending) and compare with the original strings.
try (ResultSet rs = stmt.executeQuery("SELECT col2 FROM tab1 ORDER BY col1")) {
871+
assertTrue(rs.next());
872+
assertEquals(rs.getString(1), "foo");
873+
874+
assertTrue(rs.next());
875+
assertEquals(rs.getString(1), cjkValue);
876+
877+
assertFalse(rs.next());
878+
}
879+
}
880+
}
881+
882+
// Checks appending to a top-level ENUM column: valid labels, default and NULL values,
// and rejection of a label that is not part of the ENUM.
public static void test_appender_basic_enum() throws Exception {
883+
try (DuckDBConnection conn = DriverManager.getConnection(JDBC_URL).unwrap(DuckDBConnection.class);
884+
Statement stmt = conn.createStatement()) {
885+
886+
stmt.execute("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');");
887+
stmt.execute("CREATE TABLE tab1(col1 INTEGER, col2 mood)");
888+
889+
// Five rows: three valid labels plus one default and one explicit NULL in the ENUM column.
try (DuckDBAppender appender = conn.createAppender("tab1")) {
890+
appender.beginRow().append(41).append("sad").endRow();
891+
appender.beginRow().append(42).append("happy").endRow();
892+
appender.beginRow().append(43).appendDefault().endRow();
893+
appender.beginRow().append(44).appendNull().endRow();
894+
appender.beginRow().append(45).append("ok").endRow();
895+
}
896+
897+
// The ENUM is cast to VARCHAR so the labels can be compared as plain strings.
try (ResultSet rs = stmt.executeQuery("SELECT CAST(col2 AS VARCHAR) FROM tab1 ORDER BY col1")) {
898+
assertTrue(rs.next());
899+
assertEquals(rs.getString(1), "sad");
900+
901+
assertTrue(rs.next());
902+
assertEquals(rs.getString(1), "happy");
903+
904+
// Row 43: col2 was declared without a DEFAULT clause, so the appended default is expected to read back as NULL.
assertTrue(rs.next());
905+
assertNull(rs.getObject(1));
906+
assertTrue(rs.wasNull());
907+
908+
// Row 44: explicit NULL.
assertTrue(rs.next());
909+
assertNull(rs.getObject(1));
910+
assertTrue(rs.wasNull());
911+
912+
assertTrue(rs.next());
913+
assertEquals(rs.getString(1), "ok");
914+
915+
assertFalse(rs.next());
916+
}
917+
918+
// A label that is not in the ENUM dictionary must be rejected with SQLException.
try (DuckDBAppender appender = conn.createAppender("tab1")) {
919+
assertThrows(() -> { appender.beginRow().append(44).append("foobar").endRow(); }, SQLException.class);
920+
}
921+
}
922+
}
851923
}

0 commit comments

Comments
 (0)