From ab164c922f97169d2e74caee0219b73941330db3 Mon Sep 17 00:00:00 2001
From: Calvin Kirs
Date: Tue, 26 Sep 2023 11:31:09 +0800
Subject: [PATCH] ningbo

---
 fe/be-java-extensions/avro-scanner/pom.xml | 99 -
 .../apache/doris/avro/AvroColumnValue.java | 167 --
 .../org/apache/doris/avro/AvroJNIScanner.java | 246 --
 .../org/apache/doris/avro/AvroProperties.java | 38 -
 .../org/apache/doris/avro/AvroReader.java | 37 -
 .../org/apache/doris/avro/HDFSFileReader.java | 78 -
 .../org/apache/doris/avro/S3FileReader.java | 97 -
 .../java/org/apache/doris/avro/S3Utils.java | 109 -
 .../src/main/resources/package.xml | 41 -
 fe/be-java-extensions/hudi-scanner/pom.xml | 302 ---
 .../apache/doris/hudi/HudiColumnValue.java | 199 --
 .../org/apache/doris/hudi/HudiJniScanner.java | 244 --
 .../java/org/apache/doris/hudi/Utils.java | 134 -
 .../src/main/resources/package.xml | 41 -
 .../apache/doris/hudi/BaseSplitReader.scala | 725 ------
 .../doris/hudi/HoodieRecordIterator.scala | 143 --
 .../doris/hudi/MORSnapshotSplitReader.scala | 183 --
 .../apache/doris/hudi/HudiJniScannerTest.java | 31 -
 fe/be-java-extensions/java-common/pom.xml | 64 -
 .../classloader/JniScannerClassLoader.java | 39 -
 .../common/classloader/ScannerLoader.java | 140 --
 .../classloader/ThreadClassLoaderContext.java | 35 -
 .../common/exception/InternalException.java | 28 -
 .../common/exception/UdfRuntimeException.java | 28 -
 .../apache/doris/common/jni/JniScanner.java | 130 -
 .../doris/common/jni/MockJniScanner.java | 203 --
 .../doris/common/jni/utils/JMXJsonUtil.java | 282 ---
 .../common/jni/utils/JNINativeMethod.java | 38 -
 .../common/jni/utils/JavaUdfDataType.java | 235 --
 .../doris/common/jni/utils/JniUtil.java | 272 ---
 .../common/jni/utils/JvmPauseMonitor.java | 321 ---
 .../doris/common/jni/utils/OffHeap.java | 183 --
 .../common/jni/utils/TypeNativeBytes.java | 132 -
 .../doris/common/jni/utils/UdfUtils.java | 509 ----
 .../doris/common/jni/vec/ColumnType.java | 374 ---
 .../doris/common/jni/vec/ColumnValue.java | 71 -
 .../common/jni/vec/NativeColumnValue.java | 48 -
 .../doris/common/jni/vec/ScanPredicate.java | 298 ---
 .../doris/common/jni/vec/TableSchema.java | 83 -
 .../doris/common/jni/vec/VectorColumn.java | 702 ------
 .../doris/common/jni/vec/VectorTable.java | 151 --
 .../src/main/resources/log4j.properties | 26 -
 .../src/main/resources/package.xml | 41 -
 .../doris/common/jni/JniScannerTest.java | 61 -
 fe/be-java-extensions/java-udf/pom.xml | 80 -
 .../org/apache/doris/udf/BaseExecutor.java | 1091 ---------
 .../org/apache/doris/udf/UdafExecutor.java | 428 ----
 .../java/org/apache/doris/udf/UdfConvert.java | 1774 --------------
 .../org/apache/doris/udf/UdfExecutor.java | 258 --
 .../java-udf/src/main/resources/package.xml | 41 -
 .../org/apache/doris/udf/ConstantOneUdf.java | 24 -
 .../org/apache/doris/udf/DateTimeUdf.java | 30 -
 .../java/org/apache/doris/udf/DecimalUdf.java | 31 -
 .../org/apache/doris/udf/LargeIntUdf.java | 31 -
 .../org/apache/doris/udf/SimpleAddUdf.java | 24 -
 .../org/apache/doris/udf/StringConcatUdf.java | 24 -
 fe/be-java-extensions/jdbc-scanner/pom.xml | 96 -
 .../org/apache/doris/jdbc/JdbcDataSource.java | 44 -
 .../org/apache/doris/jdbc/JdbcExecutor.java | 2166 -----------------
 .../src/main/resources/package.xml | 41 -
 .../max-compute-scanner/pom.xml | 104 -
 .../maxcompute/MaxComputeColumnValue.java | 237 --
 .../maxcompute/MaxComputeJniScanner.java | 262 --
 .../doris/maxcompute/MaxComputeTableScan.java | 81 -
 .../src/main/resources/package.xml | 41 -
 fe/be-java-extensions/paimon-scanner/pom.xml | 133 -
 .../doris/paimon/PaimonColumnValue.java | 145 --
 .../apache/doris/paimon/PaimonJniScanner.java | 173 --
 .../doris/paimon/PaimonScannerUtils.java | 45 -
 .../src/main/resources/package.xml | 41 -
 fe/be-java-extensions/pom.xml | 48 -
 .../preload-extensions/pom.xml | 253 --
 .../java/org/apache/doris/preload/README.md | 217 --
 .../src/main/resources/package.xml | 41 -
 fe/pom.xml | 2 +-
 75 files changed, 1 insertion(+), 15413 deletions(-)
 delete mode 100644 fe/be-java-extensions/avro-scanner/pom.xml
 delete mode 100644 fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroColumnValue.java
 delete mode 100644 fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroJNIScanner.java
 delete mode 100644 fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroProperties.java
 delete mode 100644 fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroReader.java
 delete mode 100644 fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/HDFSFileReader.java
 delete mode 100644 fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/S3FileReader.java
 delete mode 100644 fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/S3Utils.java
 delete mode 100644 fe/be-java-extensions/avro-scanner/src/main/resources/package.xml
 delete mode 100644 fe/be-java-extensions/hudi-scanner/pom.xml
 delete mode 100644 fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiColumnValue.java
 delete mode 100644 fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java
 delete mode 100644 fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/Utils.java
 delete mode 100644 fe/be-java-extensions/hudi-scanner/src/main/resources/package.xml
 delete mode 100644 fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala
 delete mode 100644 fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/HoodieRecordIterator.scala
 delete mode 100644 fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala
 delete mode 100644 fe/be-java-extensions/hudi-scanner/src/test/java/org/apache/doris/hudi/HudiJniScannerTest.java
 delete mode 100644 fe/be-java-extensions/java-common/pom.xml
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/JniScannerClassLoader.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/ScannerLoader.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/ThreadClassLoaderContext.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/exception/InternalException.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/exception/UdfRuntimeException.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/JniScanner.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/MockJniScanner.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JMXJsonUtil.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JavaUdfDataType.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JniUtil.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JvmPauseMonitor.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/OffHeap.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/TypeNativeBytes.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/UdfUtils.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnValue.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/NativeColumnValue.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ScanPredicate.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/TableSchema.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java
 delete mode 100644 fe/be-java-extensions/java-common/src/main/resources/log4j.properties
 delete mode 100644 fe/be-java-extensions/java-common/src/main/resources/package.xml
 delete mode 100644 fe/be-java-extensions/java-common/src/test/java/org/apache/doris/common/jni/JniScannerTest.java
 delete mode 100644 fe/be-java-extensions/java-udf/pom.xml
 delete mode 100644 fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java
 delete mode 100644 fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdafExecutor.java
 delete mode 100644 fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfConvert.java
 delete mode 100644 fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java
 delete mode 100644 fe/be-java-extensions/java-udf/src/main/resources/package.xml
 delete mode 100644 fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/ConstantOneUdf.java
 delete mode 100644 fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/DateTimeUdf.java
 delete mode 100644 fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/DecimalUdf.java
 delete mode 100644 fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/LargeIntUdf.java
 delete mode 100644 fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/SimpleAddUdf.java
 delete mode 100644 fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/StringConcatUdf.java
 delete mode 100644 fe/be-java-extensions/jdbc-scanner/pom.xml
 delete mode 100644 fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSource.java
 delete mode 100644 fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java
 delete mode 100644 fe/be-java-extensions/jdbc-scanner/src/main/resources/package.xml
 delete mode 100644 fe/be-java-extensions/max-compute-scanner/pom.xml
 delete mode 100644 fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java
 delete mode 100644 
fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java delete mode 100644 fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java delete mode 100644 fe/be-java-extensions/max-compute-scanner/src/main/resources/package.xml delete mode 100644 fe/be-java-extensions/paimon-scanner/pom.xml delete mode 100644 fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java delete mode 100644 fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java delete mode 100644 fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonScannerUtils.java delete mode 100644 fe/be-java-extensions/paimon-scanner/src/main/resources/package.xml delete mode 100644 fe/be-java-extensions/pom.xml delete mode 100644 fe/be-java-extensions/preload-extensions/pom.xml delete mode 100644 fe/be-java-extensions/preload-extensions/src/main/java/org/apache/doris/preload/README.md delete mode 100644 fe/be-java-extensions/preload-extensions/src/main/resources/package.xml diff --git a/fe/be-java-extensions/avro-scanner/pom.xml b/fe/be-java-extensions/avro-scanner/pom.xml deleted file mode 100644 index f95fa947051336..00000000000000 --- a/fe/be-java-extensions/avro-scanner/pom.xml +++ /dev/null @@ -1,99 +0,0 @@ - - - - - be-java-extensions - org.apache.doris - ${revision} - - 4.0.0 - - avro-scanner - - - 8 - 8 - UTF-8 - - - - - org.apache.doris - java-common - ${project.version} - - - org.apache.hadoop - hadoop-client - provided - - - org.apache.hadoop - hadoop-common - provided - - - org.apache.hadoop - hadoop-hdfs - provided - - - com.amazonaws - aws-java-sdk-s3 - - - org.apache.doris - hive-catalog-shade - provided - - - - - - avro-scanner - - - org.apache.maven.plugins - maven-assembly-plugin - - - src/main/resources/package.xml - - - - - - - - - - make-assembly - package - - single - - - - - - - \ No newline at end of file diff --git a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroColumnValue.java b/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroColumnValue.java deleted file mode 100644 index dd72c9aad5010d..00000000000000 --- a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroColumnValue.java +++ /dev/null @@ -1,167 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.avro; - -import org.apache.doris.common.jni.vec.ColumnValue; - -import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.List; -import java.util.Map.Entry; - -public class AvroColumnValue implements ColumnValue { - - private final Object fieldData; - private final ObjectInspector fieldInspector; - - public AvroColumnValue(ObjectInspector fieldInspector, Object fieldData) { - this.fieldInspector = fieldInspector; - this.fieldData = fieldData; - } - - private Object inspectObject() { - return ((PrimitiveObjectInspector) fieldInspector).getPrimitiveJavaObject(fieldData); - } - - @Override - public boolean canGetStringAsBytes() { - return false; - } - - @Override - public boolean isNull() { - return false; - } - - @Override - public boolean getBoolean() { - return (boolean) inspectObject(); - } - - @Override - public byte getByte() { - return (byte) inspectObject(); - } - - @Override - public short getShort() { - return (short) inspectObject(); - } - - @Override - public int getInt() { - return (int) inspectObject(); - } - - @Override - public float getFloat() { - return (float) inspectObject(); - } - - @Override - public long getLong() { - return (long) inspectObject(); - } - - @Override - public double getDouble() { - return (double) inspectObject(); - } - - @Override - public BigInteger getBigInteger() { - return null; - } - - @Override - public BigDecimal getDecimal() { - return (BigDecimal) inspectObject(); - } - - @Override - public String getString() { - return inspectObject().toString(); - } - - @Override - public byte[] getStringAsBytes() { - throw new UnsupportedOperationException(); - } - - @Override - public LocalDate getDate() { - // avro has no date type - return null; - } - - @Override - public LocalDateTime getDateTime() { - // avro has no dateTime type - return null; - } - - @Override - public byte[] getBytes() { - return (byte[]) inspectObject(); - } - - @Override - public void unpackArray(List values) { - ListObjectInspector inspector = (ListObjectInspector) fieldInspector; - List items = inspector.getList(fieldData); - ObjectInspector itemInspector = inspector.getListElementObjectInspector(); - for (Object item : items) { - AvroColumnValue avroColumnValue = null; - if (item != null) { - avroColumnValue = new AvroColumnValue(itemInspector, item); - } - values.add(avroColumnValue); - } - } - - @Override - public void unpackMap(List keys, List values) { - MapObjectInspector inspector = (MapObjectInspector) fieldInspector; - ObjectInspector keyObjectInspector = inspector.getMapKeyObjectInspector(); - ObjectInspector valueObjectInspector = inspector.getMapValueObjectInspector(); - for (Entry kv : inspector.getMap(fieldData).entrySet()) { - AvroColumnValue avroKey = null; - AvroColumnValue avroValue = null; - if (kv.getKey() != null) { - avroKey = new AvroColumnValue(keyObjectInspector, kv.getKey()); - } - if (kv.getValue() != null) { - avroValue = new AvroColumnValue(valueObjectInspector, kv.getValue()); - } - keys.add(avroKey); - values.add(avroValue); - } - } - - @Override - public void unpackStruct(List structFieldIndex, List values) { - - } -} diff --git 
a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroJNIScanner.java b/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroJNIScanner.java deleted file mode 100644 index 11bce610d70e7a..00000000000000 --- a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroJNIScanner.java +++ /dev/null @@ -1,246 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.avro; - -import org.apache.doris.common.jni.JniScanner; -import org.apache.doris.common.jni.vec.ColumnType; -import org.apache.doris.common.jni.vec.ScanPredicate; -import org.apache.doris.common.jni.vec.TableSchema; -import org.apache.doris.common.jni.vec.TableSchema.SchemaColumn; -import org.apache.doris.thrift.TFileType; -import org.apache.doris.thrift.TPrimitiveType; - -import org.apache.avro.Schema; -import org.apache.avro.Schema.Field; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.JavaUtils; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Objects; -import java.util.Properties; -import java.util.stream.Collectors; - -public class AvroJNIScanner extends JniScanner { - - private static final Logger LOG = LogManager.getLogger(AvroJNIScanner.class); - private final TFileType fileType; - private final String uri; - private final Map requiredParams; - private final Integer fetchSize; - private int[] requiredColumnIds; - private String[] columnTypes; - private String[] requiredFields; - private ColumnType[] requiredTypes; - private AvroReader avroReader; - private final boolean isGetTableSchema; - private StructObjectInspector rowInspector; - private Deserializer deserializer; - private StructField[] structFields; - private ObjectInspector[] fieldInspectors; - private String serde; - - /** - * Call by JNI for get table data or get table schema - * - * @param fetchSize The size of data fetched each time - * @param requiredParams required params - */ - public AvroJNIScanner(int fetchSize, Map requiredParams) { - this.requiredParams = requiredParams; - this.fetchSize = fetchSize; - this.isGetTableSchema = 
Boolean.parseBoolean(requiredParams.get(AvroProperties.IS_GET_TABLE_SCHEMA)); - this.fileType = TFileType.findByValue(Integer.parseInt(requiredParams.get(AvroProperties.FILE_TYPE))); - this.uri = requiredParams.get(AvroProperties.URI); - if (!isGetTableSchema) { - this.columnTypes = requiredParams.get(AvroProperties.COLUMNS_TYPES) - .split(AvroProperties.COLUMNS_TYPE_DELIMITER); - this.requiredFields = requiredParams.get(AvroProperties.REQUIRED_FIELDS) - .split(AvroProperties.FIELDS_DELIMITER); - this.requiredTypes = new ColumnType[requiredFields.length]; - this.serde = requiredParams.get(AvroProperties.HIVE_SERDE); - this.structFields = new StructField[requiredFields.length]; - this.fieldInspectors = new ObjectInspector[requiredFields.length]; - } - } - - private void init() throws Exception { - requiredColumnIds = new int[requiredFields.length]; - for (int i = 0; i < requiredFields.length; i++) { - ColumnType columnType = ColumnType.parseType(requiredFields[i], columnTypes[i]); - requiredTypes[i] = columnType; - requiredColumnIds[i] = i; - } - - Properties properties = createProperties(); - deserializer = getDeserializer(new Configuration(), properties, this.serde); - rowInspector = (StructObjectInspector) deserializer.getObjectInspector(); - - for (int i = 0; i < requiredFields.length; i++) { - StructField field = rowInspector.getStructFieldRef(requiredFields[i]); - structFields[i] = field; - fieldInspectors[i] = field.getFieldObjectInspector(); - } - } - - public Properties createProperties() { - Properties properties = new Properties(); - properties.setProperty(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, - Arrays.stream(this.requiredColumnIds).mapToObj(String::valueOf).collect(Collectors.joining(","))); - properties.setProperty(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, String.join(",", requiredFields)); - properties.setProperty(AvroProperties.COLUMNS, String.join(",", requiredFields)); - properties.setProperty(AvroProperties.COLUMNS2TYPES, String.join(",", columnTypes)); - properties.setProperty(serdeConstants.SERIALIZATION_LIB, this.serde); - return properties; - } - - private Deserializer getDeserializer(Configuration configuration, Properties properties, String name) - throws Exception { - Class deserializerClass = Class.forName(name, true, JavaUtils.getClassLoader()) - .asSubclass(Deserializer.class); - Deserializer deserializer = deserializerClass.getConstructor().newInstance(); - deserializer.initialize(configuration, properties); - return deserializer; - } - - @Override - public void open() throws IOException { - try { - if (!isGetTableSchema) { - init(); - } - } catch (Exception e) { - LOG.warn("Failed to init avro scanner. 
", e); - throw new IOException(e); - } - switch (fileType) { - case FILE_HDFS: - this.avroReader = new HDFSFileReader(uri); - break; - case FILE_S3: - String accessKey = requiredParams.get(AvroProperties.S3_ACCESS_KEY); - String secretKey = requiredParams.get(AvroProperties.S3_SECRET_KEY); - String endpoint = requiredParams.get(AvroProperties.S3_ENDPOINT); - String region = requiredParams.get(AvroProperties.S3_REGION); - this.avroReader = new S3FileReader(accessKey, secretKey, endpoint, region, uri); - break; - default: - LOG.warn("Unsupported " + fileType.name() + " file type."); - throw new IOException("Unsupported " + fileType.name() + " file type."); - } - this.avroReader.open(new Configuration()); - if (!isGetTableSchema) { - initTableInfo(requiredTypes, requiredFields, new ScanPredicate[0], fetchSize); - } - } - - @Override - public void close() throws IOException { - if (Objects.nonNull(avroReader)) { - avroReader.close(); - } - } - - @Override - protected int getNext() throws IOException { - int numRows = 0; - for (; numRows < getBatchSize(); numRows++) { - if (!avroReader.hasNext()) { - break; - } - GenericRecord rowRecord = (GenericRecord) avroReader.getNext(); - for (int i = 0; i < requiredFields.length; i++) { - Object fieldData = rowRecord.get(requiredFields[i]); - if (fieldData == null) { - appendData(i, null); - } else { - AvroColumnValue fieldValue = new AvroColumnValue(fieldInspectors[i], fieldData); - appendData(i, fieldValue); - } - } - } - return numRows; - } - - @Override - protected TableSchema parseTableSchema() throws UnsupportedOperationException { - Schema schema = avroReader.getSchema(); - List schemaFields = schema.getFields(); - List schemaColumns = new ArrayList<>(); - for (Field schemaField : schemaFields) { - Schema avroSchema = schemaField.schema(); - String columnName = schemaField.name().toLowerCase(Locale.ROOT); - - SchemaColumn schemaColumn = new SchemaColumn(); - TPrimitiveType tPrimitiveType = serializeSchemaType(avroSchema, schemaColumn); - schemaColumn.setName(columnName); - schemaColumn.setType(tPrimitiveType); - schemaColumns.add(schemaColumn); - } - return new TableSchema(schemaColumns); - } - - private TPrimitiveType serializeSchemaType(Schema avroSchema, SchemaColumn schemaColumn) - throws UnsupportedOperationException { - Schema.Type type = avroSchema.getType(); - switch (type) { - case NULL: - return TPrimitiveType.NULL_TYPE; - case STRING: - return TPrimitiveType.STRING; - case INT: - return TPrimitiveType.INT; - case BOOLEAN: - return TPrimitiveType.BOOLEAN; - case LONG: - return TPrimitiveType.BIGINT; - case FLOAT: - return TPrimitiveType.FLOAT; - case BYTES: - return TPrimitiveType.BINARY; - case DOUBLE: - return TPrimitiveType.DOUBLE; - case ARRAY: - SchemaColumn arrayChildColumn = new SchemaColumn(); - schemaColumn.addChildColumn(arrayChildColumn); - arrayChildColumn.setType(serializeSchemaType(avroSchema.getElementType(), arrayChildColumn)); - return TPrimitiveType.ARRAY; - case MAP: - SchemaColumn mapChildColumn = new SchemaColumn(); - schemaColumn.addChildColumn(mapChildColumn); - mapChildColumn.setType(serializeSchemaType(avroSchema.getValueType(), mapChildColumn)); - return TPrimitiveType.MAP; - default: - throw new UnsupportedOperationException("avro format: " + type.getName() + " is not supported."); - } - } - -} diff --git a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroProperties.java b/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroProperties.java deleted file mode 
100644 index 6619b6888c4a9e..00000000000000 --- a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroProperties.java +++ /dev/null @@ -1,38 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.avro; - -public class AvroProperties { - - protected static final String COLUMNS_TYPE_DELIMITER = "#"; - protected static final String FIELDS_DELIMITER = ","; - - protected static final String IS_GET_TABLE_SCHEMA = "is_get_table_schema"; - protected static final String COLUMNS_TYPES = "columns_types"; - protected static final String REQUIRED_FIELDS = "required_fields"; - protected static final String FILE_TYPE = "file_type"; - protected static final String URI = "uri"; - protected static final String S3_ACCESS_KEY = "s3.access_key"; - protected static final String S3_SECRET_KEY = "s3.secret_key"; - protected static final String S3_ENDPOINT = "s3.endpoint"; - protected static final String S3_REGION = "s3.region"; - protected static final String HIVE_SERDE = "hive.serde"; - protected static final String COLUMNS = "columns"; - protected static final String COLUMNS2TYPES = "columns.types"; - -} diff --git a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroReader.java b/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroReader.java deleted file mode 100644 index eb012e402be416..00000000000000 --- a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/AvroReader.java +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.avro; - -import org.apache.avro.Schema; -import org.apache.hadoop.conf.Configuration; - -import java.io.IOException; - -public interface AvroReader { - - void open(Configuration conf) throws IOException; - - Schema getSchema(); - - boolean hasNext(); - - Object getNext() throws IOException; - - void close() throws IOException; - -} diff --git a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/HDFSFileReader.java b/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/HDFSFileReader.java deleted file mode 100644 index 8c189704027522..00000000000000 --- a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/HDFSFileReader.java +++ /dev/null @@ -1,78 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.avro; - -import org.apache.avro.Schema; -import org.apache.avro.file.DataFileStream; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - -import java.io.BufferedInputStream; -import java.io.IOException; -import java.net.URI; -import java.util.Objects; - -public class HDFSFileReader implements AvroReader { - private static final Logger LOG = LogManager.getLogger(HDFSFileReader.class); - private final Path filePath; - private final String url; - private DataFileStream reader; - private BufferedInputStream inputStream; - - public HDFSFileReader(String url) { - this.url = url; - this.filePath = new Path(url); - } - - @Override - public void open(Configuration conf) throws IOException { - FileSystem fs = FileSystem.get(URI.create(url), conf); - inputStream = new BufferedInputStream(fs.open(filePath)); - reader = new DataFileStream<>(inputStream, new GenericDatumReader<>()); - } - - @Override - public Schema getSchema() { - return reader.getSchema(); - } - - @Override - public boolean hasNext() { - return reader.hasNext(); - } - - @Override - public Object getNext() throws IOException { - return reader.next(); - } - - @Override - public void close() throws IOException { - if (Objects.nonNull(inputStream)) { - inputStream.close(); - } - if (Objects.nonNull(reader)) { - reader.close(); - } - } -} diff --git a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/S3FileReader.java b/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/S3FileReader.java deleted file mode 100644 index 4b1b4a864ce86b..00000000000000 --- a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/S3FileReader.java +++ /dev/null @@ -1,97 +0,0 @@ -// Licensed to the Apache 
Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.avro; - -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.S3Object; -import org.apache.avro.Schema; -import org.apache.avro.file.DataFileStream; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Objects; - -public class S3FileReader implements AvroReader { - - private static final Logger LOG = LogManager.getLogger(S3FileReader.class); - private final String bucketName; - private final String key; - private AmazonS3 s3Client; - private DataFileStream reader; - private InputStream s3ObjectInputStream; - private final AWSCredentials credentials; - private final String endpoint; - private final String region; - - public S3FileReader(String accessKey, String secretKey, String endpoint, String region, String uri) - throws IOException { - this.endpoint = endpoint; - this.region = region; - this.credentials = new BasicAWSCredentials(accessKey, secretKey); - S3Utils.parseURI(uri); - this.bucketName = S3Utils.getBucket(); - this.key = S3Utils.getKey(); - } - - @Override - public void open(Configuration conf) throws IOException { - s3Client = AmazonS3ClientBuilder.standard() - .withCredentials(new AWSStaticCredentialsProvider(credentials)) - .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, region)) - .build(); - S3Object object = s3Client.getObject(new GetObjectRequest(bucketName, key)); - s3ObjectInputStream = object.getObjectContent(); - reader = new DataFileStream<>(s3ObjectInputStream, new GenericDatumReader<>()); - } - - @Override - public Schema getSchema() { - return reader.getSchema(); - } - - @Override - public boolean hasNext() { - return reader.hasNext(); - } - - @Override - public Object getNext() throws IOException { - return reader.next(); - } - - @Override - public void close() throws IOException { - if (Objects.nonNull(s3ObjectInputStream)) { - s3ObjectInputStream.close(); - } - if (Objects.nonNull(reader)) { - reader.close(); - } - } -} diff --git a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/S3Utils.java b/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/S3Utils.java deleted file mode 100644 index 
85ac4893cf6295..00000000000000 --- a/fe/be-java-extensions/avro-scanner/src/main/java/org/apache/doris/avro/S3Utils.java +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.avro; - -import org.apache.commons.lang3.StringUtils; - -import java.io.IOException; - -public class S3Utils { - private static final String SCHEMA_S3 = "s3"; - private static final String SCHEMA_HTTP = "http"; - private static final String SCHEMA_HTTPS = "https"; - private static final String SCHEME_DELIM = "://"; - private static final String PATH_DELIM = "/"; - private static final String QUERY_DELIM = "\\?"; - private static final String FRAGMENT_DELIM = "#"; - private static String bucket; - private static String key; - - /** - * eg: - * s3: s3://bucket1/path/to/file.txt - * http: http://10.10.10.1:9000/bucket1/to/file.txt - * https: https://10.10.10.1:9000/bucket1/to/file.txt - *

- * schema: s3,http,https - * bucket: bucket1 - * key: path/to/file.txt - */ - public static void parseURI(String uri) throws IOException { - if (StringUtils.isEmpty(uri)) { - throw new IOException("s3 uri is empty."); - } - String[] schemeSplit = uri.split(SCHEME_DELIM); - String rest; - if (schemeSplit.length == 2) { - if (schemeSplit[0].equalsIgnoreCase(SCHEMA_S3)) { - // has scheme, eg: s3://bucket1/path/to/file.txt - rest = schemeSplit[1]; - String[] authoritySplit = rest.split(PATH_DELIM, 2); - if (authoritySplit.length < 1) { - throw new IOException("Invalid S3 URI. uri=" + uri); - } - bucket = authoritySplit[0]; - // support s3://bucket1 - key = authoritySplit.length == 1 ? "/" : authoritySplit[1]; - } else if (schemeSplit[0].equalsIgnoreCase(SCHEMA_HTTP) || schemeSplit[0].equalsIgnoreCase(SCHEMA_HTTPS)) { - // has scheme, eg: http(s)://host/bucket1/path/to/file.txt - rest = schemeSplit[1]; - String[] authoritySplit = rest.split(PATH_DELIM, 3); - if (authoritySplit.length != 3) { - throw new IOException("Invalid S3 HTTP URI: uri=" + uri); - } - // authority_split[1] is host - bucket = authoritySplit[1]; - key = authoritySplit[2]; - } else { - throw new IOException("Invalid S3 HTTP URI: uri=" + uri); - } - - } else if (schemeSplit.length == 1) { - // no scheme, eg: path/to/file.txt - bucket = ""; // unknown - key = uri; - } else { - throw new IOException("Invalid S3 URI. uri=" + uri); - } - - key = key.trim(); - if (StringUtils.isEmpty(key)) { - throw new IOException("Invalid S3 URI. uri=" + uri); - } - // Strip query and fragment if they exist - String[] querySplit = key.split(QUERY_DELIM); - String[] fragmentSplit = querySplit[0].split(FRAGMENT_DELIM); - key = fragmentSplit[0]; - } - - public static String getBucket() { - return bucket; - } - - public static String getKey() { - return key; - } - - public static void main(String[] args) throws IOException { - S3Utils.parseURI("https://10.10.10.1:9000/bucket1/path/person.avro"); - String bucket1 = S3Utils.getBucket(); - String key1 = S3Utils.getKey(); - System.out.println(bucket1 + " " + key1); - } - -} diff --git a/fe/be-java-extensions/avro-scanner/src/main/resources/package.xml b/fe/be-java-extensions/avro-scanner/src/main/resources/package.xml deleted file mode 100644 index 4bbb2610603363..00000000000000 --- a/fe/be-java-extensions/avro-scanner/src/main/resources/package.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - jar-with-dependencies - - jar - - false - - - / - true - true - runtime - - - **/Log4j2Plugins.dat - - - - - diff --git a/fe/be-java-extensions/hudi-scanner/pom.xml b/fe/be-java-extensions/hudi-scanner/pom.xml deleted file mode 100644 index d4f7a458612c85..00000000000000 --- a/fe/be-java-extensions/hudi-scanner/pom.xml +++ /dev/null @@ -1,302 +0,0 @@ - - - - - be-java-extensions - org.apache.doris - ${revision} - - 4.0.0 - hudi-scanner - - - ${basedir}/../../ - 1 - 2.12.15 - 2.12 - 3.2.0 - 3.2 - 3.0.16 - 1.11.2 - - - - - - org.apache.avro - avro - ${avro.version} - provided - - - org.apache.avro - avro-tools - - - - - - - - - org.scala-lang - scala-library - ${scala.version} - provided - - - org.apache.hadoop - hadoop-common - provided - - - org.apache.hudi - hudi-spark-client - ${hudi.version} - provided - - - org.apache.hudi - hudi-spark-common_${scala.binary.version} - ${hudi.version} - provided - - - org.apache.avro - avro - - - org.apache.hudi - hudi-spark3-common - ${hudi.version} - provided - - - org.apache.hudi - hudi-spark3.2.x_${scala.binary.version} - ${hudi.version} - provided - - - json4s-ast_2.11 - 
org.json4s - - - json4s-core_2.11 - org.json4s - - - json4s-jackson_2.11 - org.json4s - - - json4s-scalap_2.11 - org.json4s - - - - - org.apache.parquet - parquet-avro - 1.10.1 - provided - - - org.apache.spark - spark-core_${scala.binary.version} - - - javax.servlet - * - - - jackson-module-scala_2.12 - com.fasterxml.jackson.module - - - hadoop-client-api - org.apache.hadoop - - - hadoop-client-runtime - org.apache.hadoop - - - ${spark.version} - provided - - - org.apache.spark - spark-sql_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-launcher_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-catalyst_${scala.binary.version} - ${spark.version} - provided - - - org.codehaus.janino - janino - - - org.codehaus.janino - commons-compiler - - - - - - org.codehaus.janino - janino - ${janino.version} - provided - - - org.codehaus.janino - commons-compiler - - - - - org.codehaus.janino - commons-compiler - ${janino.version} - provided - - - - com.fasterxml.jackson.module - jackson-module-scala_${scala.binary.version} - ${jackson.version} - provided - - - com.google.guava - guava - - - - - org.apache.doris - java-common - ${project.version} - - - org.apache.thrift - libthrift - - - - - - hudi-scanner - src/main/java - src/test/java - - - src/main/resources - - - - - src/test/resources - - - - - - net.alchim31.maven - scala-maven-plugin - 4.7.2 - - - - compile - testCompile - - - - - ${scala.version} - - -unchecked - -deprecation - -feature - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - default-compile - none - - - default-testCompile - none - - - java-compile - - compile - testCompile - - compile - - - - - org.apache.maven.plugins - maven-assembly-plugin - - - src/main/resources/package.xml - - - - - - - - - - make-assembly - package - - single - - - - - - - diff --git a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiColumnValue.java b/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiColumnValue.java deleted file mode 100644 index 7d402e84292df7..00000000000000 --- a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiColumnValue.java +++ /dev/null @@ -1,199 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.hudi; - - -import org.apache.doris.common.jni.vec.ColumnType; -import org.apache.doris.common.jni.vec.ColumnValue; -import org.apache.doris.common.jni.vec.NativeColumnValue; - -import org.apache.spark.sql.catalyst.InternalRow; -import org.apache.spark.sql.catalyst.expressions.UnsafeRow; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.time.Instant; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.ZoneId; -import java.util.List; - -public class HudiColumnValue implements ColumnValue, NativeColumnValue { - private boolean isUnsafe; - private InternalRow internalRow; - private int ordinal; - private int precision; - private int scale; - - HudiColumnValue() { - } - - HudiColumnValue(InternalRow internalRow, int ordinal, int precision, int scale) { - this.isUnsafe = internalRow instanceof UnsafeRow; - this.internalRow = internalRow; - this.ordinal = ordinal; - this.precision = precision; - this.scale = scale; - } - - public void reset(InternalRow internalRow, int ordinal, int precision, int scale) { - this.isUnsafe = internalRow instanceof UnsafeRow; - this.internalRow = internalRow; - this.ordinal = ordinal; - this.precision = precision; - this.scale = scale; - } - - public void reset(int ordinal, int precision, int scale) { - this.ordinal = ordinal; - this.precision = precision; - this.scale = scale; - } - - public void reset(InternalRow internalRow) { - this.isUnsafe = internalRow instanceof UnsafeRow; - this.internalRow = internalRow; - } - - @Override - public boolean canGetStringAsBytes() { - return true; - } - - @Override - public boolean isNull() { - return internalRow.isNullAt(ordinal); - } - - @Override - public boolean getBoolean() { - return internalRow.getBoolean(ordinal); - } - - @Override - public byte getByte() { - return internalRow.getByte(ordinal); - } - - @Override - public short getShort() { - return internalRow.getShort(ordinal); - } - - @Override - public int getInt() { - return internalRow.getInt(ordinal); - } - - @Override - public float getFloat() { - return internalRow.getFloat(ordinal); - } - - @Override - public long getLong() { - return internalRow.getLong(ordinal); - } - - @Override - public double getDouble() { - return internalRow.getDouble(ordinal); - } - - @Override - public BigInteger getBigInteger() { - throw new UnsupportedOperationException("Hoodie type does not support largeint"); - } - - @Override - public BigDecimal getDecimal() { - return internalRow.getDecimal(ordinal, precision, scale).toJavaBigDecimal(); - } - - @Override - public String getString() { - return internalRow.getUTF8String(ordinal).toString(); - } - - @Override - public byte[] getStringAsBytes() { - return internalRow.getUTF8String(ordinal).getBytes(); - } - - @Override - public LocalDate getDate() { - return LocalDate.ofEpochDay(internalRow.getInt(ordinal)); - } - - @Override - public LocalDateTime getDateTime() { - long datetime = internalRow.getLong(ordinal); - long seconds; - long nanoseconds; - if (precision == 3) { - seconds = datetime / 1000; - nanoseconds = (datetime % 1000) * 1000000; - } else if (precision == 6) { - seconds = datetime / 1000000; - nanoseconds = (datetime % 1000000) * 1000; - } else { - throw new RuntimeException("Hoodie timestamp only support milliseconds and microseconds"); - } - return LocalDateTime.ofInstant(Instant.ofEpochSecond(seconds, nanoseconds), ZoneId.systemDefault()); - } - - @Override - public byte[] getBytes() { - return internalRow.getBinary(ordinal); - } - - 
@Override - public void unpackArray(List values) { - - } - - @Override - public void unpackMap(List keys, List values) { - - } - - @Override - public void unpackStruct(List structFieldIndex, List values) { - - } - - @Override - public NativeValue getNativeValue(ColumnType.Type type) { - if (isUnsafe) { - UnsafeRow unsafeRow = (UnsafeRow) internalRow; - switch (type) { - case CHAR: - case VARCHAR: - case BINARY: - case STRING: - long offsetAndSize = unsafeRow.getLong(ordinal); - int offset = (int) (offsetAndSize >> 32); - int size = (int) offsetAndSize; - return new NativeValue(unsafeRow.getBaseObject(), offset, size); - default: - return null; - } - } - return null; - } -} diff --git a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java b/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java deleted file mode 100644 index 64c4fd70e7b542..00000000000000 --- a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java +++ /dev/null @@ -1,244 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.hudi; - - -import org.apache.doris.common.jni.JniScanner; -import org.apache.doris.common.jni.vec.ColumnType; -import org.apache.doris.common.jni.vec.ScanPredicate; - -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.util.WeakIdentityHashMap; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.log4j.Logger; -import org.apache.spark.sql.catalyst.InternalRow; -import org.apache.spark.sql.sources.Filter; -import scala.collection.Iterator; - -import java.io.Closeable; -import java.io.IOException; -import java.lang.reflect.Field; -import java.security.PrivilegedExceptionAction; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; -import java.util.stream.Collectors; - -/** - * The hudi JniScanner - */ -public class HudiJniScanner extends JniScanner { - private static final Logger LOG = Logger.getLogger(HudiJniScanner.class); - - private final int fetchSize; - private final String debugString; - private final HoodieSplit split; - private final ScanPredicate[] predicates; - private final ClassLoader classLoader; - private final UserGroupInformation ugi; - - private long getRecordReaderTimeNs = 0; - private Iterator recordIterator; - - /** - * `GenericDatumReader` of avro is a thread local map, that stores `WeakIdentityHashMap`. - * `WeakIdentityHashMap` has cached the avro resolving decoder, and the cached resolver can only be cleaned when - * its avro schema is recycled and become a week reference. However, the behavior of the week reference queue - * of `WeakIdentityHashMap` is unpredictable. Secondly, the decoder is very memory intensive, the number of threads - * to call the thread local map cannot be too many. - * Two solutions: - * 1. Reduce the number of threads reading avro logs and keep the readers in a fixed thread pool. - * 2. Regularly cleaning the cached resolvers in the thread local map by reflection. 
- */ - private static final AtomicLong lastUpdateTime = new AtomicLong(System.currentTimeMillis()); - private static final long RESOLVER_TIME_OUT = 60000; - private static final ExecutorService avroReadPool; - private static ThreadLocal> AVRO_RESOLVER_CACHE; - private static final Map> cachedResolvers = new ConcurrentHashMap<>(); - private static final ReadWriteLock cleanResolverLock = new ReentrantReadWriteLock(); - private static final ScheduledExecutorService cleanResolverService = Executors.newScheduledThreadPool(1); - - static { - int numThreads = Math.max(Runtime.getRuntime().availableProcessors() * 2 + 1, 4); - if (numThreads > 32) { - numThreads = Runtime.getRuntime().availableProcessors(); - } - avroReadPool = Executors.newFixedThreadPool(numThreads, - new ThreadFactoryBuilder().setNameFormat("avro-log-reader-%d").build()); - LOG.info("Create " + numThreads + " daemon threads to load avro logs"); - - Class avroReader = GenericDatumReader.class; - try { - Field field = avroReader.getDeclaredField("RESOLVER_CACHE"); - field.setAccessible(true); - AVRO_RESOLVER_CACHE = (ThreadLocal>) field.get(null); - LOG.info("Get the resolved cache for avro reader"); - } catch (Exception e) { - AVRO_RESOLVER_CACHE = null; - LOG.warn("Failed to get the resolved cache for avro reader"); - } - - cleanResolverService.scheduleAtFixedRate(() -> { - cleanResolverLock.writeLock().lock(); - try { - if (System.currentTimeMillis() - lastUpdateTime.get() > RESOLVER_TIME_OUT) { - for (WeakIdentityHashMap solver : cachedResolvers.values()) { - solver.clear(); - } - lastUpdateTime.set(System.currentTimeMillis()); - } - } finally { - cleanResolverLock.writeLock().unlock(); - } - }, RESOLVER_TIME_OUT, RESOLVER_TIME_OUT, TimeUnit.MILLISECONDS); - } - - public HudiJniScanner(int fetchSize, Map params) { - debugString = params.entrySet().stream().map(kv -> kv.getKey() + "=" + kv.getValue()) - .collect(Collectors.joining("\n")); - try { - this.classLoader = this.getClass().getClassLoader(); - String predicatesAddressString = params.remove("push_down_predicates"); - this.fetchSize = fetchSize; - this.split = new HoodieSplit(params); - if (predicatesAddressString == null) { - predicates = new ScanPredicate[0]; - } else { - long predicatesAddress = Long.parseLong(predicatesAddressString); - if (predicatesAddress != 0) { - predicates = ScanPredicate.parseScanPredicates(predicatesAddress, split.requiredTypes()); - LOG.info("HudiJniScanner gets pushed-down predicates: " + ScanPredicate.dump(predicates)); - } else { - predicates = new ScanPredicate[0]; - } - } - ugi = Utils.getUserGroupInformation(split.hadoopConf()); - } catch (Exception e) { - LOG.error("Failed to initialize hudi scanner, split params:\n" + debugString, e); - throw e; - } - } - - @Override - public void open() throws IOException { - Future avroFuture = avroReadPool.submit(() -> { - Thread.currentThread().setContextClassLoader(classLoader); - initTableInfo(split.requiredTypes(), split.requiredFields(), predicates, fetchSize); - long startTime = System.nanoTime(); - // RecordReader will use ProcessBuilder to start a hotspot process, which may be stuck, - // so use another process to kill this stuck process. - // TODO(gaoxin): better way to solve the stuck process? 
- AtomicBoolean isKilled = new AtomicBoolean(false); - ScheduledExecutorService executorService = Executors.newScheduledThreadPool(1); - executorService.scheduleAtFixedRate(() -> { - if (!isKilled.get()) { - synchronized (HudiJniScanner.class) { - List pids = Utils.getChildProcessIds( - Utils.getCurrentProcId()); - for (long pid : pids) { - String cmd = Utils.getCommandLine(pid); - if (cmd != null && cmd.contains("org.openjdk.jol.vm.sa.AttachMain")) { - Utils.killProcess(pid); - isKilled.set(true); - LOG.info("Kill hotspot debugger process " + pid); - } - } - } - } - }, 100, 1000, TimeUnit.MILLISECONDS); - - cleanResolverLock.readLock().lock(); - try { - lastUpdateTime.set(System.currentTimeMillis()); - if (ugi != null) { - recordIterator = ugi.doAs( - (PrivilegedExceptionAction>) () -> new MORSnapshotSplitReader( - split).buildScanIterator(new Filter[0])); - } else { - recordIterator = new MORSnapshotSplitReader(split) - .buildScanIterator(new Filter[0]); - } - if (AVRO_RESOLVER_CACHE != null && AVRO_RESOLVER_CACHE.get() != null) { - cachedResolvers.computeIfAbsent(Thread.currentThread().getId(), - threadId -> AVRO_RESOLVER_CACHE.get()); - AVRO_RESOLVER_CACHE.get().clear(); - } - } catch (Exception e) { - LOG.error("Failed to open hudi scanner, split params:\n" + debugString, e); - throw new RuntimeException(e.getMessage(), e); - } finally { - cleanResolverLock.readLock().unlock(); - } - isKilled.set(true); - executorService.shutdownNow(); - getRecordReaderTimeNs += System.nanoTime() - startTime; - }); - try { - avroFuture.get(); - } catch (Exception e) { - throw new IOException(e.getMessage(), e); - } - } - - @Override - public void close() throws IOException { - if (recordIterator instanceof Closeable) { - ((Closeable) recordIterator).close(); - } - recordIterator = null; - } - - @Override - public int getNext() throws IOException { - try { - int readRowNumbers = 0; - HudiColumnValue columnValue = new HudiColumnValue(); - int numFields = split.requiredFields().length; - ColumnType[] columnTypes = split.requiredTypes(); - while (readRowNumbers < fetchSize && recordIterator.hasNext()) { - columnValue.reset(recordIterator.next()); - for (int i = 0; i < numFields; i++) { - columnValue.reset(i, columnTypes[i].getPrecision(), columnTypes[i].getScale()); - appendData(i, columnValue); - } - readRowNumbers++; - } - return readRowNumbers; - } catch (Exception e) { - close(); - LOG.error("Failed to get the next batch of hudi, split params:\n" + debugString, e); - throw new IOException("Failed to get the next batch of hudi.", e); - } - } - - @Override - public Map getStatistics() { - return Collections.singletonMap("timer:GetRecordReaderTime", String.valueOf(getRecordReaderTimeNs)); - } -} diff --git a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/Utils.java b/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/Utils.java deleted file mode 100644 index 9dcfacebb8c087..00000000000000 --- a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/Utils.java +++ /dev/null @@ -1,134 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.hudi; - -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import sun.management.VMManagement; - -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.io.InputStreamReader; -import java.lang.management.ManagementFactory; -import java.lang.management.RuntimeMXBean; -import java.lang.reflect.Field; -import java.lang.reflect.Method; -import java.security.PrivilegedExceptionAction; -import java.util.LinkedList; -import java.util.List; - -public class Utils { - public static class Constants { - public static String HADOOP_USER_NAME = "hadoop.username"; - public static String HADOOP_SECURITY_AUTHENTICATION = "hadoop.security.authentication"; - public static String HADOOP_KERBEROS_PRINCIPAL = "hadoop.kerberos.principal"; - public static String HADOOP_KERBEROS_KEYTAB = "hadoop.kerberos.keytab"; - } - - public static UserGroupInformation getUserGroupInformation(Configuration conf) { - String authentication = conf.get(Constants.HADOOP_SECURITY_AUTHENTICATION, null); - if ("kerberos".equals(authentication)) { - conf.set("hadoop.security.authorization", "true"); - UserGroupInformation.setConfiguration(conf); - String principal = conf.get(Constants.HADOOP_KERBEROS_PRINCIPAL); - String keytab = conf.get(Constants.HADOOP_KERBEROS_KEYTAB); - try { - UserGroupInformation ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab); - UserGroupInformation.setLoginUser(ugi); - return ugi; - } catch (IOException e) { - throw new RuntimeException(e); - } - } else { - String hadoopUserName = conf.get(Constants.HADOOP_USER_NAME); - if (hadoopUserName != null) { - return UserGroupInformation.createRemoteUser(hadoopUserName); - } - } - return null; - } - - public static long getCurrentProcId() { - try { - RuntimeMXBean mxbean = ManagementFactory.getRuntimeMXBean(); - Field jvmField = mxbean.getClass().getDeclaredField("jvm"); - jvmField.setAccessible(true); - VMManagement management = (VMManagement) jvmField.get(mxbean); - Method method = management.getClass().getDeclaredMethod("getProcessId"); - method.setAccessible(true); - return (long) (Integer) method.invoke(management); - } catch (Exception e) { - throw new RuntimeException("Couldn't find PID of current JVM process.", e); - } - } - - public static List getChildProcessIds(long pid) { - try { - Process pgrep = (new ProcessBuilder("pgrep", "-P", String.valueOf(pid))).start(); - BufferedReader reader = new BufferedReader(new InputStreamReader(pgrep.getInputStream())); - List result = new LinkedList<>(); - String line; - while ((line = reader.readLine()) != null) { - result.add(Long.valueOf(line.trim())); - } - pgrep.waitFor(); - return result; - } catch (Exception e) { - throw new RuntimeException("Couldn't get child processes of PID " + pid, e); - } - } - - public static String getCommandLine(long pid) { - try { - return FileUtils.readFileToString(new File(String.format("/proc/%d/cmdline", pid))).trim(); - } 
catch (IOException e) { - return null; - } - } - - public static void killProcess(long pid) { - try { - Process kill = (new ProcessBuilder("kill", "-9", String.valueOf(pid))).start(); - kill.waitFor(); - } catch (Exception e) { - throw new RuntimeException("Couldn't kill process PID " + pid, e); - } - } - - public static HoodieTableMetaClient getMetaClient(Configuration conf, String basePath) { - UserGroupInformation ugi = getUserGroupInformation(conf); - HoodieTableMetaClient metaClient; - if (ugi != null) { - try { - metaClient = ugi.doAs( - (PrivilegedExceptionAction) () -> HoodieTableMetaClient.builder() - .setConf(conf).setBasePath(basePath).build()); - } catch (IOException e) { - throw new RuntimeException(e); - } catch (InterruptedException e) { - throw new RuntimeException("Cannot get hudi client.", e); - } - } else { - metaClient = HoodieTableMetaClient.builder().setConf(conf).setBasePath(basePath).build(); - } - return metaClient; - } -} diff --git a/fe/be-java-extensions/hudi-scanner/src/main/resources/package.xml b/fe/be-java-extensions/hudi-scanner/src/main/resources/package.xml deleted file mode 100644 index 4bbb2610603363..00000000000000 --- a/fe/be-java-extensions/hudi-scanner/src/main/resources/package.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - jar-with-dependencies - - jar - - false - - - / - true - true - runtime - - - **/Log4j2Plugins.dat - - - - - diff --git a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala deleted file mode 100644 index 3c10f8a4cd7208..00000000000000 --- a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala +++ /dev/null @@ -1,725 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
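For reference, the Utils class deleted above pairs a keytab login (getUserGroupInformation) with doAs execution, the same pattern getMetaClient and the scanner's open path rely on. A minimal usage sketch of that pattern follows; the wrapper class name, principal, keytab and table path are illustrative placeholders, not values taken from the original code:

    import org.apache.doris.hudi.Utils;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.security.UserGroupInformation;

    import java.security.PrivilegedExceptionAction;

    public class UgiUsageSketch {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // keys mirror Utils.Constants; the principal, keytab and path values are placeholders
            conf.set("hadoop.security.authentication", "kerberos");
            conf.set("hadoop.kerberos.principal", "doris/_HOST@EXAMPLE.COM");
            conf.set("hadoop.kerberos.keytab", "/etc/doris/doris.keytab");

            UserGroupInformation ugi = Utils.getUserGroupInformation(conf);
            Path table = new Path("/warehouse/hudi_table");
            // run the filesystem call under the keytab-authenticated identity when one is available
            FileStatus[] files = (ugi == null)
                    ? FileSystem.get(conf).listStatus(table)
                    : ugi.doAs((PrivilegedExceptionAction<FileStatus[]>) () ->
                            FileSystem.get(conf).listStatus(table));
            System.out.println("Visible files: " + files.length);
        }
    }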
- -package org.apache.doris.hudi - -import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} -import org.apache.avro.Schema -import org.apache.avro.generic.GenericRecord -import org.apache.doris.common.jni.vec.ColumnType -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path -import org.apache.hadoop.hbase.io.hfile.CacheConfig -import org.apache.hudi.HoodieBaseRelation.{BaseFileReader, convertToAvroSchema} -import org.apache.hudi.HoodieConversionUtils.toScalaOption -import org.apache.hudi.avro.HoodieAvroUtils -import org.apache.hudi.client.utils.SparkInternalSchemaConverter -import org.apache.hudi.common.config.{ConfigProperty, HoodieMetadataConfig, TypedProperties} -import org.apache.hudi.common.fs.FSUtils -import org.apache.hudi.common.model.{HoodieFileFormat, HoodieRecord} -import org.apache.hudi.common.table.timeline.HoodieTimeline -import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver} -import org.apache.hudi.common.util.ValidationUtils.checkState -import org.apache.hudi.common.util.{ConfigUtils, StringUtils} -import org.apache.hudi.config.HoodieWriteConfig -import org.apache.hudi.hadoop.CachingPath -import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter -import org.apache.hudi.internal.schema.utils.{InternalSchemaUtils, SerDeHelper} -import org.apache.hudi.internal.schema.{HoodieSchemaException, InternalSchema} -import org.apache.hudi.io.storage.HoodieAvroHFileReader -import org.apache.hudi.metadata.HoodieTableMetadataUtil -import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieSparkConfUtils, HoodieTableSchema, HoodieTableState} -import org.apache.log4j.Logger -import org.apache.spark.sql.adapter.Spark3_2Adapter -import org.apache.spark.sql.avro.{HoodieAvroSchemaConverters, HoodieSparkAvroSchemaConverters} -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat -import org.apache.spark.sql.execution.datasources.{PartitionedFile, PartitioningUtils} -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.types.{StringType, StructField, StructType} -import org.apache.spark.sql.vectorized.ColumnarBatch -import org.apache.spark.sql.{SQLContext, SparkSession, SparkSessionExtensions} -import org.apache.spark.unsafe.types.UTF8String -import org.apache.spark.{SparkConf, SparkContext} - -import java.lang.reflect.Constructor -import java.net.URI -import java.util.Objects -import java.util.concurrent.TimeUnit -import java.{util => jutil} -import scala.collection.JavaConverters._ -import scala.util.control.NonFatal -import scala.util.{Failure, Success, Try} - -class DorisSparkAdapter extends Spark3_2Adapter { - override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters -} - -class HoodieSplit(private val params: jutil.Map[String, String]) { - val queryId: String = params.remove("query_id") - val basePath: String = params.remove("base_path") - val dataFilePath: String = params.remove("data_file_path") - val dataFileLength: Long = params.remove("data_file_length").toLong - val deltaFilePaths: Array[String] = { - val deltas = params.remove("delta_file_paths") - if (StringUtils.isNullOrEmpty(deltas)) new Array[String](0) else deltas.split(",") - } - - val hudiColumnNames: Array[String] = params.remove("hudi_column_names").split(",") - val hudiColumnTypes: Map[String, String] = 
hudiColumnNames.zip( - params.remove("hudi_column_types").split("#")).toMap - - val requiredFields: Array[String] = { - val readFields = params.remove("required_fields").split(",").filter(_.nonEmpty) - if (readFields.isEmpty) { - // If only read the partition columns, the JniConnector will produce empty required fields. - // Read the "_hoodie_record_key" field at least to know how many rows in current hoodie split - // Even if the JniConnector doesn't read this field, the call of releaseTable will reclaim the resource - Array(HoodieRecord.RECORD_KEY_METADATA_FIELD) - } else { - readFields - } - } - val requiredTypes: Array[ColumnType] = requiredFields.map( - field => ColumnType.parseType(field, hudiColumnTypes(field))) - - val nestedFields: Array[String] = { - val fields = params.remove("nested_fields") - if (StringUtils.isNullOrEmpty(fields)) new Array[String](0) else fields.split(",") - } - val instantTime: String = params.remove("instant_time") - val serde: String = params.remove("serde") - val inputFormat: String = params.remove("input_format") - - val hadoopProperties: Map[String, String] = { - val properties = new jutil.HashMap[String, String] - val iterator = params.entrySet().iterator() - while (iterator.hasNext) { - val kv = iterator.next() - if (kv.getKey.startsWith(BaseSplitReader.HADOOP_CONF_PREFIX)) { - properties.put(kv.getKey.substring(BaseSplitReader.HADOOP_CONF_PREFIX.length), kv.getValue) - iterator.remove() - } - } - properties.asScala.toMap - } - - lazy val hadoopConf: Configuration = { - val conf = new Configuration - hadoopProperties.foreach(kv => conf.set(kv._1, kv._2)) - conf - } - - // NOTE: In cases when Hive Metastore is used as catalog and the table is partitioned, schema in the HMS might contain - // Hive-specific partitioning columns created specifically for HMS to handle partitioning appropriately. 
In that - // case we opt in to not be providing catalog's schema, and instead force Hudi relations to fetch the schema - // from the table itself - val schemaSpec: Option[StructType] = None - - val optParams: Map[String, String] = params.asScala.toMap - - override def equals(obj: Any): Boolean = { - if (obj == null) { - return false - } - obj match { - case split: HoodieSplit => - hashCode() == split.hashCode() - case _ => false - } - } - - override def hashCode(): Int = { - Objects.hash(queryId, basePath) - } -} - -case class HoodieTableInformation(sparkSession: SparkSession, - metaClient: HoodieTableMetaClient, - timeline: HoodieTimeline, - tableConfig: HoodieTableConfig, - resolvedTargetFields: Array[String], - tableAvroSchema: Schema, - internalSchemaOpt: Option[InternalSchema]) - -/** - * Reference to Apache Hudi - * see HoodieBaseRelation - */ -abstract class BaseSplitReader(val split: HoodieSplit) { - - import BaseSplitReader._ - - protected val optParams: Map[String, String] = split.optParams - - protected val tableInformation: HoodieTableInformation = cache.get(split) - - protected val sparkSession: SparkSession = tableInformation.sparkSession - protected val sqlContext: SQLContext = sparkSession.sqlContext - imbueConfigs(sqlContext) - - protected val tableConfig: HoodieTableConfig = tableInformation.tableConfig - protected val tableName: String = tableConfig.getTableName - - // NOTE: Record key-field is assumed singular here due to the either of - // - In case Hudi's meta fields are enabled: record key will be pre-materialized (stored) as part - // of the record's payload (as part of the Hudi's metadata) - // - In case Hudi's meta fields are disabled (virtual keys): in that case record has to bear _single field_ - // identified as its (unique) primary key w/in its payload (this is a limitation of [[SimpleKeyGenerator]], - // which is the only [[KeyGenerator]] permitted for virtual-keys payloads) - protected lazy val recordKeyField: String = - if (tableConfig.populateMetaFields()) { - HoodieRecord.RECORD_KEY_METADATA_FIELD - } else { - val keyFields = tableConfig.getRecordKeyFields.get() - checkState(keyFields.length == 1) - keyFields.head - } - - protected lazy val preCombineFieldOpt: Option[String] = - Option(tableConfig.getPreCombineField) - .orElse(optParams.get(DataSourceWriteOptions.PRECOMBINE_FIELD.key)) match { - // NOTE: This is required to compensate for cases when empty string is used to stub - // property value to avoid it being set with the default value - // TODO(HUDI-3456) cleanup - case Some(f) if !StringUtils.isNullOrEmpty(f) => Some(f) - case _ => None - } - - /** - * Columns that relation has to read from the storage to properly execute on its semantic: for ex, - * for Merge-on-Read tables key fields as well and pre-combine field comprise mandatory set of columns, - * meaning that regardless of whether this columns are being requested by the query they will be fetched - * regardless so that relation is able to combine records properly (if necessary) - * - * @VisibleInTests - */ - val mandatoryFields: Seq[String] - - /** - * NOTE: Initialization of teh following members is coupled on purpose to minimize amount of I/O - * required to fetch table's Avro and Internal schemas - */ - protected lazy val (tableAvroSchema: Schema, internalSchemaOpt: Option[InternalSchema]) = { - (tableInformation.tableAvroSchema, tableInformation.internalSchemaOpt) - } - - protected lazy val tableStructSchema: StructType = convertAvroSchemaToStructType(tableAvroSchema) - - protected lazy 
val partitionColumns: Array[String] = tableConfig.getPartitionFields.orElse(Array.empty) - - protected lazy val specifiedQueryTimestamp: Option[String] = Some(split.instantTime) - - private def queryTimestamp: Option[String] = - specifiedQueryTimestamp.orElse(toScalaOption(timeline.lastInstant()).map(_.getTimestamp)) - - lazy val tableState: HoodieTableState = { - val recordMergerImpls = ConfigUtils.split2List(getConfigValue(HoodieWriteConfig.RECORD_MERGER_IMPLS)).asScala.toList - val recordMergerStrategy = getConfigValue(HoodieWriteConfig.RECORD_MERGER_STRATEGY, - Option(tableInformation.metaClient.getTableConfig.getRecordMergerStrategy)) - val configProperties = getConfigProperties(sparkSession, optParams) - val metadataConfig = HoodieMetadataConfig.newBuilder() - .fromProperties(configProperties) - .enable(configProperties.getBoolean( - HoodieMetadataConfig.ENABLE.key(), HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS) - && HoodieTableMetadataUtil.isFilesPartitionAvailable(tableInformation.metaClient)) - .build() - - // Subset of the state of table's configuration as of at the time of the query - HoodieTableState( - tablePath = split.basePath, - latestCommitTimestamp = queryTimestamp, - recordKeyField = recordKeyField, - preCombineFieldOpt = preCombineFieldOpt, - usesVirtualKeys = !tableConfig.populateMetaFields(), - recordPayloadClassName = tableConfig.getPayloadClass, - metadataConfig = metadataConfig, - recordMergerImpls = recordMergerImpls, - recordMergerStrategy = recordMergerStrategy - ) - } - - private def getConfigValue(config: ConfigProperty[String], - defaultValueOption: Option[String] = Option.empty): String = { - optParams.getOrElse(config.key(), - sqlContext.getConf(config.key(), defaultValueOption.getOrElse(config.defaultValue()))) - } - - def imbueConfigs(sqlContext: SQLContext): Unit = { - sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.filterPushdown", "true") - sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.recordLevelFilter.enabled", "true") - // TODO(HUDI-3639) vectorized reader has to be disabled to make sure MORIncrementalRelation is working properly - sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") - } - - def buildScanIterator(filters: Array[Filter]): Iterator[InternalRow] = { - // NOTE: PLEASE READ CAREFULLY BEFORE MAKING CHANGES - // *Appending* additional columns to the ones requested by the caller is not a problem, as those - // will be eliminated by the caller's projection; - // (!) 
Please note, however, that it's critical to avoid _reordering_ of the requested columns as this - // will break the upstream projection - val targetColumns: Array[String] = appendMandatoryColumns(tableInformation.resolvedTargetFields) - // NOTE: We explicitly fallback to default table's Avro schema to make sure we avoid unnecessary Catalyst > Avro - // schema conversion, which is lossy in nature (for ex, it doesn't preserve original Avro type-names) and - // could have an effect on subsequent de-/serializing records in some exotic scenarios (when Avro unions - // w/ more than 2 types are involved) - val sourceSchema = tableAvroSchema - val (requiredAvroSchema, requiredStructSchema, requiredInternalSchema) = - projectSchema(Either.cond(internalSchemaOpt.isDefined, internalSchemaOpt.get, sourceSchema), targetColumns) - - val tableAvroSchemaStr = tableAvroSchema.toString - val tableSchema = HoodieTableSchema(tableStructSchema, tableAvroSchemaStr, internalSchemaOpt) - val requiredSchema = HoodieTableSchema( - requiredStructSchema, requiredAvroSchema.toString, Some(requiredInternalSchema)) - - composeIterator(tableSchema, requiredSchema, targetColumns, filters) - } - - /** - * Composes iterator provided file split to read from, table and partition schemas, data filters to be applied - * - * @param tableSchema target table's schema - * @param requiredSchema projected schema required by the reader - * @param requestedColumns columns requested by the query - * @param filters data filters to be applied - * @return instance of RDD (holding [[InternalRow]]s) - */ - protected def composeIterator(tableSchema: HoodieTableSchema, - requiredSchema: HoodieTableSchema, - requestedColumns: Array[String], - filters: Array[Filter]): Iterator[InternalRow] - - private final def appendMandatoryColumns(requestedColumns: Array[String]): Array[String] = { - // For a nested field in mandatory columns, we should first get the root-level field, and then - // check for any missing column, as the requestedColumns should only contain root-level fields - // We should only append root-level field as well - val missing = mandatoryFields.map(col => HoodieAvroUtils.getRootLevelFieldName(col)) - .filter(rootField => !requestedColumns.contains(rootField)) - requestedColumns ++ missing - } - - /** - * Projects provided schema by picking only required (projected) top-level columns from it - * - * @param tableSchema schema to project (either of [[InternalSchema]] or Avro's [[Schema]]) - * @param requiredColumns required top-level columns to be projected - */ - def projectSchema(tableSchema: Either[Schema, InternalSchema], - requiredColumns: Array[String]): (Schema, StructType, InternalSchema) = { - tableSchema match { - case Right(internalSchema) => - checkState(!internalSchema.isEmptySchema) - val prunedInternalSchema = InternalSchemaUtils.pruneInternalSchema( - internalSchema, requiredColumns.toList.asJava) - val requiredAvroSchema = AvroInternalSchemaConverter.convert(prunedInternalSchema, "schema") - val requiredStructSchema = convertAvroSchemaToStructType(requiredAvroSchema) - - (requiredAvroSchema, requiredStructSchema, prunedInternalSchema) - - case Left(avroSchema) => - val fieldMap = avroSchema.getFields.asScala.map(f => f.name() -> f).toMap - val requiredFields = requiredColumns.map { col => - val f = fieldMap(col) - // We have to create a new [[Schema.Field]] since Avro schemas can't share field - // instances (and will throw "org.apache.avro.AvroRuntimeException: Field already used") - new Schema.Field(f.name(), 
f.schema(), f.doc(), f.defaultVal(), f.order()) - }.toList - val requiredAvroSchema = Schema.createRecord(avroSchema.getName, avroSchema.getDoc, - avroSchema.getNamespace, avroSchema.isError, requiredFields.asJava) - val requiredStructSchema = convertAvroSchemaToStructType(requiredAvroSchema) - - (requiredAvroSchema, requiredStructSchema, InternalSchema.getEmptyInternalSchema) - } - } - - /** - * Converts Avro's [[Schema]] to Catalyst's [[StructType]] - */ - protected def convertAvroSchemaToStructType(avroSchema: Schema): StructType = { - val schemaConverters = sparkAdapter.getAvroSchemaConverters - schemaConverters.toSqlType(avroSchema) match { - case (dataType, _) => dataType.asInstanceOf[StructType] - } - } - - protected def tryPrunePartitionColumns(tableSchema: HoodieTableSchema, - requiredSchema: HoodieTableSchema): (StructType, HoodieTableSchema, HoodieTableSchema) = { - // Since schema requested by the caller might contain partition columns, we might need to - // prune it, removing all partition columns from it in case these columns are not persisted - // in the data files - // - // NOTE: This partition schema is only relevant to file reader to be able to embed - // values of partition columns (hereafter referred to as partition values) encoded into - // the partition path, and omitted from the data file, back into fetched rows; - // Note that, by default, partition columns are not omitted therefore specifying - // partition schema for reader is not required - if (shouldExtractPartitionValuesFromPartitionPath) { - val partitionSchema = StructType(partitionColumns.map(StructField(_, StringType))) - val prunedDataStructSchema = prunePartitionColumns(tableSchema.structTypeSchema) - val prunedRequiredSchema = prunePartitionColumns(requiredSchema.structTypeSchema) - - (partitionSchema, - HoodieTableSchema(prunedDataStructSchema, convertToAvroSchema(prunedDataStructSchema, tableName).toString), - HoodieTableSchema(prunedRequiredSchema, convertToAvroSchema(prunedRequiredSchema, tableName).toString)) - } else { - (StructType(Nil), tableSchema, requiredSchema) - } - } - - /** - * Controls whether partition values (ie values of partition columns) should be - *

- *   1. Extracted from partition path and appended to individual rows read from the data file (we
- *      delegate this to Spark's [[ParquetFileFormat]])
- *   2. Read from the data-file as is (by default Hudi persists all columns including partition ones)
- * - * This flag is only be relevant in conjunction with the usage of [["hoodie.datasource.write.drop.partition.columns"]] - * config, when Hudi will NOT be persisting partition columns in the data file, and therefore values for - * such partition columns (ie "partition values") will have to be parsed from the partition path, and appended - * to every row only in the fetched dataset. - * - * NOTE: Partition values extracted from partition path might be deviating from the values of the original - * partition columns: for ex, if originally as partition column was used column [[ts]] bearing epoch - * timestamp, which was used by [[TimestampBasedKeyGenerator]] to generate partition path of the format - * [["yyyy/mm/dd"]], appended partition value would bear the format verbatim as it was used in the - * partition path, meaning that string value of "2022/01/01" will be appended, and not its original - * representation - */ - protected val shouldExtractPartitionValuesFromPartitionPath: Boolean = { - // Controls whether partition columns (which are the source for the partition path values) should - // be omitted from persistence in the data files. On the read path it affects whether partition values (values - // of partition columns) will be read from the data file or extracted from partition path - - val shouldOmitPartitionColumns = tableInformation.tableConfig.shouldDropPartitionColumns && partitionColumns.nonEmpty - val shouldExtractPartitionValueFromPath = - optParams.getOrElse(DataSourceReadOptions.EXTRACT_PARTITION_VALUES_FROM_PARTITION_PATH.key, - DataSourceReadOptions.EXTRACT_PARTITION_VALUES_FROM_PARTITION_PATH.defaultValue.toString).toBoolean - shouldOmitPartitionColumns || shouldExtractPartitionValueFromPath - } - - private def prunePartitionColumns(dataStructSchema: StructType): StructType = - StructType(dataStructSchema.filterNot(f => partitionColumns.contains(f.name))) - - /** - * For enable hoodie.datasource.write.drop.partition.columns, need to create an InternalRow on partition values - * and pass this reader on parquet file. So that, we can query the partition columns. 
- */ - protected def getPartitionColumnsAsInternalRow(): InternalRow = { - try { - if (shouldExtractPartitionValuesFromPartitionPath) { - val filePath = new Path(split.dataFilePath) - val tablePathWithoutScheme = CachingPath.getPathWithoutSchemeAndAuthority(tableInformation.metaClient.getBasePathV2) - val partitionPathWithoutScheme = CachingPath.getPathWithoutSchemeAndAuthority(filePath.getParent) - val relativePath = new URI(tablePathWithoutScheme.toString).relativize(new URI(partitionPathWithoutScheme.toString)).toString - val hiveStylePartitioningEnabled = tableConfig.getHiveStylePartitioningEnable.toBoolean - if (hiveStylePartitioningEnabled) { - val partitionSpec = PartitioningUtils.parsePathFragment(relativePath) - InternalRow.fromSeq(partitionColumns.map(partitionSpec(_)).map(UTF8String.fromString)) - } else { - if (partitionColumns.length == 1) { - InternalRow.fromSeq(Seq(UTF8String.fromString(relativePath))) - } else { - val parts = relativePath.split("/") - assert(parts.size == partitionColumns.length) - InternalRow.fromSeq(parts.map(UTF8String.fromString)) - } - } - } else { - InternalRow.empty - } - } catch { - case NonFatal(e) => - LOG.warn(s"Failed to get the right partition InternalRow for file: ${split.dataFilePath}", e) - InternalRow.empty - } - } - - /** - * Wrapper for `buildReaderWithPartitionValues` of [[ParquetFileFormat]] handling [[ColumnarBatch]], - * when Parquet's Vectorized Reader is used - * - * TODO move to HoodieBaseRelation, make private - */ - private[hudi] def buildHoodieParquetReader(sparkSession: SparkSession, - dataSchema: StructType, - partitionSchema: StructType, - requiredSchema: StructType, - filters: Seq[Filter], - options: Map[String, String], - hadoopConf: Configuration, - appendPartitionValues: Boolean = false): PartitionedFile => Iterator[InternalRow] = { - val parquetFileFormat: ParquetFileFormat = sparkAdapter.createHoodieParquetFileFormat(appendPartitionValues).get - val readParquetFile: PartitionedFile => Iterator[Any] = parquetFileFormat.buildReaderWithPartitionValues( - sparkSession = sparkSession, - dataSchema = dataSchema, - partitionSchema = partitionSchema, - requiredSchema = requiredSchema, - filters = filters, - options = options, - hadoopConf = hadoopConf - ) - - file: PartitionedFile => { - val iter = readParquetFile(file) - iter.flatMap { - case r: InternalRow => Seq(r) - case b: ColumnarBatch => b.rowIterator().asScala - } - } - } - - private def createHFileReader(spark: SparkSession, - dataSchema: HoodieTableSchema, - requiredDataSchema: HoodieTableSchema, - filters: Seq[Filter], - options: Map[String, String], - hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { - partitionedFile => { - val reader = new HoodieAvroHFileReader( - hadoopConf, new Path(partitionedFile.filePath), new CacheConfig(hadoopConf)) - - val requiredRowSchema = requiredDataSchema.structTypeSchema - // NOTE: Schema has to be parsed at this point, since Avro's [[Schema]] aren't serializable - // to be passed from driver to executor - val requiredAvroSchema = new Schema.Parser().parse(requiredDataSchema.avroSchemaStr) - val avroToRowConverter = AvroConversionUtils.createAvroToInternalRowConverter( - requiredAvroSchema, requiredRowSchema) - - reader.getRecordIterator(requiredAvroSchema).asScala - .map(record => { - avroToRowConverter.apply(record.getData.asInstanceOf[GenericRecord]).get - }) - } - } - - /** - * Returns file-reader routine accepting [[PartitionedFile]] and returning an [[Iterator]] - * over [[InternalRow]] - */ - protected 
def createBaseFileReader(spark: SparkSession, - partitionSchema: StructType, - dataSchema: HoodieTableSchema, - requiredDataSchema: HoodieTableSchema, - filters: Seq[Filter], - options: Map[String, String], - hadoopConf: Configuration): BaseFileReader = { - val tableBaseFileFormat = tableConfig.getBaseFileFormat - - // NOTE: PLEASE READ CAREFULLY - // Lambda returned from this method is going to be invoked on the executor, and therefore - // we have to eagerly initialize all of the readers even though only one specific to the type - // of the file being read will be used. This is required to avoid serialization of the whole - // relation (containing file-index for ex) and passing it to the executor - val (read: (PartitionedFile => Iterator[InternalRow]), schema: StructType) = - tableBaseFileFormat match { - case HoodieFileFormat.PARQUET => - val parquetReader = buildHoodieParquetReader( - sparkSession = spark, - dataSchema = dataSchema.structTypeSchema, - partitionSchema = partitionSchema, - requiredSchema = requiredDataSchema.structTypeSchema, - filters = filters, - options = options, - hadoopConf = hadoopConf, - // We're delegating to Spark to append partition values to every row only in cases - // when these corresponding partition-values are not persisted w/in the data file itself - appendPartitionValues = shouldExtractPartitionValuesFromPartitionPath - ) - // Since partition values by default are omitted, and not persisted w/in data-files by Spark, - // data-file readers (such as [[ParquetFileFormat]]) have to inject partition values while reading - // the data. As such, actual full schema produced by such reader is composed of - // a) Data-file schema (projected or not) - // b) Appended partition column values - val readerSchema = StructType(requiredDataSchema.structTypeSchema.fields ++ partitionSchema.fields) - - (parquetReader, readerSchema) - - case HoodieFileFormat.HFILE => - val hfileReader = createHFileReader( - spark = spark, - dataSchema = dataSchema, - requiredDataSchema = requiredDataSchema, - filters = filters, - options = options, - hadoopConf = hadoopConf - ) - - (hfileReader, requiredDataSchema.structTypeSchema) - - case _ => throw new UnsupportedOperationException(s"Base file format is not currently supported ($tableBaseFileFormat)") - } - - BaseFileReader( - read = partitionedFile => { - val extension = FSUtils.getFileExtension(partitionedFile.filePath) - if (tableBaseFileFormat.getFileExtension.equals(extension)) { - read(partitionedFile) - } else { - throw new UnsupportedOperationException(s"Invalid base-file format ($extension), expected ($tableBaseFileFormat)") - } - }, - schema = schema - ) - } - - protected val timeline: HoodieTimeline = tableInformation.timeline - - protected def embedInternalSchema(conf: Configuration, internalSchemaOpt: Option[InternalSchema]): Configuration = { - val internalSchema = internalSchemaOpt.getOrElse(InternalSchema.getEmptyInternalSchema) - val querySchemaString = SerDeHelper.toJson(internalSchema) - if (!StringUtils.isNullOrEmpty(querySchemaString)) { - val validCommits = timeline.getInstants.iterator.asScala.map(_.getFileName).mkString(",") - LOG.warn(s"Table valid commits: $validCommits") - - conf.set(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA, SerDeHelper.toJson(internalSchema)) - conf.set(SparkInternalSchemaConverter.HOODIE_TABLE_PATH, split.basePath) - conf.set(SparkInternalSchemaConverter.HOODIE_VALID_COMMITS_LIST, validCommits) - } - conf - } -} - -object SparkMockHelper { - private lazy val mockSparkContext = { - 
val conf = new SparkConf().setMaster("local").setAppName("mock_sc") - .set("spark.ui.enabled", "false") - val sc = new SparkContext(conf) - sc.setLogLevel("WARN") - sc - } - - implicit class MockSparkSession(builder: SparkSession.Builder) { - def createMockSession(split: HoodieSplit): SparkSession = { - val sparkSessionClass = classOf[SparkSession] - val constructor: Constructor[SparkSession] = sparkSessionClass.getDeclaredConstructors - .find(_.getParameterCount == 5).get.asInstanceOf[Constructor[SparkSession]] - constructor.setAccessible(true) - val ss = constructor.newInstance(mockSparkContext, None, None, new SparkSessionExtensions, Map.empty) - split.hadoopProperties.foreach(kv => ss.sessionState.conf.setConfString(kv._1, kv._2)) - ss - } - } -} - -object BaseSplitReader { - - import SparkMockHelper.MockSparkSession - - private val LOG = Logger.getLogger(BaseSplitReader.getClass) - val HADOOP_CONF_PREFIX = "hadoop_conf." - - // Use [[SparkAdapterSupport]] instead ? - private lazy val sparkAdapter = new DorisSparkAdapter - - private lazy val cache: LoadingCache[HoodieSplit, HoodieTableInformation] = { - val loader = new CacheLoader[HoodieSplit, HoodieTableInformation] { - override def load(split: HoodieSplit): HoodieTableInformation = { - // create mock spark session - val sparkSession = SparkSession.builder().createMockSession(split) - val metaClient = Utils.getMetaClient(split.hadoopConf, split.basePath) - // NOTE: We're including compaction here since it's not considering a "commit" operation - val timeline = metaClient.getCommitsAndCompactionTimeline.filterCompletedInstants - - val specifiedQueryTimestamp: Option[String] = Some(split.instantTime) - val schemaResolver = new TableSchemaResolver(metaClient) - val internalSchemaOpt = if (!isSchemaEvolutionEnabledOnRead(split.optParams, sparkSession)) { - None - } else { - Try { - specifiedQueryTimestamp.map(schemaResolver.getTableInternalSchemaFromCommitMetadata) - .getOrElse(schemaResolver.getTableInternalSchemaFromCommitMetadata) - } match { - case Success(internalSchemaOpt) => toScalaOption(internalSchemaOpt) - case Failure(_) => - None - } - } - val tableName = metaClient.getTableConfig.getTableName - val (name, namespace) = AvroConversionUtils.getAvroRecordNameAndNamespace(tableName) - val avroSchema: Schema = internalSchemaOpt.map { is => - AvroInternalSchemaConverter.convert(is, namespace + "." 
+ name) - } orElse { - specifiedQueryTimestamp.map(schemaResolver.getTableAvroSchema) - } orElse { - split.schemaSpec.map(s => convertToAvroSchema(s, tableName)) - } getOrElse { - Try(schemaResolver.getTableAvroSchema) match { - case Success(schema) => schema - case Failure(e) => - throw new HoodieSchemaException("Failed to fetch schema from the table", e) - } - } - - // match column name in lower case - val colNames = internalSchemaOpt.map { internalSchema => - internalSchema.getAllColsFullName.asScala.map(f => f.toLowerCase -> f).toMap - } getOrElse { - avroSchema.getFields.asScala.map(f => f.name().toLowerCase -> f.name()).toMap - } - val resolvedTargetFields = split.requiredFields.map(field => colNames.getOrElse(field.toLowerCase, field)) - - HoodieTableInformation(sparkSession, - metaClient, - timeline, - metaClient.getTableConfig, - resolvedTargetFields, - avroSchema, - internalSchemaOpt) - } - } - CacheBuilder.newBuilder() - .expireAfterAccess(10, TimeUnit.MINUTES) - .maximumSize(4096) - .build(loader) - } - - private def isSchemaEvolutionEnabledOnRead(optParams: Map[String, String], sparkSession: SparkSession): Boolean = { - // NOTE: Schema evolution could be configured both t/h optional parameters vehicle as well as - // t/h Spark Session configuration (for ex, for Spark SQL) - optParams.getOrElse(DataSourceReadOptions.SCHEMA_EVOLUTION_ENABLED.key, - DataSourceReadOptions.SCHEMA_EVOLUTION_ENABLED.defaultValue.toString).toBoolean || - sparkSession.sessionState.conf.getConfString(DataSourceReadOptions.SCHEMA_EVOLUTION_ENABLED.key, - DataSourceReadOptions.SCHEMA_EVOLUTION_ENABLED.defaultValue.toString).toBoolean - } - - private def getConfigProperties(spark: SparkSession, options: Map[String, String]) = { - val sqlConf: SQLConf = spark.sessionState.conf - val properties = new TypedProperties() - // Ambiguous reference when invoking Properties.putAll() in Java 11 - // Reference https://github.com/scala/bug/issues/10418 - options.filter(p => p._2 != null).foreach(p => properties.setProperty(p._1, p._2)) - - // TODO(HUDI-5361) clean up properties carry-over - - // To support metadata listing via Spark SQL we allow users to pass the config via SQL Conf in spark session. Users - // would be able to run SET hoodie.metadata.enable=true in the spark sql session to enable metadata listing. - val isMetadataTableEnabled = HoodieSparkConfUtils.getConfigValue(options, sqlConf, HoodieMetadataConfig.ENABLE.key, null) - if (isMetadataTableEnabled != null) { - properties.setProperty(HoodieMetadataConfig.ENABLE.key(), String.valueOf(isMetadataTableEnabled)) - } - - val listingModeOverride = HoodieSparkConfUtils.getConfigValue(options, sqlConf, - DataSourceReadOptions.FILE_INDEX_LISTING_MODE_OVERRIDE.key, null) - if (listingModeOverride != null) { - properties.setProperty(DataSourceReadOptions.FILE_INDEX_LISTING_MODE_OVERRIDE.key, listingModeOverride) - } - - properties - } -} diff --git a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/HoodieRecordIterator.scala b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/HoodieRecordIterator.scala deleted file mode 100644 index 6e2b7b31e547bc..00000000000000 --- a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/HoodieRecordIterator.scala +++ /dev/null @@ -1,143 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.hudi - -import org.apache.hadoop.conf.Configuration -import org.apache.hudi.HoodieBaseRelation.{BaseFileReader, projectReader} -import org.apache.hudi.MergeOnReadSnapshotRelation.isProjectionCompatible -import org.apache.hudi.{DataSourceReadOptions, HoodieMergeOnReadFileSplit, HoodieTableSchema, HoodieTableState, LogFileIterator, RecordMergingFileIterator} -import org.apache.spark.sql.HoodieCatalystExpressionUtils.generateUnsafeProjection -import org.apache.spark.sql.catalyst.InternalRow - -import java.io.Closeable - -/** - * Class holding base-file readers for 3 different use-cases: - * - *
- *   1. Full-schema reader: is used when the whole row has to be read to perform merging correctly.
- *      This could occur when no optimizations could be applied and we have to fall back to reading the whole row from
- *      the base file and the corresponding delta-log file to merge them correctly
- *   2. Required-schema reader: is used when it's fine to only read the row's projected columns.
- *      This could occur when the row could be merged with the corresponding delta-log record while leveraging only
- *      projected columns
- *   3. Required-schema reader (skip-merging): is used when no merging will be performed (skip-merged).
- *      This could occur when the file-group has no delta-log files
- */ -private[hudi] case class HoodieMergeOnReadBaseFileReaders(fullSchemaReader: BaseFileReader, - requiredSchemaReader: BaseFileReader, - requiredSchemaReaderSkipMerging: BaseFileReader) - -/** - * Provided w/ instance of [[HoodieMergeOnReadFileSplit]], provides an iterator over all of the records stored in - * Base file as well as all of the Delta Log files simply returning concatenation of these streams, while not - * performing any combination/merging of the records w/ the same primary keys (ie producing duplicates potentially) - */ -private class SkipMergeIterator(split: HoodieMergeOnReadFileSplit, - baseFileReader: BaseFileReader, - dataSchema: HoodieTableSchema, - requiredSchema: HoodieTableSchema, - tableState: HoodieTableState, - config: Configuration) - extends LogFileIterator(split, dataSchema, requiredSchema, tableState, config) { - - private val requiredSchemaProjection = generateUnsafeProjection(baseFileReader.schema, structTypeSchema) - - private val baseFileIterator = baseFileReader(split.dataFile.get) - - override def doHasNext: Boolean = { - if (baseFileIterator.hasNext) { - // No merge is required, simply load current row and project into required schema - nextRecord = requiredSchemaProjection(baseFileIterator.next()) - true - } else { - super[LogFileIterator].doHasNext - } - } -} - -/** - * Reference to Apache Hudi - * see HoodieMergeOnReadRDD - */ -class HoodieMORRecordIterator(config: Configuration, - fileReaders: HoodieMergeOnReadBaseFileReaders, - tableSchema: HoodieTableSchema, - requiredSchema: HoodieTableSchema, - tableState: HoodieTableState, - mergeType: String, - fileSplit: HoodieMergeOnReadFileSplit) extends Iterator[InternalRow] with Closeable { - protected val maxCompactionMemoryInBytes: Long = config.getLongBytes( - "hoodie.compaction.memory", 512 * 1024 * 1024) - - protected val recordIterator: Iterator[InternalRow] = fileSplit match { - case dataFileOnlySplit if dataFileOnlySplit.logFiles.isEmpty => - val projectedReader = projectReader(fileReaders.requiredSchemaReaderSkipMerging, requiredSchema.structTypeSchema) - projectedReader(dataFileOnlySplit.dataFile.get) - - case logFileOnlySplit if logFileOnlySplit.dataFile.isEmpty => - new LogFileIterator(logFileOnlySplit, tableSchema, requiredSchema, tableState, config) - - case split => mergeType match { - case DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL => - // val reader = fileReaders.requiredSchemaReaderSkipMerging - // new SkipMergeIterator(split, reader, tableSchema, requiredSchema, tableState, config) - throw new UnsupportedOperationException("Skip merge is optimized by native read") - - case DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL => - val reader = pickBaseFileReader() - new RecordMergingFileIterator(split, reader, tableSchema, requiredSchema, tableState, config) - - case _ => throw new UnsupportedOperationException(s"Not supported merge type ($mergeType)") - } - } - - private def pickBaseFileReader(): BaseFileReader = { - // NOTE: This is an optimization making sure that even for MOR tables we fetch absolute minimum - // of the stored data possible, while still properly executing corresponding relation's semantic - // and meet the query's requirements. 
- // - // Here we assume that iff queried table does use one of the standard (and whitelisted) - // Record Payload classes then we can avoid reading and parsing the records w/ _full_ schema, - // and instead only rely on projected one, nevertheless being able to perform merging correctly - if (isProjectionCompatible(tableState)) { - fileReaders.requiredSchemaReader - } else { - fileReaders.fullSchemaReader - } - } - - override def hasNext: Boolean = { - recordIterator.hasNext - } - - override def next(): InternalRow = { - recordIterator.next() - } - - override def close(): Unit = { - recordIterator match { - case closeable: Closeable => - closeable.close() - case _ => - } - } -} diff --git a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala deleted file mode 100644 index e9958b231e7a1a..00000000000000 --- a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala +++ /dev/null @@ -1,183 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.hudi - -import org.apache.hudi.HoodieBaseRelation.convertToAvroSchema -import org.apache.hudi.avro.HoodieAvroUtils -import org.apache.hudi.common.model.HoodieLogFile -import org.apache.hudi.{DataSourceReadOptions, HoodieMergeOnReadFileSplit, HoodieTableSchema} -import org.apache.spark.sql.SQLContext -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.datasources.PartitionedFile -import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.types.StructType - -/** - * Reference to Apache Hudi - * see MergeOnReadSnapshotRelation - */ -class MORSnapshotSplitReader(override val split: HoodieSplit) extends BaseSplitReader(split) { - /** - * NOTE: These are the fields that are required to properly fulfil Merge-on-Read (MOR) - * semantic: - * - *
- *   1. Primary key is required to make sure we're able to correlate records from the base
- *      file with the updated records from the delta-log file
- *   2. Pre-combine key is required to properly perform the combining (or merging) of the
- *      existing and updated records
- * - * However, in cases when merging is NOT performed (for ex, if file-group only contains base - * files but no delta-log files, or if the query-type is equal to [["skip_merge"]]) neither - * of primary-key or pre-combine-key are required to be fetched from storage (unless requested - * by the query), therefore saving on throughput - */ - protected lazy val mandatoryFieldsForMerging: Seq[String] = - Seq(recordKeyField) ++ preCombineFieldOpt.map(Seq(_)).getOrElse(Seq()) - - override lazy val mandatoryFields: Seq[String] = mandatoryFieldsForMerging - - protected val mergeType: String = optParams.getOrElse(DataSourceReadOptions.REALTIME_MERGE.key, - DataSourceReadOptions.REALTIME_MERGE.defaultValue) - - override protected def composeIterator(tableSchema: HoodieTableSchema, - requiredSchema: HoodieTableSchema, - requestedColumns: Array[String], - filters: Array[Filter]): Iterator[InternalRow] = { - // todo: push down predicates about key field - val requiredFilters = Seq.empty - val optionalFilters = filters - val readers = createBaseFileReaders(tableSchema, requiredSchema, requestedColumns, requiredFilters, optionalFilters) - - new HoodieMORRecordIterator(split.hadoopConf, - readers, - tableSchema, - requiredSchema, - tableState, - mergeType, - getFileSplit()) - } - - private def getFileSplit(): HoodieMergeOnReadFileSplit = { - val logFiles = split.deltaFilePaths.map(new HoodieLogFile(_)) - .sorted(Ordering.comparatorToOrdering(HoodieLogFile.getLogFileComparator)).toList - val partitionedBaseFile = if (split.dataFilePath.isEmpty) { - None - } else { - Some(PartitionedFile(getPartitionColumnsAsInternalRow(), split.dataFilePath, 0, split.dataFileLength)) - } - HoodieMergeOnReadFileSplit(partitionedBaseFile, logFiles) - } - - override def imbueConfigs(sqlContext: SQLContext): Unit = { - super.imbueConfigs(sqlContext) - sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "true") - // there's no thread local TaskContext, so the parquet reader will still use on heap memory even setting true - sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.columnVector.offheap.enabled", "true") - } - - protected def createBaseFileReaders(tableSchema: HoodieTableSchema, - requiredSchema: HoodieTableSchema, - requestedColumns: Array[String], - requiredFilters: Seq[Filter], - optionalFilters: Seq[Filter] = Seq.empty): HoodieMergeOnReadBaseFileReaders = { - val (partitionSchema, dataSchema, requiredDataSchema) = - tryPrunePartitionColumns(tableSchema, requiredSchema) - - val fullSchemaReader = createBaseFileReader( - spark = sqlContext.sparkSession, - partitionSchema = partitionSchema, - dataSchema = dataSchema, - requiredDataSchema = dataSchema, - // This file-reader is used to read base file records, subsequently merging them with the records - // stored in delta-log files. 
As such, we have to read _all_ records from the base file, while avoiding - // applying any filtering _before_ we complete combining them w/ delta-log records (to make sure that - // we combine them correctly); - // As such only required filters could be pushed-down to such reader - filters = requiredFilters, - options = optParams, - // NOTE: We have to fork the Hadoop Config here as Spark will be modifying it - // to configure Parquet reader appropriately - hadoopConf = embedInternalSchema(split.hadoopConf, internalSchemaOpt) - ) - - val requiredSchemaReader = createBaseFileReader( - spark = sqlContext.sparkSession, - partitionSchema = partitionSchema, - dataSchema = dataSchema, - requiredDataSchema = requiredDataSchema, - // This file-reader is used to read base file records, subsequently merging them with the records - // stored in delta-log files. As such, we have to read _all_ records from the base file, while avoiding - // applying any filtering _before_ we complete combining them w/ delta-log records (to make sure that - // we combine them correctly); - // As such only required filters could be pushed-down to such reader - filters = requiredFilters, - options = optParams, - // NOTE: We have to fork the Hadoop Config here as Spark will be modifying it - // to configure Parquet reader appropriately - hadoopConf = embedInternalSchema(split.hadoopConf, requiredDataSchema.internalSchema) - ) - - // Check whether fields required for merging were also requested to be fetched - // by the query: - // - In case they were, there's no optimization we could apply here (we will have - // to fetch such fields) - // - In case they were not, we will provide 2 separate file-readers - // a) One which would be applied to file-groups w/ delta-logs (merging) - // b) One which would be applied to file-groups w/ no delta-logs or - // in case query-mode is skipping merging - val mandatoryColumns = mandatoryFieldsForMerging.map(HoodieAvroUtils.getRootLevelFieldName) - if (mandatoryColumns.forall(requestedColumns.contains)) { - HoodieMergeOnReadBaseFileReaders( - fullSchemaReader = fullSchemaReader, - requiredSchemaReader = requiredSchemaReader, - requiredSchemaReaderSkipMerging = requiredSchemaReader - ) - } else { - val prunedRequiredSchema = { - val unusedMandatoryColumnNames = mandatoryColumns.filterNot(requestedColumns.contains) - val prunedStructSchema = - StructType(requiredDataSchema.structTypeSchema.fields - .filterNot(f => unusedMandatoryColumnNames.contains(f.name))) - - HoodieTableSchema(prunedStructSchema, convertToAvroSchema(prunedStructSchema, tableName).toString) - } - - val requiredSchemaReaderSkipMerging = createBaseFileReader( - spark = sqlContext.sparkSession, - partitionSchema = partitionSchema, - dataSchema = dataSchema, - requiredDataSchema = prunedRequiredSchema, - // This file-reader is only used in cases when no merging is performed, therefore it's safe to push - // down these filters to the base file readers - filters = requiredFilters ++ optionalFilters, - options = optParams, - // NOTE: We have to fork the Hadoop Config here as Spark will be modifying it - // to configure Parquet reader appropriately - hadoopConf = embedInternalSchema(split.hadoopConf, requiredDataSchema.internalSchema) - ) - - HoodieMergeOnReadBaseFileReaders( - fullSchemaReader = fullSchemaReader, - requiredSchemaReader = requiredSchemaReader, - requiredSchemaReaderSkipMerging = requiredSchemaReaderSkipMerging - ) - } - } -} diff --git 
a/fe/be-java-extensions/hudi-scanner/src/test/java/org/apache/doris/hudi/HudiJniScannerTest.java b/fe/be-java-extensions/hudi-scanner/src/test/java/org/apache/doris/hudi/HudiJniScannerTest.java deleted file mode 100644 index 6cdfbdc53e4991..00000000000000 --- a/fe/be-java-extensions/hudi-scanner/src/test/java/org/apache/doris/hudi/HudiJniScannerTest.java +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.hudi; - -import org.junit.Test; - - -/** - * The hudi JniScanner test - */ -public class HudiJniScannerTest { - @Test - public void testOpen() { - } - -} diff --git a/fe/be-java-extensions/java-common/pom.xml b/fe/be-java-extensions/java-common/pom.xml deleted file mode 100644 index 20ed0104fa5943..00000000000000 --- a/fe/be-java-extensions/java-common/pom.xml +++ /dev/null @@ -1,64 +0,0 @@ - - - - - be-java-extensions - org.apache.doris - ${revision} - - 4.0.0 - java-common - - - 8 - 8 - - - - - com.vesoft - client - - - org.apache.doris - fe-common - ${project.version} - - - com.fasterxml.jackson.core - jackson-core - - - org.apache.velocity - velocity-engine-core - - - org.apache.httpcomponents - httpclient - - - com.fasterxml.jackson.core - jackson-databind - - - - diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/JniScannerClassLoader.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/JniScannerClassLoader.java deleted file mode 100644 index 1e6be07e097d0a..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/JniScannerClassLoader.java +++ /dev/null @@ -1,39 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
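For reference, the java-common classes removed in the hunks below (JniScannerClassLoader, ScannerLoader and ThreadClassLoaderContext) gave each JNI scanner its own URLClassLoader and swapped it onto the calling thread only while scanner classes were resolved. A rough sketch of how they combined before this removal; the wrapper class and jar path are illustrative assumptions, whereas the real ScannerLoader derives its paths from $DORIS_HOME/lib/java_extensions:

    import org.apache.doris.common.classloader.JniScannerClassLoader;
    import org.apache.doris.common.classloader.ThreadClassLoaderContext;

    import java.io.File;
    import java.net.URL;
    import java.util.Collections;
    import java.util.List;

    public class ScannerClassLoaderSketch {
        public static Class<?> loadHudiScanner() throws Exception {
            // illustrative jar location; ScannerLoader scans $DORIS_HOME/lib/java_extensions/* at startup
            List<URL> jars = Collections.singletonList(
                    new File("/opt/doris/lib/java_extensions/hudi-scanner/hudi-scanner-jar-with-dependencies.jar")
                            .toURI().toURL());
            JniScannerClassLoader loader = new JniScannerClassLoader(
                    "hudi-scanner", jars, ScannerClassLoaderSketch.class.getClassLoader());
            // the context class loader is swapped for the duration of the block and restored on close()
            try (ThreadClassLoaderContext ignored = new ThreadClassLoaderContext(loader)) {
                return loader.loadClass("org.apache.doris.hudi.HudiJniScanner");
            }
        }
    }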
- -package org.apache.doris.common.classloader; - -import java.net.URL; -import java.net.URLClassLoader; -import java.util.List; - -public class JniScannerClassLoader extends URLClassLoader { - - private final String scannerName; - - public JniScannerClassLoader(String scannerName, List urls, ClassLoader parent) { - super(urls.toArray(new URL[0]), parent); - this.scannerName = scannerName; - } - - @Override - public String toString() { - return "JniScannerClassLoader{" - + "scannerName='" + scannerName - + '}'; - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/ScannerLoader.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/ScannerLoader.java deleted file mode 100644 index c21b6bf0a783fd..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/ScannerLoader.java +++ /dev/null @@ -1,140 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.classloader; - -import com.google.common.collect.Streams; - -import java.io.File; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.file.DirectoryStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.jar.JarEntry; -import java.util.jar.JarFile; -import java.util.stream.Collectors; - -/** - * BE will load scanners by JNI call, and then the JniConnector on BE will get scanner class by getLoadedClass. 
- */ -public class ScannerLoader { - private static final Map> loadedClasses = new HashMap<>(); - private static final String CLASS_SUFFIX = ".class"; - private static final String LOAD_PACKAGE = "org.apache.doris"; - - /** - * Load all classes from $DORIS_HOME/lib/java_extensions/* - */ - public void loadAllScannerJars() { - String basePath = System.getenv("DORIS_HOME"); - File library = new File(basePath, "/lib/java_extensions/"); - // TODO: add thread pool to load each scanner - listFiles(library).stream().filter(File::isDirectory).forEach(sd -> { - JniScannerClassLoader classLoader = new JniScannerClassLoader(sd.getName(), buildClassPath(sd), - this.getClass().getClassLoader()); - try (ThreadClassLoaderContext ignored = new ThreadClassLoaderContext(classLoader)) { - loadJarClassFromDir(sd, classLoader); - } - }); - } - - /** - * Get loaded class for JNI scanners - * @param className JNI scanner class name - * @return scanner class object - * @throws ClassNotFoundException JNI scanner class not found - */ - public Class getLoadedClass(String className) throws ClassNotFoundException { - String loadedClassName = getPackagePathName(className); - if (loadedClasses.containsKey(loadedClassName)) { - return loadedClasses.get(loadedClassName); - } else { - throw new ClassNotFoundException("JNI scanner has not been loaded or no such class: " + className); - } - } - - private static List buildClassPath(File path) { - return listFiles(path).stream() - .map(ScannerLoader::classFileUrl) - .collect(Collectors.toList()); - } - - private static URL classFileUrl(File file) { - try { - return file.toURI().toURL(); - } catch (MalformedURLException e) { - throw new UncheckedIOException(e); - } - } - - public static List listFiles(File library) { - try (DirectoryStream directoryStream = Files.newDirectoryStream(library.toPath())) { - return Streams.stream(directoryStream) - .map(Path::toFile) - .sorted() - .collect(Collectors.toList()); - - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static void loadJarClassFromDir(File dir, JniScannerClassLoader classLoader) { - listFiles(dir).forEach(file -> { - Enumeration entryEnumeration; - List loadClassNames = new ArrayList<>(); - try { - try (JarFile jar = new JarFile(file)) { - entryEnumeration = jar.entries(); - while (entryEnumeration.hasMoreElements()) { - JarEntry entry = entryEnumeration.nextElement(); - String className = entry.getName(); - if (!className.endsWith(CLASS_SUFFIX)) { - continue; - } - className = className.substring(0, className.length() - CLASS_SUFFIX.length()); - String packageClassName = getPackagePathName(className); - if (needToLoad(packageClassName)) { - loadClassNames.add(packageClassName); - } - } - } - for (String className : loadClassNames) { - loadedClasses.putIfAbsent(className, classLoader.loadClass(className)); - } - } catch (Exception e) { - throw new RuntimeException(e.getMessage(), e); - } - }); - } - - private static String getPackagePathName(String className) { - return className.replace("/", "."); - } - - private static boolean needToLoad(String className) { - return className.contains(LOAD_PACKAGE) && !className.contains("$"); - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/ThreadClassLoaderContext.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/ThreadClassLoaderContext.java deleted file mode 100644 index 3f358a501614d7..00000000000000 --- 
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/classloader/ThreadClassLoaderContext.java +++ /dev/null @@ -1,35 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.classloader; - -import java.io.Closeable; - -public class ThreadClassLoaderContext implements Closeable { - - private final ClassLoader originClassLoader; - - public ThreadClassLoaderContext(ClassLoader contextClassLoader) { - this.originClassLoader = Thread.currentThread().getContextClassLoader(); - Thread.currentThread().setContextClassLoader(contextClassLoader); - } - - @Override - public void close() { - Thread.currentThread().setContextClassLoader(originClassLoader); - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/exception/InternalException.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/exception/InternalException.java deleted file mode 100644 index ddd94e50486a41..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/exception/InternalException.java +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.exception; - -public class InternalException extends Exception { - public InternalException(String msg, Throwable cause) { - super(msg, cause); - } - - public InternalException(String msg) { - super(msg); - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/exception/UdfRuntimeException.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/exception/UdfRuntimeException.java deleted file mode 100644 index 861b4f46fe2f00..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/exception/UdfRuntimeException.java +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.exception; - -public class UdfRuntimeException extends Exception { - public UdfRuntimeException(String msg, Throwable cause) { - super(msg, cause); - } - - public UdfRuntimeException(String msg) { - super(msg); - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/JniScanner.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/JniScanner.java deleted file mode 100644 index 5031a0182671ae..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/JniScanner.java +++ /dev/null @@ -1,130 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
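The classloader pieces removed above (JniScannerClassLoader, ScannerLoader, ThreadClassLoaderContext) were driven from the BE through JNI. A minimal Java sketch of that flow, assuming DORIS_HOME points at a deployment with extension jars under lib/java_extensions and using a hypothetical scanner class name, could look like this:

import org.apache.doris.common.classloader.ScannerLoader;
import org.apache.doris.common.classloader.ThreadClassLoaderContext;

public class ScannerLoaderSketch {
    public static void main(String[] args) throws Exception {
        ScannerLoader loader = new ScannerLoader();
        // Scans $DORIS_HOME/lib/java_extensions/<scanner>/ and caches all
        // org.apache.doris classes found in the jars there.
        loader.loadAllScannerJars();
        // Hypothetical class name; throws ClassNotFoundException unless a
        // matching jar was actually deployed and loaded above.
        Class<?> clazz = loader.getLoadedClass("org.apache.doris.example.ExampleJniScanner");
        // Run scanner code with its own class loader as the thread context loader,
        // restoring the previous loader when the block exits.
        try (ThreadClassLoaderContext ignored =
                new ThreadClassLoaderContext(clazz.getClassLoader())) {
            System.out.println("Loaded scanner class: " + clazz.getName());
        }
    }
}

Note that getLoadedClass accepts slash- or dot-separated names, since it normalizes the argument through getPackagePathName before the cache lookup.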
- -package org.apache.doris.common.jni; - - -import org.apache.doris.common.jni.vec.ColumnType; -import org.apache.doris.common.jni.vec.ColumnValue; -import org.apache.doris.common.jni.vec.NativeColumnValue; -import org.apache.doris.common.jni.vec.ScanPredicate; -import org.apache.doris.common.jni.vec.TableSchema; -import org.apache.doris.common.jni.vec.VectorTable; - -import java.io.IOException; -import java.util.Collections; -import java.util.Map; - -public abstract class JniScanner { - protected VectorTable vectorTable; - protected String[] fields; - protected ColumnType[] types; - protected ScanPredicate[] predicates; - protected int batchSize; - - // Initialize JniScanner - public abstract void open() throws IOException; - - // Close JniScanner and release resources - public abstract void close() throws IOException; - - // Scan data and save as vector table - protected abstract int getNext() throws IOException; - - // parse table schema - protected TableSchema parseTableSchema() throws UnsupportedOperationException { - throw new UnsupportedOperationException(); - } - - protected void initTableInfo(ColumnType[] requiredTypes, String[] requiredFields, ScanPredicate[] predicates, - int batchSize) { - this.types = requiredTypes; - this.fields = requiredFields; - this.predicates = predicates; - this.batchSize = batchSize; - } - - protected void appendNativeData(int index, NativeColumnValue value) { - vectorTable.appendNativeData(index, value); - } - - protected void appendData(int index, ColumnValue value) { - vectorTable.appendData(index, value); - } - - protected int getBatchSize() { - return batchSize; - } - - public VectorTable getTable() { - return vectorTable; - } - - public String getTableSchema() throws IOException { - TableSchema tableSchema = parseTableSchema(); - return tableSchema.getTableSchema(); - } - - public long getNextBatchMeta() throws IOException { - if (vectorTable == null) { - vectorTable = new VectorTable(types, fields, predicates, batchSize); - } - int numRows; - try { - numRows = getNext(); - } catch (IOException e) { - releaseTable(); - throw e; - } - if (numRows == 0) { - releaseTable(); - return 0; - } - return getMetaAddress(numRows); - } - - /** - * Get performance metrics. The key should be pattern like "metricType:metricName". - * Support three metric types: timer, counter and bytes. - * The c++ side will attach metricName into profile automatically. - */ - public Map getStatistics() { - return Collections.emptyMap(); - } - - private long getMetaAddress(int numRows) { - vectorTable.setNumRows(numRows); - return vectorTable.getMetaAddress(); - } - - public void resetTable() { - if (vectorTable != null) { - vectorTable.reset(); - } - } - - protected void releaseColumn(int fieldId) { - vectorTable.releaseColumn(fieldId); - } - - public void releaseTable() { - if (vectorTable != null) { - vectorTable.close(); - } - vectorTable = null; - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/MockJniScanner.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/MockJniScanner.java deleted file mode 100644 index 3557b3b9032073..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/MockJniScanner.java +++ /dev/null @@ -1,203 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni; - - -import org.apache.doris.common.jni.vec.ColumnType; -import org.apache.doris.common.jni.vec.ColumnValue; -import org.apache.doris.common.jni.vec.ScanPredicate; - -import org.apache.log4j.Logger; - -import java.io.IOException; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.charset.StandardCharsets; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.List; -import java.util.Map; - -/** - * The demo usage of JniScanner. This class will only be retained during the functional testing phase to - * verify that the communication and data exchange with the BE are correct. - */ -public class MockJniScanner extends JniScanner { - public static class MockColumnValue implements ColumnValue { - private int i; - private int j; - - public MockColumnValue() { - } - - public void set(int i, int j) { - this.i = i; - this.j = j; - } - - @Override - public boolean canGetStringAsBytes() { - return false; - } - - @Override - public boolean isNull() { - return false; - } - - @Override - public boolean getBoolean() { - return (i + j) % 2 == 0; - } - - @Override - public byte getByte() { - return (byte) (i + j); - } - - @Override - public short getShort() { - return (short) (i - j); - } - - @Override - public int getInt() { - return i + j; - } - - @Override - public float getFloat() { - return (float) (j + i - 11) / (i + 1); - } - - @Override - public long getLong() { - return (long) (i - 13) * (j + 1); - } - - @Override - public double getDouble() { - return (double) (j + i - 15) / (i + 1); - } - - @Override - public BigInteger getBigInteger() { - return BigInteger.valueOf(getLong()); - } - - @Override - public BigDecimal getDecimal() { - return BigDecimal.valueOf(getDouble()); - } - - @Override - public String getString() { - return "row-" + i + "-column-" + j; - } - - @Override - public byte[] getStringAsBytes() { - throw new UnsupportedOperationException(); - } - - @Override - public LocalDate getDate() { - return LocalDate.now(); - } - - @Override - public LocalDateTime getDateTime() { - return LocalDateTime.now(); - } - - @Override - public byte[] getBytes() { - return ("row-" + i + "-column-" + j).getBytes(StandardCharsets.UTF_8); - } - - @Override - public void unpackArray(List values) { - - } - - @Override - public void unpackMap(List keys, List values) { - - } - - @Override - public void unpackStruct(List structFieldIndex, List values) { - - } - } - - private static final Logger LOG = Logger.getLogger(MockJniScanner.class); - - private int mockRows; - private int readRows = 0; - private final MockColumnValue columnValue = new MockColumnValue(); - - public MockJniScanner(int batchSize, Map params) { - mockRows = Integer.parseInt(params.get("mock_rows")); - String[] requiredFields = params.get("required_fields").split(","); - String[] types = params.get("columns_types").split("#"); - ColumnType[] columnTypes = new 
ColumnType[types.length]; - for (int i = 0; i < types.length; i++) { - columnTypes[i] = ColumnType.parseType(requiredFields[i], types[i]); - } - ScanPredicate[] predicates = new ScanPredicate[0]; - if (params.containsKey("push_down_predicates")) { - long predicatesAddress = Long.parseLong(params.get("push_down_predicates")); - if (predicatesAddress != 0) { - predicates = ScanPredicate.parseScanPredicates(predicatesAddress, columnTypes); - LOG.info("MockJniScanner gets pushed-down predicates: " + ScanPredicate.dump(predicates)); - } - } - initTableInfo(columnTypes, requiredFields, predicates, batchSize); - } - - @Override - public void open() throws IOException { - - } - - @Override - public void close() throws IOException { - - } - - @Override - protected int getNext() throws IOException { - if (readRows == mockRows) { - return 0; - } - int rows = Math.min(batchSize, mockRows - readRows); - for (int i = 0; i < rows; ++i) { - for (int j = 0; j < types.length; ++j) { - if ((i + j) % 16 == 0) { - appendData(j, null); - } else { - columnValue.set(i, j); - appendData(j, columnValue); - } - } - } - readRows += rows; - return rows; - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JMXJsonUtil.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JMXJsonUtil.java deleted file mode 100644 index 02cb53232de11a..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JMXJsonUtil.java +++ /dev/null @@ -1,282 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
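The abstract JniScanner above defines the contract every scanner in this module implemented: open(), getNext() to fill one batch, and close(). A minimal compilable skeleton, with a hypothetical class name and an intentionally empty schema (a real scanner derives fields and types from the BE-supplied params, as MockJniScanner does), might look like:

import org.apache.doris.common.jni.JniScanner;
import org.apache.doris.common.jni.vec.ColumnType;
import org.apache.doris.common.jni.vec.ScanPredicate;

import java.io.IOException;
import java.util.Map;

public class EmptyJniScanner extends JniScanner {
    // Constructor shape mirrors MockJniScanner; params would normally carry
    // required_fields, column types and pushed-down predicates from the BE.
    public EmptyJniScanner(int batchSize, Map<String, String> params) {
        initTableInfo(new ColumnType[0], new String[0], new ScanPredicate[0], batchSize);
    }

    @Override
    public void open() throws IOException {
        // open files / connections here
    }

    @Override
    public void close() throws IOException {
        // release readers here
    }

    @Override
    protected int getNext() throws IOException {
        // returning 0 tells getNextBatchMeta() that the scan is finished
        return 0;
    }
}

The BE side drives such a scanner through getNextBatchMeta(), which lazily builds the VectorTable from the values passed to initTableInfo, calls getNext(), and returns the table's off-heap meta address, or 0 (after releasing the table) once the scan is exhausted.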
-// This file is copied from -// https://github.com/apache/impala/blob/branch-4.0.0/fe/src/main/java/org/apache/impala/util/JMXJsonUtil.java -// and modified by Doris - -package org.apache.doris.common.jni.utils; - -import com.fasterxml.jackson.core.JsonFactory; -import com.fasterxml.jackson.core.JsonGenerator; -import org.apache.log4j.Logger; - -import java.io.IOException; -import java.io.StringWriter; -import java.lang.management.ManagementFactory; -import java.lang.reflect.Array; -import java.util.Iterator; -import java.util.Set; -import javax.management.AttributeNotFoundException; -import javax.management.InstanceNotFoundException; -import javax.management.IntrospectionException; -import javax.management.MBeanAttributeInfo; -import javax.management.MBeanException; -import javax.management.MBeanInfo; -import javax.management.MBeanServer; -import javax.management.ObjectName; -import javax.management.ReflectionException; -import javax.management.RuntimeErrorException; -import javax.management.RuntimeMBeanException; -import javax.management.openmbean.CompositeData; -import javax.management.openmbean.CompositeType; -import javax.management.openmbean.TabularData; - -/** - * Utility class that returns a JSON representation of the JMX beans. - * This is based on hadoop-common's implementation of JMXJsonServlet. - *
- * Output format: - * { - * "beans" : [ - * { - * "name":"bean-name" - * ... - * } - * ] - * } - * Each bean's attributes will be converted to a JSON object member. - * If the attribute is a boolean, a number, a string, or an array - * it will be converted to the JSON equivalent. - *
- * If the value is a {@link CompositeData} then it will be converted - * to a JSON object with the keys as the name of the JSON member and - * the value is converted following these same rules. - * If the value is a {@link TabularData} then it will be converted - * to an array of the {@link CompositeData} elements that it contains. - * All other objects will be converted to a string and output as such. - * The bean's name and modelerType will be returned for all beans. - */ -public class JMXJsonUtil { - // MBean server instance - protected static transient MBeanServer mBeanServer = - ManagementFactory.getPlatformMBeanServer(); - - private static final Logger LOG = Logger.getLogger(JMXJsonUtil.class); - - // Returns the JMX beans as a JSON string. - public static String getJMXJson() { - StringWriter writer = new StringWriter(); - try { - JsonGenerator jg = null; - try { - JsonFactory jsonFactory = new JsonFactory(); - jg = jsonFactory.createJsonGenerator(writer); - jg.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET); - jg.writeStartObject(); - if (mBeanServer == null) { - jg.writeStringField("result", "ERROR"); - jg.writeStringField("message", "No MBeanServer could be found"); - jg.close(); - LOG.error("No MBeanServer could be found."); - return writer.toString(); - } - listBeans(jg); - } finally { - if (jg != null) { - jg.close(); - } - if (writer != null) { - writer.close(); - } - } - } catch (IOException e) { - LOG.error("Caught an exception while processing JMX request", e); - } - return writer.toString(); - } - - // Utility method that lists all the mbeans and write them using the supplied - // JsonGenerator. - private static void listBeans(JsonGenerator jg) throws IOException { - Set names; - names = mBeanServer.queryNames(null, null); - jg.writeArrayFieldStart("beans"); - Iterator it = names.iterator(); - while (it.hasNext()) { - ObjectName oname = it.next(); - MBeanInfo minfo; - String code = ""; - try { - minfo = mBeanServer.getMBeanInfo(oname); - code = minfo.getClassName(); - String prs = ""; - try { - if ("org.apache.commons.modeler.BaseModelMBean".equals(code)) { - prs = "modelerType"; - code = (String) mBeanServer.getAttribute(oname, prs); - } - } catch (AttributeNotFoundException e) { - // If the modelerType attribute was not found, the class name is used - // instead. - LOG.error("getting attribute " + prs + " of " + oname - + " threw an exception", e); - } catch (MBeanException e) { - // The code inside the attribute getter threw an exception so log it, - // and fall back on the class name - LOG.error("getting attribute " + prs + " of " + oname - + " threw an exception", e); - } catch (RuntimeException e) { - // For some reason even with an MBeanException available to them - // Runtime exceptionscan still find their way through, so treat them - // the same as MBeanException - LOG.error("getting attribute " + prs + " of " + oname - + " threw an exception", e); - } catch (ReflectionException e) { - // This happens when the code inside the JMX bean (setter?? from the - // java docs) threw an exception, so log it and fall back on the - // class name - LOG.error("getting attribute " + prs + " of " + oname - + " threw an exception", e); - } - } catch (InstanceNotFoundException e) { - //Ignored for some reason the bean was not found so don't output it - continue; - } catch (IntrospectionException | ReflectionException e) { - // This is an internal error, something odd happened with reflection so - // log it and don't output the bean. 
- LOG.error("Problem while trying to process JMX query with MBean " + oname, e); - continue; - } - jg.writeStartObject(); - jg.writeStringField("name", oname.toString()); - jg.writeStringField("modelerType", code); - MBeanAttributeInfo[] attrs = minfo.getAttributes(); - for (int i = 0; i < attrs.length; i++) { - writeAttribute(jg, oname, attrs[i]); - } - jg.writeEndObject(); - } - jg.writeEndArray(); - } - - // Utility method to write mBean attributes. - private static void writeAttribute(JsonGenerator jg, ObjectName oname, - MBeanAttributeInfo attr) throws IOException { - if (!attr.isReadable()) { - return; - } - String attName = attr.getName(); - if ("modelerType".equals(attName)) { - return; - } - if (attName.indexOf("=") >= 0 || attName.indexOf(":") >= 0 - || attName.indexOf(" ") >= 0) { - return; - } - Object value = null; - try { - value = mBeanServer.getAttribute(oname, attName); - } catch (RuntimeMBeanException e) { - // UnsupportedOperationExceptions happen in the normal course of business, - // so no need to log them as errors all the time. - if (e.getCause() instanceof UnsupportedOperationException) { - LOG.trace("getting attribute " + attName + " of " + oname + " threw an exception", e); - } else { - LOG.error("getting attribute " + attName + " of " + oname + " threw an exception", e); - } - return; - } catch (RuntimeErrorException e) { - // RuntimeErrorException happens when an unexpected failure occurs in getAttribute - // for example https://issues.apache.org/jira/browse/DAEMON-120 - LOG.debug("getting attribute " + attName + " of " + oname + " threw an exception", e); - return; - } catch (AttributeNotFoundException e) { - //Ignored the attribute was not found, which should never happen because the bean - //just told us that it has this attribute, but if this happens just don't output - //the attribute. - return; - } catch (MBeanException e) { - //The code inside the attribute getter threw an exception so log it, and - // skip outputting the attribute - LOG.error("getting attribute " + attName + " of " + oname + " threw an exception", e); - return; - } catch (RuntimeException e) { - //For some reason even with an MBeanException available to them Runtime exceptions - //can still find their way through, so treat them the same as MBeanException - LOG.error("getting attribute " + attName + " of " + oname + " threw an exception", e); - return; - } catch (ReflectionException e) { - //This happens when the code inside the JMX bean (setter?? from the java docs) - //threw an exception, so log it and skip outputting the attribute - LOG.error("getting attribute " + attName + " of " + oname + " threw an exception", e); - return; - } catch (InstanceNotFoundException e) { - //Ignored the mbean itself was not found, which should never happen because we - //just accessed it (perhaps something unregistered in-between) but if this - //happens just don't output the attribute. 
- return; - } - writeAttribute(jg, attName, value); - } - - private static void writeAttribute(JsonGenerator jg, String attName, Object value) - throws IOException { - jg.writeFieldName(attName); - writeObject(jg, value); - } - - private static void writeObject(JsonGenerator jg, Object value) throws IOException { - if (value == null) { - jg.writeNull(); - } else { - Class c = value.getClass(); - if (c.isArray()) { - jg.writeStartArray(); - int len = Array.getLength(value); - for (int j = 0; j < len; j++) { - Object item = Array.get(value, j); - writeObject(jg, item); - } - jg.writeEndArray(); - } else if (value instanceof Number) { - Number n = (Number) value; - jg.writeNumber(n.toString()); - } else if (value instanceof Boolean) { - Boolean b = (Boolean) value; - jg.writeBoolean(b); - } else if (value instanceof CompositeData) { - CompositeData cds = (CompositeData) value; - CompositeType comp = cds.getCompositeType(); - Set keys = comp.keySet(); - jg.writeStartObject(); - for (String key : keys) { - writeAttribute(jg, key, cds.get(key)); - } - jg.writeEndObject(); - } else if (value instanceof TabularData) { - TabularData tds = (TabularData) value; - jg.writeStartArray(); - for (Object entry : tds.values()) { - writeObject(jg, entry); - } - jg.writeEndArray(); - } else { - jg.writeString(value.toString()); - } - } - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java deleted file mode 100644 index 50820c2ecb9352..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JNINativeMethod.java +++ /dev/null @@ -1,38 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni.utils; - -/** - * Native method in doris::JavaNativeMethods. - */ -public class JNINativeMethod { - /** - * Resize string column and return the new column address in off heap. - */ - public static native long resizeStringColumn(long columnAddr, int byteSize); - - /** - * Allocate memory in off heap, which will be tracked by memory tracker. - */ - public static native long memoryTrackerMalloc(long size); - - /** - * Free memory in off heap, which will be tracked by memory tracker. 
- */ - public static native void memoryTrackerFree(long address); -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JavaUdfDataType.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JavaUdfDataType.java deleted file mode 100644 index 846c4bb172d44a..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JavaUdfDataType.java +++ /dev/null @@ -1,235 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni.utils; - -import org.apache.doris.catalog.Type; -import org.apache.doris.common.exception.InternalException; -import org.apache.doris.thrift.TPrimitiveType; - -import com.google.common.collect.Sets; -import org.apache.log4j.Logger; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.util.HashSet; -import java.util.Set; - -// Data types that are supported as return or argument types in Java UDFs. -public class JavaUdfDataType { - public static final Logger LOG = Logger.getLogger(JavaUdfDataType.class); - public static final JavaUdfDataType INVALID_TYPE = new JavaUdfDataType("INVALID_TYPE", - TPrimitiveType.INVALID_TYPE, 0); - public static final JavaUdfDataType BOOLEAN = new JavaUdfDataType("BOOLEAN", TPrimitiveType.BOOLEAN, 1); - public static final JavaUdfDataType TINYINT = new JavaUdfDataType("TINYINT", TPrimitiveType.TINYINT, 1); - public static final JavaUdfDataType SMALLINT = new JavaUdfDataType("SMALLINT", TPrimitiveType.SMALLINT, 2); - public static final JavaUdfDataType INT = new JavaUdfDataType("INT", TPrimitiveType.INT, 4); - public static final JavaUdfDataType BIGINT = new JavaUdfDataType("BIGINT", TPrimitiveType.BIGINT, 8); - public static final JavaUdfDataType FLOAT = new JavaUdfDataType("FLOAT", TPrimitiveType.FLOAT, 4); - public static final JavaUdfDataType DOUBLE = new JavaUdfDataType("DOUBLE", TPrimitiveType.DOUBLE, 8); - public static final JavaUdfDataType CHAR = new JavaUdfDataType("CHAR", TPrimitiveType.CHAR, 0); - public static final JavaUdfDataType VARCHAR = new JavaUdfDataType("VARCHAR", TPrimitiveType.VARCHAR, 0); - public static final JavaUdfDataType STRING = new JavaUdfDataType("STRING", TPrimitiveType.STRING, 0); - public static final JavaUdfDataType DATE = new JavaUdfDataType("DATE", TPrimitiveType.DATE, 8); - public static final JavaUdfDataType DATETIME = new JavaUdfDataType("DATETIME", TPrimitiveType.DATETIME, 8); - public static final JavaUdfDataType LARGEINT = new JavaUdfDataType("LARGEINT", TPrimitiveType.LARGEINT, 16); - public static final JavaUdfDataType DECIMALV2 = new JavaUdfDataType("DECIMALV2", TPrimitiveType.DECIMALV2, 16); - public static final JavaUdfDataType DATEV2 = new JavaUdfDataType("DATEV2", 
TPrimitiveType.DATEV2, 4); - public static final JavaUdfDataType DATETIMEV2 = new JavaUdfDataType("DATETIMEV2", TPrimitiveType.DATETIMEV2, - 8); - public static final JavaUdfDataType DECIMAL32 = new JavaUdfDataType("DECIMAL32", TPrimitiveType.DECIMAL32, 4); - public static final JavaUdfDataType DECIMAL64 = new JavaUdfDataType("DECIMAL64", TPrimitiveType.DECIMAL64, 8); - public static final JavaUdfDataType DECIMAL128 = new JavaUdfDataType("DECIMAL128", TPrimitiveType.DECIMAL128I, - 16); - public static final JavaUdfDataType ARRAY_TYPE = new JavaUdfDataType("ARRAY_TYPE", TPrimitiveType.ARRAY, 0); - public static final JavaUdfDataType MAP_TYPE = new JavaUdfDataType("MAP_TYPE", TPrimitiveType.MAP, 0); - - private static Set JavaUdfDataTypeSet = new HashSet<>(); - - static { - JavaUdfDataTypeSet.add(INVALID_TYPE); - JavaUdfDataTypeSet.add(BOOLEAN); - JavaUdfDataTypeSet.add(TINYINT); - JavaUdfDataTypeSet.add(SMALLINT); - JavaUdfDataTypeSet.add(INT); - JavaUdfDataTypeSet.add(BIGINT); - JavaUdfDataTypeSet.add(FLOAT); - JavaUdfDataTypeSet.add(DOUBLE); - JavaUdfDataTypeSet.add(CHAR); - JavaUdfDataTypeSet.add(VARCHAR); - JavaUdfDataTypeSet.add(STRING); - JavaUdfDataTypeSet.add(DATE); - JavaUdfDataTypeSet.add(DATETIME); - JavaUdfDataTypeSet.add(LARGEINT); - JavaUdfDataTypeSet.add(DECIMALV2); - JavaUdfDataTypeSet.add(DATEV2); - JavaUdfDataTypeSet.add(DATETIMEV2); - JavaUdfDataTypeSet.add(DECIMAL32); - JavaUdfDataTypeSet.add(DECIMAL64); - JavaUdfDataTypeSet.add(DECIMAL128); - JavaUdfDataTypeSet.add(ARRAY_TYPE); - JavaUdfDataTypeSet.add(MAP_TYPE); - } - - private final String description; - private final TPrimitiveType thriftType; - private final int len; - private int precision; - private int scale; - private Type itemType = null; - private Type keyType; - private Type valueType; - private int keyScale; - private int valueScale; - - public JavaUdfDataType(String description, TPrimitiveType thriftType, int len) { - this.description = description; - this.thriftType = thriftType; - this.len = len; - } - - public JavaUdfDataType(JavaUdfDataType other) { - this.description = other.description; - this.thriftType = other.thriftType; - this.len = other.len; - } - - @Override - public String toString() { - return description; - } - - public TPrimitiveType getPrimitiveType() { - return thriftType; - } - - public int getLen() { - return len; - } - - public static Set getCandidateTypes(Class c) { - if (c == boolean.class || c == Boolean.class) { - return Sets.newHashSet(JavaUdfDataType.BOOLEAN); - } else if (c == byte.class || c == Byte.class) { - return Sets.newHashSet(JavaUdfDataType.TINYINT); - } else if (c == short.class || c == Short.class) { - return Sets.newHashSet(JavaUdfDataType.SMALLINT); - } else if (c == int.class || c == Integer.class) { - return Sets.newHashSet(JavaUdfDataType.INT); - } else if (c == long.class || c == Long.class) { - return Sets.newHashSet(JavaUdfDataType.BIGINT); - } else if (c == float.class || c == Float.class) { - return Sets.newHashSet(JavaUdfDataType.FLOAT); - } else if (c == double.class || c == Double.class) { - return Sets.newHashSet(JavaUdfDataType.DOUBLE); - } else if (c == char.class || c == Character.class) { - return Sets.newHashSet(JavaUdfDataType.CHAR); - } else if (c == String.class) { - return Sets.newHashSet(JavaUdfDataType.STRING); - } else if (Type.DATE_SUPPORTED_JAVA_TYPE.contains(c)) { - return Sets.newHashSet(JavaUdfDataType.DATE, JavaUdfDataType.DATEV2); - } else if (Type.DATETIME_SUPPORTED_JAVA_TYPE.contains(c)) { - return 
Sets.newHashSet(JavaUdfDataType.DATETIME, JavaUdfDataType.DATETIMEV2); - } else if (c == BigInteger.class) { - return Sets.newHashSet(JavaUdfDataType.LARGEINT); - } else if (c == BigDecimal.class) { - return Sets.newHashSet(JavaUdfDataType.DECIMALV2, JavaUdfDataType.DECIMAL32, JavaUdfDataType.DECIMAL64, - JavaUdfDataType.DECIMAL128); - } else if (c == java.util.ArrayList.class) { - return Sets.newHashSet(JavaUdfDataType.ARRAY_TYPE); - } else if (c == java.util.HashMap.class) { - return Sets.newHashSet(JavaUdfDataType.MAP_TYPE); - } - return Sets.newHashSet(JavaUdfDataType.INVALID_TYPE); - } - - public static boolean isSupported(Type t) { - for (JavaUdfDataType javaType : JavaUdfDataTypeSet) { - if (javaType == JavaUdfDataType.INVALID_TYPE) { - continue; - } - if (javaType.getPrimitiveType() == t.getPrimitiveType().toThrift()) { - return true; - } - } - return false; - } - - public int getPrecision() { - return precision; - } - - public void setPrecision(int precision) { - this.precision = precision; - } - - public int getScale() { - return this.thriftType == TPrimitiveType.DECIMALV2 ? 9 : scale; - } - - public void setScale(int scale) { - this.scale = scale; - } - - public Type getItemType() { - return itemType; - } - - public void setItemType(Type type) throws InternalException { - if (this.itemType == null) { - this.itemType = type; - } else { - if (!this.itemType.matchesType(type)) { - LOG.info("set error"); - throw new InternalException("udf type not matches origin type :" + this.itemType.toSql() - + " set type :" + type.toSql()); - } - } - } - - public Type getKeyType() { - return keyType; - } - - public Type getValueType() { - return valueType; - } - - public void setKeyType(Type type) { - this.keyType = type; - } - - public void setValueType(Type type) { - this.valueType = type; - } - - public void setKeyScale(int scale) { - this.keyScale = scale; - } - - public void setValueScale(int scale) { - this.valueScale = scale; - } - - public int getKeyScale() { - return keyScale; - } - - public int getValueScale() { - return valueScale; - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JniUtil.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JniUtil.java deleted file mode 100644 index 1a91df84be6904..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JniUtil.java +++ /dev/null @@ -1,272 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
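JavaUdfDataType.getCandidateTypes() above maps a Java parameter class to the set of Doris types it may bind to (for example, BigDecimal can bind to DECIMALV2, DECIMAL32, DECIMAL64 or DECIMAL128). A small probe with a hypothetical evaluate() signature illustrates the mapping:

import org.apache.doris.common.jni.utils.JavaUdfDataType;

import java.lang.reflect.Method;
import java.math.BigDecimal;
import java.util.Set;

public class UdfTypeProbe {
    // Hypothetical UDF evaluate() signature to probe.
    public static String evaluate(Integer a, BigDecimal b) {
        return a + ":" + b;
    }

    public static void main(String[] args) throws Exception {
        Method m = UdfTypeProbe.class.getMethod("evaluate", Integer.class, BigDecimal.class);
        for (Class<?> arg : m.getParameterTypes()) {
            Set<JavaUdfDataType> candidates = JavaUdfDataType.getCandidateTypes(arg);
            // Integer -> [INT]; BigDecimal -> [DECIMALV2, DECIMAL32, DECIMAL64, DECIMAL128]
            System.out.println(arg.getSimpleName() + " -> " + candidates);
        }
    }
}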
- -package org.apache.doris.common.jni.utils; - -import org.apache.doris.common.exception.InternalException; -import org.apache.doris.thrift.TGetJMXJsonResponse; -import org.apache.doris.thrift.TGetJvmMemoryMetricsResponse; -import org.apache.doris.thrift.TGetJvmThreadsInfoRequest; -import org.apache.doris.thrift.TGetJvmThreadsInfoResponse; -import org.apache.doris.thrift.TJvmMemoryPool; -import org.apache.doris.thrift.TJvmThreadInfo; - -import com.google.common.base.Joiner; -import org.apache.thrift.TBase; -import org.apache.thrift.TDeserializer; -import org.apache.thrift.TException; -import org.apache.thrift.TSerializer; -import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.thrift.protocol.TProtocolFactory; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.io.Writer; -import java.lang.management.GarbageCollectorMXBean; -import java.lang.management.ManagementFactory; -import java.lang.management.MemoryMXBean; -import java.lang.management.MemoryPoolMXBean; -import java.lang.management.MemoryUsage; -import java.lang.management.RuntimeMXBean; -import java.lang.management.ThreadInfo; -import java.lang.management.ThreadMXBean; -import java.util.ArrayList; -import java.util.Map; - -/** - * Utility class with methods intended for JNI clients - */ -public class JniUtil { - private static final TBinaryProtocol.Factory protocolFactory_ = new TBinaryProtocol.Factory(); - - /** - * Initializes the JvmPauseMonitor instance. - */ - public static void initPauseMonitor(long deadlockCheckIntervalS) { - JvmPauseMonitor.INSTANCE.initPauseMonitor(deadlockCheckIntervalS); - } - - /** - * Returns a formatted string containing the simple exception name and the - * exception message without the full stack trace. Includes the - * the chain of causes each in a separate line. - */ - public static String throwableToString(Throwable t) { - StringWriter output = new StringWriter(); - output.write(String.format("%s: %s", t.getClass().getSimpleName(), - t.getMessage())); - // Follow the chain of exception causes and print them as well. - Throwable cause = t; - while ((cause = cause.getCause()) != null) { - output.write(String.format("\nCAUSED BY: %s: %s", - cause.getClass().getSimpleName(), cause.getMessage())); - } - return output.toString(); - } - - /** - * Returns the stack trace of the Throwable object. - */ - public static String throwableToStackTrace(Throwable t) { - Writer output = new StringWriter(); - t.printStackTrace(new PrintWriter(output)); - return output.toString(); - } - - /** - * Serializes input into a byte[] using the default protocol factory. - */ - public static > byte[] serializeToThrift(T input) throws InternalException { - try { - TSerializer serializer = new TSerializer(protocolFactory_); - return serializer.serialize(input); - } catch (TException e) { - throw new InternalException(e.getMessage()); - } - } - - /** - * Serializes input into a byte[] using a given protocol factory. - */ - public static , F extends TProtocolFactory> byte[] serializeToThrift( - T input, F protocolFactory) throws InternalException { - try { - TSerializer serializer = new TSerializer(protocolFactory); - return serializer.serialize(input); - } catch (TException e) { - throw new InternalException(e.getMessage()); - } - } - - public static > void deserializeThrift( - T result, byte[] thriftData) throws InternalException { - deserializeThrift(protocolFactory_, result, thriftData); - } - - /** - * Deserialize a serialized form of a Thrift data structure to its object form. 
- */ - public static , F extends TProtocolFactory> void deserializeThrift( - F protocolFactory, T result, byte[] thriftData) throws InternalException { - // TODO: avoid creating deserializer for each query? - try { - TDeserializer deserializer = new TDeserializer(protocolFactory); - deserializer.deserialize(result, thriftData); - } catch (TException e) { - throw new InternalException(e.getMessage()); - } - } - - /** - * Collect the JVM's memory statistics into a thrift structure for translation into - * Doris metrics by the backend. A synthetic 'total' memory pool is included with - * aggregate statistics for all real pools. Metrics for the JvmPauseMonitor - * and Garbage Collection are also included. - */ - public static byte[] getJvmMemoryMetrics() throws InternalException { - TGetJvmMemoryMetricsResponse jvmMetrics = new TGetJvmMemoryMetricsResponse(); - jvmMetrics.setMemoryPools(new ArrayList()); - TJvmMemoryPool totalUsage = new TJvmMemoryPool(); - - totalUsage.setName("total"); - jvmMetrics.getMemoryPools().add(totalUsage); - - for (MemoryPoolMXBean memBean : ManagementFactory.getMemoryPoolMXBeans()) { - TJvmMemoryPool usage = new TJvmMemoryPool(); - MemoryUsage beanUsage = memBean.getUsage(); - usage.setCommitted(beanUsage.getCommitted()); - usage.setInit(beanUsage.getInit()); - usage.setMax(beanUsage.getMax()); - usage.setUsed(beanUsage.getUsed()); - usage.setName(memBean.getName()); - - totalUsage.committed += beanUsage.getCommitted(); - totalUsage.init += beanUsage.getInit(); - totalUsage.max += beanUsage.getMax(); - totalUsage.used += beanUsage.getUsed(); - - MemoryUsage peakUsage = memBean.getPeakUsage(); - usage.setPeakCommitted(peakUsage.getCommitted()); - usage.setPeakInit(peakUsage.getInit()); - usage.setPeakMax(peakUsage.getMax()); - usage.setPeakUsed(peakUsage.getUsed()); - - totalUsage.peak_committed += peakUsage.getCommitted(); - totalUsage.peak_init += peakUsage.getInit(); - totalUsage.peak_max += peakUsage.getMax(); - totalUsage.peak_used += peakUsage.getUsed(); - - jvmMetrics.getMemoryPools().add(usage); - } - - // Populate heap usage - MemoryMXBean mBean = ManagementFactory.getMemoryMXBean(); - TJvmMemoryPool heap = new TJvmMemoryPool(); - MemoryUsage heapUsage = mBean.getHeapMemoryUsage(); - heap.setCommitted(heapUsage.getCommitted()); - heap.setInit(heapUsage.getInit()); - heap.setMax(heapUsage.getMax()); - heap.setUsed(heapUsage.getUsed()); - heap.setName("heap"); - heap.setPeakCommitted(0); - heap.setPeakInit(0); - heap.setPeakMax(0); - heap.setPeakUsed(0); - jvmMetrics.getMemoryPools().add(heap); - - // Populate non-heap usage - TJvmMemoryPool nonHeap = new TJvmMemoryPool(); - MemoryUsage nonHeapUsage = mBean.getNonHeapMemoryUsage(); - nonHeap.setCommitted(nonHeapUsage.getCommitted()); - nonHeap.setInit(nonHeapUsage.getInit()); - nonHeap.setMax(nonHeapUsage.getMax()); - nonHeap.setUsed(nonHeapUsage.getUsed()); - nonHeap.setName("non-heap"); - nonHeap.setPeakCommitted(0); - nonHeap.setPeakInit(0); - nonHeap.setPeakMax(0); - nonHeap.setPeakUsed(0); - jvmMetrics.getMemoryPools().add(nonHeap); - - // Populate JvmPauseMonitor metrics - jvmMetrics.setGcNumWarnThresholdExceeded( - JvmPauseMonitor.INSTANCE.getNumGcWarnThresholdExceeded()); - jvmMetrics.setGcNumInfoThresholdExceeded( - JvmPauseMonitor.INSTANCE.getNumGcInfoThresholdExceeded()); - jvmMetrics.setGcTotalExtraSleepTimeMillis( - JvmPauseMonitor.INSTANCE.getTotalGcExtraSleepTime()); - - // And Garbage Collector metrics - long gcCount = 0; - long gcTimeMillis = 0; - for (GarbageCollectorMXBean bean : 
ManagementFactory.getGarbageCollectorMXBeans()) { - gcCount += bean.getCollectionCount(); - gcTimeMillis += bean.getCollectionTime(); - } - jvmMetrics.setGcCount(gcCount); - jvmMetrics.setGcTimeMillis(gcTimeMillis); - - return serializeToThrift(jvmMetrics, protocolFactory_); - } - - /** - * Get information about the live JVM threads. - */ - public static byte[] getJvmThreadsInfo(byte[] argument) throws InternalException { - TGetJvmThreadsInfoRequest request = new TGetJvmThreadsInfoRequest(); - JniUtil.deserializeThrift(protocolFactory_, request, argument); - TGetJvmThreadsInfoResponse response = new TGetJvmThreadsInfoResponse(); - ThreadMXBean threadBean = ManagementFactory.getThreadMXBean(); - response.setTotalThreadCount(threadBean.getThreadCount()); - response.setDaemonThreadCount(threadBean.getDaemonThreadCount()); - response.setPeakThreadCount(threadBean.getPeakThreadCount()); - if (request.get_complete_info) { - for (ThreadInfo threadInfo : threadBean.dumpAllThreads(true, true)) { - TJvmThreadInfo tThreadInfo = new TJvmThreadInfo(); - long id = threadInfo.getThreadId(); - tThreadInfo.setSummary(threadInfo.toString()); - tThreadInfo.setCpuTimeInNs(threadBean.getThreadCpuTime(id)); - tThreadInfo.setUserTimeInNs(threadBean.getThreadUserTime(id)); - tThreadInfo.setBlockedCount(threadInfo.getBlockedCount()); - tThreadInfo.setBlockedTimeInMs(threadInfo.getBlockedTime()); - tThreadInfo.setIsInNative(threadInfo.isInNative()); - response.addToThreads(tThreadInfo); - } - } - return serializeToThrift(response, protocolFactory_); - } - - public static byte[] getJMXJson() throws InternalException { - TGetJMXJsonResponse response = new TGetJMXJsonResponse(JMXJsonUtil.getJMXJson()); - return serializeToThrift(response, protocolFactory_); - } - - /** - * Get Java version, input arguments and system properties. - */ - public static String getJavaVersion() { - RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean(); - StringBuilder sb = new StringBuilder(); - sb.append("Java Input arguments:\n"); - sb.append(Joiner.on(" ").join(runtime.getInputArguments())); - sb.append("\nJava System properties:\n"); - for (Map.Entry entry : runtime.getSystemProperties().entrySet()) { - sb.append(entry.getKey() + ":" + entry.getValue() + "\n"); - } - return sb.toString(); - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JvmPauseMonitor.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JvmPauseMonitor.java deleted file mode 100644 index 85af333156a3d3..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/JvmPauseMonitor.java +++ /dev/null @@ -1,321 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
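Among the JniUtil helpers removed above (a utility "intended for JNI clients" per its javadoc), throwableToString() flattens an exception chain into a compact "CAUSED BY" format. A self-contained sketch of that output:

import org.apache.doris.common.jni.utils.JniUtil;

public class ThrowableFormatDemo {
    public static void main(String[] args) {
        try {
            try {
                throw new IllegalStateException("inner failure");
            } catch (IllegalStateException e) {
                throw new RuntimeException("outer failure", e);
            }
        } catch (RuntimeException e) {
            // Prints "RuntimeException: outer failure" followed by a
            // "CAUSED BY: IllegalStateException: inner failure" line.
            System.out.println(JniUtil.throwableToString(e));
        }
    }
}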
-// This file is copied from -// https://github.com/apache/impala/blob/branch-4.0.0/fe/src/main/java/org/apache/impala/util/JvmPauseMonitor.java -// and modified by Doris - -package org.apache.doris.common.jni.utils; - -import com.google.common.base.Joiner; -import com.google.common.base.Stopwatch; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import org.apache.log4j.Logger; - -import java.lang.management.GarbageCollectorMXBean; -import java.lang.management.ManagementFactory; -import java.lang.management.ThreadInfo; -import java.lang.management.ThreadMXBean; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; - -/** - * Class which sets up a simple thread which runs in a loop sleeping - * for a short interval of time. If the sleep takes significantly longer - * than its target time, it implies that the JVM or host machine has - * paused processing, which may cause other problems. If such a pause is - * detected, the thread logs a message. - */ -public class JvmPauseMonitor { - private static final Logger LOG = Logger.getLogger(JvmPauseMonitor.class); - - // The target sleep time. - private static final long SLEEP_INTERVAL_MS = 500; - - // Check for Java deadlocks at this interval. Set by init(). 0 or negative means that - // the deadlock checks are disabled. - private long deadlockCheckIntervalS = 0; - - // log WARN if we detect a pause longer than this threshold. - private long warnThresholdMs; - private static final long WARN_THRESHOLD_MS = 10000; - - // log INFO if we detect a pause longer than this threshold. - private long infoThresholdMs; - private static final long INFO_THRESHOLD_MS = 1000; - - // Overall metrics - // Volatile to allow populating metrics concurrently with the values - // being updated without staleness (but with no other synchronization - // guarantees). - private volatile long numGcWarnThresholdExceeded = 0; - private volatile long numGcInfoThresholdExceeded = 0; - private volatile long totalGcExtraSleepTime = 0; - - // Daemon thread running the pause monitor loop. - private Thread monitorThread; - private volatile boolean shouldRun = true; - - // Singleton instance of this pause monitor. - public static JvmPauseMonitor INSTANCE = new JvmPauseMonitor(); - - // Initializes the pause monitor. No-op if called multiple times. - public static void initPauseMonitor(long deadlockCheckIntervalS) { - if (INSTANCE.isStarted()) { - return; - } - INSTANCE.init(deadlockCheckIntervalS); - } - - private JvmPauseMonitor() { - this(INFO_THRESHOLD_MS, WARN_THRESHOLD_MS); - } - - private JvmPauseMonitor(long infoThresholdMs, long warnThresholdMs) { - this.infoThresholdMs = infoThresholdMs; - this.warnThresholdMs = warnThresholdMs; - } - - protected void init(long deadlockCheckIntervalS) { - this.deadlockCheckIntervalS = deadlockCheckIntervalS; - monitorThread = new Thread(new Monitor(), "JVM pause monitor"); - monitorThread.setDaemon(true); - monitorThread.start(); - } - - public boolean isStarted() { - return monitorThread != null; - } - - public long getNumGcWarnThresholdExceeded() { - return numGcWarnThresholdExceeded; - } - - public long getNumGcInfoThresholdExceeded() { - return numGcInfoThresholdExceeded; - } - - public long getTotalGcExtraSleepTime() { - return totalGcExtraSleepTime; - } - - /** - * Helper method that formats the message to be logged, along with - * the GC metrics. 
- */ - private String formatMessage(long extraSleepTime, - Map gcTimesAfterSleep, - Map gcTimesBeforeSleep) { - - Set gcBeanNames = Sets.intersection( - gcTimesAfterSleep.keySet(), - gcTimesBeforeSleep.keySet()); - List gcDiffs = Lists.newArrayList(); - for (String name : gcBeanNames) { - GcTimes diff = gcTimesAfterSleep.get(name).subtract( - gcTimesBeforeSleep.get(name)); - if (diff.gcCount != 0) { - gcDiffs.add("GC pool '" + name + "' had collection(s): " + diff); - } - } - - String ret = "Detected pause in JVM or host machine (eg GC): " - + "pause of approximately " + extraSleepTime + "ms\n"; - if (gcDiffs.isEmpty()) { - ret += "No GCs detected"; - } else { - ret += Joiner.on("\n").join(gcDiffs); - } - return ret; - } - - private Map getGcTimes() { - Map map = Maps.newHashMap(); - List gcBeans = - ManagementFactory.getGarbageCollectorMXBeans(); - for (GarbageCollectorMXBean gcBean : gcBeans) { - map.put(gcBean.getName(), new GcTimes(gcBean)); - } - return map; - } - - private static class GcTimes { - private GcTimes(GarbageCollectorMXBean gcBean) { - gcCount = gcBean.getCollectionCount(); - gcTimeMillis = gcBean.getCollectionTime(); - } - - private GcTimes(long count, long time) { - this.gcCount = count; - this.gcTimeMillis = time; - } - - private GcTimes subtract(GcTimes other) { - return new GcTimes(this.gcCount - other.gcCount, - this.gcTimeMillis - other.gcTimeMillis); - } - - @Override - public String toString() { - return "count=" + gcCount + " time=" + gcTimeMillis + "ms"; - } - - private long gcCount; - private long gcTimeMillis; - } - - /** - * Runnable instance of the pause monitor loop. Launched from serviceStart(). - */ - private class Monitor implements Runnable { - @Override - public void run() { - Stopwatch sw = Stopwatch.createUnstarted(); - Stopwatch timeSinceDeadlockCheck = Stopwatch.createStarted(); - Map gcTimesBeforeSleep = getGcTimes(); - LOG.info("Starting JVM pause monitor"); - while (shouldRun) { - sw.reset().start(); - try { - Thread.sleep(SLEEP_INTERVAL_MS); - } catch (InterruptedException ie) { - LOG.error("JVM pause monitor interrupted", ie); - return; - } - sw.stop(); - long extraSleepTime = sw.elapsed(TimeUnit.MILLISECONDS) - SLEEP_INTERVAL_MS; - Map gcTimesAfterSleep = getGcTimes(); - - if (extraSleepTime > warnThresholdMs) { - ++numGcWarnThresholdExceeded; - LOG.warn(formatMessage( - extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep)); - } else if (extraSleepTime > infoThresholdMs) { - ++numGcInfoThresholdExceeded; - LOG.info(formatMessage( - extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep)); - } - totalGcExtraSleepTime += extraSleepTime; - gcTimesBeforeSleep = gcTimesAfterSleep; - - if (deadlockCheckIntervalS > 0 - && timeSinceDeadlockCheck.elapsed(TimeUnit.SECONDS) >= deadlockCheckIntervalS) { - checkForDeadlocks(); - timeSinceDeadlockCheck.reset().start(); - } - } - } - - /** - * Check for deadlocks between Java threads using the JVM's deadlock detector. - * If a deadlock is found, log info about the deadlocked threads and exit the - * process. - *

- * We choose to exit the process this situation because the deadlock will likely - * cause hangs and other forms of service unavailability and there is no way to - * recover from the deadlock except by restarting the process. - */ - private void checkForDeadlocks() { - ThreadMXBean threadMx = ManagementFactory.getThreadMXBean(); - long[] deadlockedTids = threadMx.findDeadlockedThreads(); - if (deadlockedTids != null) { - ThreadInfo[] deadlockedThreads = - threadMx.getThreadInfo(deadlockedTids, true, true); - // Log diagnostics with error before aborting the process with a FATAL log. - LOG.error("Found " + deadlockedThreads.length + " threads in deadlock: "); - for (ThreadInfo thread : deadlockedThreads) { - // Defensively check for null in case the thread somehow disappeared between - // findDeadlockedThreads() and getThreadInfo(). - if (thread != null) { - LOG.error(thread.toString()); - } - } - LOG.warn("All threads:"); - for (ThreadInfo thread : threadMx.dumpAllThreads(true, true)) { - LOG.error(thread.toString()); - } - // In the context of an Doris service, LOG.fatal calls glog's fatal, which - // aborts the process, which will produce a coredump if coredumps are enabled. - LOG.fatal("Aborting because of deadlocked threads in JVM."); - System.exit(1); - } - } - } - - /** - * Helper for manual testing that causes a deadlock between java threads. - */ - private static void causeDeadlock() { - final Object obj1 = new Object(); - final Object obj2 = new Object(); - - new Thread(new Runnable() { - - @Override - public void run() { - while (true) { - synchronized (obj2) { - synchronized (obj1) { - System.err.println("Thread 1 got locks"); - } - } - } - } - }).start(); - - while (true) { - synchronized (obj1) { - synchronized (obj2) { - System.err.println("Thread 2 got locks"); - } - } - } - } - - /** - * This function just leaks memory into a list. Running this function - * with a 1GB heap will very quickly go into "GC hell" and result in - * log messages about the GC pauses. - */ - private static void allocateMemory() { - List list = Lists.newArrayList(); - int i = 0; - while (true) { - list.add(String.valueOf(i++)); - } - } - - /** - * Simple 'main' to facilitate manual testing of the pause monitor. - */ - @SuppressWarnings("resource") - public static void main(String[] args) throws Exception { - JvmPauseMonitor monitor = new JvmPauseMonitor(); - monitor.init(60); - if (args[0].equals("gc")) { - allocateMemory(); - } else if (args[0].equals("deadlock")) { - causeDeadlock(); - } else { - System.err.println("Unknown mode"); - } - } - -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/OffHeap.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/OffHeap.java deleted file mode 100644 index 268f5f6787666b..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/OffHeap.java +++ /dev/null @@ -1,183 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
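A small sketch of the deadlock check performed above, using the standard ThreadMXBean API; unlike the monitor, this illustrative DeadlockProbe only prints the deadlocked threads instead of aborting the process.

import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;

public class DeadlockProbe {
    public static void main(String[] args) {
        ThreadMXBean threadMx = ManagementFactory.getThreadMXBean();
        long[] ids = threadMx.findDeadlockedThreads();
        if (ids == null) {
            System.out.println("No deadlocked threads");
            return;
        }
        // Dump lock and stack information for every thread involved in the cycle.
        for (ThreadInfo info : threadMx.getThreadInfo(ids, true, true)) {
            if (info != null) {
                System.out.println(info);
            }
        }
    }
}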
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni.utils; - - -import sun.misc.Unsafe; - -import java.lang.reflect.Field; -import java.security.AccessController; -import java.security.PrivilegedAction; - -/** - * Reference to Apache Spark with some customization. - * Default call native method to allocate and release memory, which will be tracked by memory tracker in BE. - * Call {@link OffHeap#setTesting()} in test scenario. - */ -public class OffHeap { - private static final long UNSAFE_COPY_THRESHOLD = 1024L * 1024L; - - private static boolean IS_TESTING = false; - - private static final Unsafe UNSAFE; - - public static final int BOOLEAN_ARRAY_OFFSET; - - public static final int BYTE_ARRAY_OFFSET; - - public static final int SHORT_ARRAY_OFFSET; - - public static final int INT_ARRAY_OFFSET; - - public static final int LONG_ARRAY_OFFSET; - - public static final int FLOAT_ARRAY_OFFSET; - - public static final int DOUBLE_ARRAY_OFFSET; - - static { - UNSAFE = (Unsafe) AccessController.doPrivileged( - (PrivilegedAction) () -> { - try { - Field f = Unsafe.class.getDeclaredField("theUnsafe"); - f.setAccessible(true); - return f.get(null); - } catch (NoSuchFieldException | IllegalAccessException e) { - throw new Error(); - } - }); - BOOLEAN_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(boolean[].class); - BYTE_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(byte[].class); - SHORT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(short[].class); - INT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(int[].class); - LONG_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(long[].class); - FLOAT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(float[].class); - DOUBLE_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(double[].class); - } - - public static void setTesting() { - IS_TESTING = true; - } - - public static int getInt(Object object, long offset) { - return UNSAFE.getInt(object, offset); - } - - public static void putInt(Object object, long offset, int value) { - UNSAFE.putInt(object, offset, value); - } - - public static boolean getBoolean(Object object, long offset) { - return UNSAFE.getBoolean(object, offset); - } - - public static void putBoolean(Object object, long offset, boolean value) { - UNSAFE.putBoolean(object, offset, value); - } - - public static byte getByte(Object object, long offset) { - return UNSAFE.getByte(object, offset); - } - - public static void putByte(Object object, long offset, byte value) { - UNSAFE.putByte(object, offset, value); - } - - public static short getShort(Object object, long offset) { - return UNSAFE.getShort(object, offset); - } - - public static void putShort(Object object, long offset, short value) { - UNSAFE.putShort(object, offset, value); - } - - public static long getLong(Object object, long offset) { - return UNSAFE.getLong(object, offset); - } - - public static void putLong(Object object, long offset, long value) { - UNSAFE.putLong(object, offset, value); - } - - public static float getFloat(Object object, long offset) { - return UNSAFE.getFloat(object, offset); - } - - public static void putFloat(Object object, long offset, float value) { - UNSAFE.putFloat(object, offset, value); - } - - public static double 
getDouble(Object object, long offset) { - return UNSAFE.getDouble(object, offset); - } - - public static void putDouble(Object object, long offset, double value) { - UNSAFE.putDouble(object, offset, value); - } - - public static void setMemory(long address, byte value, long size) { - UNSAFE.setMemory(address, size, value); - } - - public static long allocateMemory(long size) { - if (IS_TESTING) { - return UNSAFE.allocateMemory(size); - } else { - return JNINativeMethod.memoryTrackerMalloc(size); - } - } - - public static void freeMemory(long address) { - if (IS_TESTING) { - UNSAFE.freeMemory(address); - } else { - JNINativeMethod.memoryTrackerFree(address); - } - } - - public static long reallocateMemory(long address, long oldSize, long newSize) { - long newMemory = allocateMemory(newSize); - copyMemory(null, address, null, newMemory, oldSize); - freeMemory(address); - return newMemory; - } - - public static void copyMemory(Object src, long srcOffset, Object dst, long dstOffset, long length) { - // Check if dstOffset is before or after srcOffset to determine if we should copy - // forward or backwards. This is necessary in case src and dst overlap. - if (dstOffset < srcOffset) { - while (length > 0) { - long size = Math.min(length, UNSAFE_COPY_THRESHOLD); - UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size); - length -= size; - srcOffset += size; - dstOffset += size; - } - } else { - srcOffset += length; - dstOffset += length; - while (length > 0) { - long size = Math.min(length, UNSAFE_COPY_THRESHOLD); - srcOffset -= size; - dstOffset -= size; - UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size); - length -= size; - } - } - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/TypeNativeBytes.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/TypeNativeBytes.java deleted file mode 100644 index bd2eb79cd295e0..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/TypeNativeBytes.java +++ /dev/null @@ -1,132 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni.utils; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.math.RoundingMode; -import java.time.DateTimeException; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.Arrays; - -public class TypeNativeBytes { - /** - * Change the order of the bytes, Because JVM is Big-Endian , x86 is Little-Endian. 
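A quick round trip for the byte-order conversion described above, assuming the point is that BigInteger.toByteArray() produces big-endian bytes which must be reversed into the little-endian layout expected on the native side. ByteOrderDemo is an illustrative name only.

import java.math.BigInteger;
import java.util.Arrays;

public class ByteOrderDemo {
    // Reverse the array in place, mirroring the convertByteOrder helper above.
    static byte[] reverse(byte[] b) {
        for (int i = 0; i < b.length / 2; i++) {
            byte t = b[i];
            b[i] = b[b.length - 1 - i];
            b[b.length - 1 - i] = t;
        }
        return b;
    }

    public static void main(String[] args) {
        byte[] bigEndian = BigInteger.valueOf(0x0102L).toByteArray(); // {0x01, 0x02}
        System.out.println(Arrays.toString(reverse(bigEndian)));      // [2, 1]
    }
}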
- */ - public static byte[] convertByteOrder(byte[] bytes) { - int length = bytes.length; - for (int i = 0; i < length / 2; ++i) { - byte temp = bytes[i]; - bytes[i] = bytes[length - 1 - i]; - bytes[length - 1 - i] = temp; - } - return bytes; - } - - public static byte[] getBigIntegerBytes(BigInteger v) { - byte[] bytes = v.toByteArray(); - // If the BigInteger is not negative and the first byte is 0, remove the first byte - if (v.signum() >= 0 && bytes[0] == 0) { - bytes = Arrays.copyOfRange(bytes, 1, bytes.length); - } - // Convert the byte order if necessary - return convertByteOrder(bytes); - } - - public static BigInteger getBigInteger(byte[] bytes) { - // Convert the byte order back if necessary - byte[] originalBytes = convertByteOrder(bytes); - // If the first byte has the sign bit set, add a 0 byte at the start - if ((originalBytes[0] & 0x80) != 0) { - byte[] extendedBytes = new byte[originalBytes.length + 1]; - extendedBytes[0] = 0; - System.arraycopy(originalBytes, 0, extendedBytes, 1, originalBytes.length); - originalBytes = extendedBytes; - } - return new BigInteger(originalBytes); - } - - public static byte[] getDecimalBytes(BigDecimal v, int scale, int size) { - BigDecimal retValue = v.setScale(scale, RoundingMode.HALF_EVEN); - BigInteger data = retValue.unscaledValue(); - byte[] bytes = convertByteOrder(data.toByteArray()); - byte[] value = new byte[size]; - if (data.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - return value; - } - - public static BigDecimal getDecimal(byte[] bytes, int scale) { - BigInteger value = new BigInteger(convertByteOrder(bytes)); - return new BigDecimal(value, scale); - } - - public static int convertToDateV2(int year, int month, int day) { - return (int) (day | (long) month << 5 | (long) year << 9); - } - - public static long convertToDateTimeV2(int year, int month, int day, int hour, int minute, int second) { - return (long) second << 20 | (long) minute << 26 | (long) hour << 32 - | (long) day << 37 | (long) month << 42 | (long) year << 46; - } - - public static long convertToDateTimeV2(int year, int month, int day, int hour, int minute, int second, - int microsecond) { - return (long) microsecond | (long) second << 20 | (long) minute << 26 | (long) hour << 32 - | (long) day << 37 | (long) month << 42 | (long) year << 46; - } - - public static LocalDate convertToJavaDate(int date) { - int year = date >> 9; - int month = (date >> 5) & 0XF; - int day = date & 0X1F; - LocalDate value; - try { - value = LocalDate.of(year, month, day); - } catch (DateTimeException e) { - value = LocalDate.MAX; - } - return value; - } - - public static LocalDateTime convertToJavaDateTime(long time) { - int year = (int) (time >> 46); - int yearMonth = (int) (time >> 42); - int yearMonthDay = (int) (time >> 37); - - int month = (yearMonth & 0XF); - int day = (yearMonthDay & 0X1F); - - int hour = (int) ((time >> 32) & 0X1F); - int minute = (int) ((time >> 26) & 0X3F); - int second = (int) ((time >> 20) & 0X3F); - int microsecond = (int) (time & 0XFFFFF); - - LocalDateTime value; - try { - value = LocalDateTime.of(year, month, day, hour, minute, second, microsecond * 1000); - } catch (DateTimeException e) { - value = LocalDateTime.MAX; - } - return value; - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/UdfUtils.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/UdfUtils.java deleted file mode 
100644 index 039a5ea25f6aef..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/utils/UdfUtils.java +++ /dev/null @@ -1,509 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni.utils; - -import org.apache.doris.catalog.ArrayType; -import org.apache.doris.catalog.MapType; -import org.apache.doris.catalog.ScalarType; -import org.apache.doris.catalog.Type; -import org.apache.doris.common.Pair; -import org.apache.doris.common.exception.InternalException; - -import com.vesoft.nebula.client.graph.data.DateTimeWrapper; -import com.vesoft.nebula.client.graph.data.DateWrapper; -import com.vesoft.nebula.client.graph.data.ValueWrapper; -import org.apache.log4j.Logger; -import sun.misc.Unsafe; - -import java.io.File; -import java.io.FileNotFoundException; -import java.lang.reflect.Field; -import java.net.MalformedURLException; -import java.net.URL; -import java.net.URLClassLoader; -import java.security.AccessController; -import java.security.PrivilegedAction; -import java.time.DateTimeException; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.Set; - -public class UdfUtils { - public static final Logger LOG = Logger.getLogger(UdfUtils.class); - public static final Unsafe UNSAFE; - private static final long UNSAFE_COPY_THRESHOLD = 1024L * 1024L; - public static final long BYTE_ARRAY_OFFSET; - public static final long INT_ARRAY_OFFSET; - - static { - UNSAFE = (Unsafe) AccessController.doPrivileged( - (PrivilegedAction) () -> { - try { - Field f = Unsafe.class.getDeclaredField("theUnsafe"); - f.setAccessible(true); - return f.get(null); - } catch (NoSuchFieldException | IllegalAccessException e) { - throw new Error(); - } - }); - BYTE_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(byte[].class); - INT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(int[].class); - } - - public static void copyMemory( - Object src, long srcOffset, Object dst, long dstOffset, long length) { - // Check if dstOffset is before or after srcOffset to determine if we should copy - // forward or backwards. This is necessary in case src and dst overlap. 
- if (dstOffset < srcOffset) { - while (length > 0) { - long size = Math.min(length, UNSAFE_COPY_THRESHOLD); - UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size); - length -= size; - srcOffset += size; - dstOffset += size; - } - } else { - srcOffset += length; - dstOffset += length; - while (length > 0) { - long size = Math.min(length, UNSAFE_COPY_THRESHOLD); - srcOffset -= size; - dstOffset -= size; - UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size); - length -= size; - } - - } - } - - public static URLClassLoader getClassLoader(String jarPath, ClassLoader parent) - throws MalformedURLException, FileNotFoundException { - File file = new File(jarPath); - if (!file.exists()) { - throw new FileNotFoundException("Can not find local file: " + jarPath); - } - URL url = file.toURI().toURL(); - return URLClassLoader.newInstance(new URL[] {url}, parent); - } - - /** - * Sets the return type of a Java UDF. Returns true if the return type is compatible - * with the return type from the function definition. Throws an UdfRuntimeException - * if the return type is not supported. - */ - public static Pair setReturnType(Type retType, Class udfReturnType) - throws InternalException { - if (!JavaUdfDataType.isSupported(retType)) { - throw new InternalException("Unsupported return type: " + retType.toSql()); - } - Set javaTypes = JavaUdfDataType.getCandidateTypes(udfReturnType); - // Check if the evaluate method return type is compatible with the return type from - // the function definition. This happens when both of them map to the same primitive - // type. - Object[] res = javaTypes.stream().filter( - t -> t.getPrimitiveType() == retType.getPrimitiveType().toThrift()).toArray(); - - JavaUdfDataType result = new JavaUdfDataType( - res.length == 0 ? javaTypes.iterator().next() : (JavaUdfDataType) res[0]); - if (retType.isDecimalV3() || retType.isDatetimeV2()) { - result.setPrecision(retType.getPrecision()); - result.setScale(((ScalarType) retType).getScalarScale()); - } else if (retType.isArrayType()) { - ArrayType arrType = (ArrayType) retType; - result.setItemType(arrType.getItemType()); - if (arrType.getItemType().isDatetimeV2() || arrType.getItemType().isDecimalV3()) { - result.setPrecision(arrType.getItemType().getPrecision()); - result.setScale(((ScalarType) arrType.getItemType()).getScalarScale()); - } - } else if (retType.isMapType()) { - MapType mapType = (MapType) retType; - result.setKeyType(mapType.getKeyType()); - result.setValueType(mapType.getValueType()); - Type keyType = mapType.getKeyType(); - Type valuType = mapType.getValueType(); - if (keyType.isDatetimeV2() || keyType.isDecimalV3()) { - result.setKeyScale(((ScalarType) keyType).getScalarScale()); - } - if (valuType.isDatetimeV2() || valuType.isDecimalV3()) { - result.setValueScale(((ScalarType) valuType).getScalarScale()); - } - } - return Pair.of(res.length != 0, result); - } - - /** - * Sets the argument types of a Java UDF or UDAF. Returns true if the argument types specified - * in the UDF are compatible with the argument types of the evaluate() function loaded - * from the associated JAR file. - * @throws InternalException - */ - public static Pair setArgTypes(Type[] parameterTypes, Class[] udfArgTypes, - boolean isUdaf) throws InternalException { - JavaUdfDataType[] inputArgTypes = new JavaUdfDataType[parameterTypes.length]; - int firstPos = isUdaf ? 
1 : 0; - for (int i = 0; i < parameterTypes.length; ++i) { - Set javaTypes = JavaUdfDataType.getCandidateTypes(udfArgTypes[i + firstPos]); - int finalI = i; - Object[] res = javaTypes.stream().filter( - t -> t.getPrimitiveType() == parameterTypes[finalI].getPrimitiveType().toThrift()).toArray(); - inputArgTypes[i] = new JavaUdfDataType( - res.length == 0 ? javaTypes.iterator().next() : (JavaUdfDataType) res[0]); - if (parameterTypes[finalI].isDecimalV3() || parameterTypes[finalI].isDatetimeV2()) { - inputArgTypes[i].setPrecision(parameterTypes[finalI].getPrecision()); - inputArgTypes[i].setScale(((ScalarType) parameterTypes[finalI]).getScalarScale()); - } else if (parameterTypes[finalI].isArrayType()) { - ArrayType arrType = (ArrayType) parameterTypes[finalI]; - inputArgTypes[i].setItemType(arrType.getItemType()); - if (arrType.getItemType().isDatetimeV2() || arrType.getItemType().isDecimalV3()) { - inputArgTypes[i].setPrecision(arrType.getItemType().getPrecision()); - inputArgTypes[i].setScale(((ScalarType) arrType.getItemType()).getScalarScale()); - } - } else if (parameterTypes[finalI].isMapType()) { - MapType mapType = (MapType) parameterTypes[finalI]; - Type keyType = mapType.getKeyType(); - Type valuType = mapType.getValueType(); - inputArgTypes[i].setKeyType(mapType.getKeyType()); - inputArgTypes[i].setValueType(mapType.getValueType()); - if (keyType.isDatetimeV2() || keyType.isDecimalV3()) { - inputArgTypes[i].setKeyScale(((ScalarType) keyType).getScalarScale()); - } - if (valuType.isDatetimeV2() || valuType.isDecimalV3()) { - inputArgTypes[i].setValueScale(((ScalarType) valuType).getScalarScale()); - } - } - if (res.length == 0) { - return Pair.of(false, inputArgTypes); - } - } - return Pair.of(true, inputArgTypes); - } - - public static Object convertDateTimeV2ToJavaDateTime(long date, Class clz) { - int year = (int) (date >> 46); - int yearMonth = (int) (date >> 42); - int yearMonthDay = (int) (date >> 37); - - int month = (yearMonth & 0XF); - int day = (yearMonthDay & 0X1F); - - int hour = (int) ((date >> 32) & 0X1F); - int minute = (int) ((date >> 26) & 0X3F); - int second = (int) ((date >> 20) & 0X3F); - //here don't need those bits are type = ((minus_type_neg >> 1) & 0x7); - - if (LocalDateTime.class.equals(clz)) { - return convertToLocalDateTime(year, month, day, hour, minute, second); - } else if (org.joda.time.DateTime.class.equals(clz)) { - return convertToJodaDateTime(year, month, day, hour, minute, second); - } else if (org.joda.time.LocalDateTime.class.equals(clz)) { - return convertToJodaLocalDateTime(year, month, day, hour, minute, second); - } else { - return null; - } - } - - /** - * input is a 64bit num from backend, and then get year, month, day, hour, minus, second by the order of bits. 
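A round-trip sketch of the datetimev2 bit layout used by the conversion above, following the shifts shown (microsecond | second << 20 | minute << 26 | hour << 32 | day << 37 | month << 42 | year << 46). DateTimeV2Demo and its sample value are illustrative only.

import java.time.LocalDateTime;

public class DateTimeV2Demo {
    // Pack a LocalDateTime into the 64-bit datetimev2 layout.
    static long pack(LocalDateTime t) {
        return (long) (t.getNano() / 1000) | (long) t.getSecond() << 20 | (long) t.getMinute() << 26
                | (long) t.getHour() << 32 | (long) t.getDayOfMonth() << 37
                | (long) t.getMonthValue() << 42 | (long) t.getYear() << 46;
    }

    // Extract the fields back out with the masks used above.
    static LocalDateTime unpack(long v) {
        return LocalDateTime.of((int) (v >> 46), (int) (v >> 42) & 0xF, (int) (v >> 37) & 0x1F,
                (int) (v >> 32) & 0x1F, (int) (v >> 26) & 0x3F, (int) (v >> 20) & 0x3F,
                (int) (v & 0xFFFFF) * 1000);
    }

    public static void main(String[] args) {
        LocalDateTime t = LocalDateTime.of(2023, 9, 26, 11, 31, 9, 123456 * 1000);
        System.out.println(unpack(pack(t))); // 2023-09-26T11:31:09.123456
    }
}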
- */ - public static Object convertDateTimeToJavaDateTime(long date, Class clz) { - int year = (int) (date >> 48); - int yearMonth = (int) (date >> 40); - int yearMonthDay = (int) (date >> 32); - - int month = (yearMonth & 0XFF); - int day = (yearMonthDay & 0XFF); - - int hourMinuteSecond = (int) (date % (1 << 31)); - int minuteTypeNeg = (hourMinuteSecond % (1 << 16)); - - int hour = (hourMinuteSecond >> 24); - int minute = ((hourMinuteSecond >> 16) & 0XFF); - int second = (minuteTypeNeg >> 4); - //here don't need those bits are type = ((minus_type_neg >> 1) & 0x7); - - if (LocalDateTime.class.equals(clz)) { - return convertToLocalDateTime(year, month, day, hour, minute, second); - } else if (org.joda.time.DateTime.class.equals(clz)) { - return convertToJodaDateTime(year, month, day, hour, minute, second); - } else if (org.joda.time.LocalDateTime.class.equals(clz)) { - return convertToJodaLocalDateTime(year, month, day, hour, minute, second); - } else { - return null; - } - } - - public static Object convertDateV2ToJavaDate(int date, Class clz) { - int year = date >> 9; - int month = (date >> 5) & 0XF; - int day = date & 0X1F; - if (LocalDate.class.equals(clz)) { - return convertToLocalDate(year, month, day); - } else if (java.util.Date.class.equals(clz)) { - return convertToJavaDate(year, month, day); - } else if (org.joda.time.LocalDate.class.equals(clz)) { - return convertToJodaDate(year, month, day); - } else { - return null; - } - } - - public static LocalDateTime convertToLocalDateTime(int year, int month, int day, - int hour, int minute, int second) { - LocalDateTime value = null; - try { - value = LocalDateTime.of(year, month, day, hour, minute, second); - } catch (DateTimeException e) { - LOG.warn("Error occurs when parsing date time value: {}", e); - } - return value; - } - - public static org.joda.time.DateTime convertToJodaDateTime(int year, int month, int day, - int hour, int minute, int second) { - try { - return new org.joda.time.DateTime(year, month, day, hour, minute, second); - } catch (Exception e) { - return null; - } - } - - public static org.joda.time.LocalDateTime convertToJodaLocalDateTime(int year, int month, int day, - int hour, int minute, int second) { - try { - return new org.joda.time.LocalDateTime(year, month, day, hour, minute, second); - } catch (Exception e) { - return null; - } - } - - public static Object convertDateToJavaDate(long date, Class clz) { - int year = (int) (date >> 48); - int yearMonth = (int) (date >> 40); - int yearMonthDay = (int) (date >> 32); - - int month = (yearMonth & 0XFF); - int day = (yearMonthDay & 0XFF); - if (LocalDate.class.equals(clz)) { - return convertToLocalDate(year, month, day); - } else if (java.util.Date.class.equals(clz)) { - return convertToJavaDate(year, month, day); - } else if (org.joda.time.LocalDate.class.equals(clz)) { - return convertToJodaDate(year, month, day); - } else { - return null; - } - } - - /** - * a 64bit num convertToDate. 
- */ - public static LocalDate convertToLocalDate(int year, int month, int day) { - LocalDate value = null; - try { - value = LocalDate.of(year, month, day); - } catch (DateTimeException e) { - LOG.warn("Error occurs when parsing date value: {}", e); - } - return value; - } - - public static org.joda.time.LocalDate convertToJodaDate(int year, int month, int day) { - try { - return new org.joda.time.LocalDate(year, month, day); - } catch (Exception e) { - return null; - } - } - - public static java.util.Date convertToJavaDate(int year, int month, int day) { - try { - return new java.util.Date(year - 1900, month - 1, day); - } catch (Exception e) { - return null; - } - } - - /** - * input is the second, minute, hours, day , month and year respectively. - * and then combining all num to a 64bit value return to backend; - */ - public static long convertToDateTime(Object obj, Class clz) { - if (LocalDateTime.class.equals(clz)) { - LocalDateTime date = (LocalDateTime) obj; - return convertToDateTime(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), date.getHour(), - date.getMinute(), date.getSecond(), false); - } else if (org.joda.time.DateTime.class.equals(clz)) { - org.joda.time.DateTime date = (org.joda.time.DateTime) obj; - return convertToDateTime(date.getYear(), date.getMonthOfYear(), date.getDayOfMonth(), date.getHourOfDay(), - date.getMinuteOfHour(), date.getSecondOfMinute(), false); - } else if (org.joda.time.LocalDateTime.class.equals(clz)) { - org.joda.time.LocalDateTime date = (org.joda.time.LocalDateTime) obj; - return convertToDateTime(date.getYear(), date.getMonthOfYear(), date.getDayOfMonth(), date.getHourOfDay(), - date.getMinuteOfHour(), date.getSecondOfMinute(), false); - } else { - return 0; - } - } - - public static long convertToDate(Object obj, Class clz) { - if (LocalDate.class.equals(clz)) { - LocalDate date = (LocalDate) obj; - return convertToDateTime(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), 0, - 0, 0, true); - } else if (java.util.Date.class.equals(clz)) { - java.util.Date date = (java.util.Date) obj; - return convertToDateTime(date.getYear() + 1900, date.getMonth(), date.getDay(), 0, - 0, 0, true); - } else if (org.joda.time.LocalDate.class.equals(clz)) { - org.joda.time.LocalDate date = (org.joda.time.LocalDate) obj; - return convertToDateTime(date.getYear(), date.getDayOfMonth(), date.getDayOfMonth(), 0, - 0, 0, true); - } else { - return 0; - } - } - - public static long convertToDateTime(int year, int month, int day, int hour, int minute, int second, - boolean isDate) { - long time = 0; - time = time + year; - time = (time << 8) + month; - time = (time << 8) + day; - time = (time << 8) + hour; - time = (time << 8) + minute; - time = (time << 12) + second; - int type = isDate ? 
2 : 3; - time = (time << 3) + type; - //this bit is int neg = 0; - time = (time << 1); - return time; - } - - public static long convertToDateTimeV2(int year, int month, int day, int hour, int minute, int second) { - return (long) second << 20 | (long) minute << 26 | (long) hour << 32 - | (long) day << 37 | (long) month << 42 | (long) year << 46; - } - - public static long convertToDateTimeV2( - int year, int month, int day, int hour, int minute, int second, int microsecond) { - return (long) microsecond | (long) second << 20 | (long) minute << 26 | (long) hour << 32 - | (long) day << 37 | (long) month << 42 | (long) year << 46; - } - - public static long convertToDateTimeV2(Object obj, Class clz) { - if (LocalDateTime.class.equals(clz)) { - LocalDateTime date = (LocalDateTime) obj; - return convertToDateTimeV2(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), date.getHour(), - date.getMinute(), date.getSecond()); - } else if (org.joda.time.DateTime.class.equals(clz)) { - org.joda.time.DateTime date = (org.joda.time.DateTime) obj; - return convertToDateTimeV2(date.getYear(), date.getMonthOfYear(), date.getDayOfMonth(), date.getHourOfDay(), - date.getMinuteOfHour(), date.getSecondOfMinute(), date.getMillisOfSecond() * 1000); - } else if (org.joda.time.LocalDateTime.class.equals(clz)) { - org.joda.time.LocalDateTime date = (org.joda.time.LocalDateTime) obj; - return convertToDateTimeV2(date.getYear(), date.getMonthOfYear(), date.getDayOfMonth(), date.getHourOfDay(), - date.getMinuteOfHour(), date.getSecondOfMinute(), date.getMillisOfSecond() * 1000); - } else { - return 0; - } - } - - public static int convertToDateV2(int year, int month, int day) { - return (int) (day | (long) month << 5 | (long) year << 9); - } - - public static int convertToDateV2(Object obj, Class clz) { - if (LocalDate.class.equals(clz)) { - LocalDate date = (LocalDate) obj; - return convertToDateV2(date.getYear(), date.getMonthValue(), date.getDayOfMonth()); - } else if (java.util.Date.class.equals(clz)) { - java.util.Date date = (java.util.Date) obj; - return convertToDateV2(date.getYear(), date.getMonth(), date.getDay()); - } else if (org.joda.time.LocalDate.class.equals(clz)) { - org.joda.time.LocalDate date = (org.joda.time.LocalDate) obj; - return convertToDateV2(date.getYear(), date.getDayOfMonth(), date.getDayOfMonth()); - } else { - return 0; - } - } - - /** - * Change the order of the bytes, Because JVM is Big-Endian , x86 is Little-Endian. 
- */ - public static byte[] convertByteOrder(byte[] bytes) { - int length = bytes.length; - for (int i = 0; i < length / 2; ++i) { - byte temp = bytes[i]; - bytes[i] = bytes[length - 1 - i]; - bytes[length - 1 - i] = temp; - } - return bytes; - } - - // only used by nebula-graph - // transfer to an object that can copy to the block - public static Object convertObject(ValueWrapper value) { - try { - if (value.isLong()) { - return value.asLong(); - } - if (value.isBoolean()) { - return value.asBoolean(); - } - if (value.isDouble()) { - return value.asDouble(); - } - if (value.isString()) { - return value.asString(); - } - if (value.isTime()) { - return value.asTime().toString(); - } - if (value.isDate()) { - DateWrapper date = value.asDate(); - return LocalDate.of(date.getYear(), date.getMonth(), date.getDay()); - } - if (value.isDateTime()) { - DateTimeWrapper dateTime = value.asDateTime(); - return LocalDateTime.of(dateTime.getYear(), dateTime.getMonth(), dateTime.getDay(), - dateTime.getHour(), dateTime.getMinute(), dateTime.getSecond(), dateTime.getMicrosec() * 1000); - } - if (value.isVertex()) { - return value.asNode().toString(); - } - if (value.isEdge()) { - return value.asRelationship().toString(); - } - if (value.isPath()) { - return value.asPath().toString(); - } - if (value.isList()) { - return value.asList().toString(); - } - if (value.isSet()) { - return value.asSet().toString(); - } - if (value.isMap()) { - return value.asMap().toString(); - } - return null; - } catch (Exception e) { - return null; - } - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java deleted file mode 100644 index 1cf7e887d28cff..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java +++ /dev/null @@ -1,374 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni.vec; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Column type for fields in vector table. Support complex nested types. - * date & datetime is deprecated, use datev2 & datetimev2 only. - * If decimalv2 is deprecated, we can unify decimal32 & decimal64 & decimal128 into decimal. 
- */ -public class ColumnType { - public static final int MAX_DECIMAL32_PRECISION = 9; - public static final int MAX_DECIMAL64_PRECISION = 18; - public static final int MAX_DECIMAL128_PRECISION = 38; - - public enum Type { - UNSUPPORTED(-1), - BYTE(1), // only for string, generated as array - BOOLEAN(1), - TINYINT(1), - SMALLINT(2), - INT(4), - BIGINT(8), - LARGEINT(16), - FLOAT(4), - DOUBLE(8), - DATEV2(4), - DATETIMEV2(8), - CHAR(-1), - VARCHAR(-1), - BINARY(-1), - DECIMALV2(16), - DECIMAL32(4), - DECIMAL64(8), - DECIMAL128(16), - STRING(-1), - ARRAY(-1), - MAP(-1), - STRUCT(-1); - - public final int size; - - Type(int size) { - this.size = size; - } - } - - private final Type type; - private final String name; - private List childNames; - private List childTypes; - private List fieldIndex; - // only used in char & varchar - private final int length; - // only used in decimal - private final int precision; - private final int scale; - - public ColumnType(String name, Type type) { - this.name = name; - this.type = type; - this.length = -1; - this.precision = -1; - this.scale = -1; - } - - public ColumnType(String name, Type type, int length) { - this.name = name; - this.type = type; - this.length = length; - this.precision = -1; - this.scale = -1; - } - - public ColumnType(String name, Type type, int precision, int scale) { - this.name = name; - this.type = type; - this.length = -1; - this.precision = precision; - this.scale = scale; - } - - public ColumnType(String name, Type type, int length, int precision, int scale) { - this.name = name; - this.type = type; - this.length = length; - this.precision = precision; - this.scale = scale; - } - - public List getChildNames() { - return childNames; - } - - public void setChildNames(List childNames) { - this.childNames = childNames; - } - - public List getChildTypes() { - return childTypes; - } - - public void setChildTypes(List childTypes) { - this.childTypes = childTypes; - } - - public List getFieldIndex() { - return fieldIndex; - } - - public void setFieldIndex(List fieldIndex) { - this.fieldIndex = fieldIndex; - } - - public int getTypeSize() { - return type.size; - } - - public boolean isUnsupported() { - return type == Type.UNSUPPORTED; - } - - public boolean isStringType() { - return type == Type.STRING || type == Type.BINARY || type == Type.CHAR || type == Type.VARCHAR; - } - - public boolean isComplexType() { - return type == Type.ARRAY || type == Type.MAP || type == Type.STRUCT; - } - - public boolean isArray() { - return type == Type.ARRAY; - } - - public boolean isMap() { - return type == Type.MAP; - } - - public boolean isStruct() { - return type == Type.STRUCT; - } - - public Type getType() { - return type; - } - - public String getName() { - return name; - } - - public int getLength() { - return length; - } - - public int getPrecision() { - return precision; - } - - public int getScale() { - return scale; - } - - public int metaSize() { - switch (type) { - case UNSUPPORTED: - // set nullMap address as 0. - return 1; - case ARRAY: - case MAP: - case STRUCT: - // array & map : [nullMap | offsets | ... ] - // struct : [nullMap | ... 
] - int size = 2; - if (type == Type.STRUCT) { - size = 1; - } - for (ColumnType c : childTypes) { - size += c.metaSize(); - } - return size; - case STRING: - case BINARY: - case CHAR: - case VARCHAR: - // [nullMap | offsets | data ] - return 3; - default: - // [nullMap | data] - return 2; - } - } - - private static final Pattern digitPattern = Pattern.compile("(\\d+)"); - - private static int findNextNestedField(String commaSplitFields) { - int numLess = 0; - int numBracket = 0; - for (int i = 0; i < commaSplitFields.length(); i++) { - char c = commaSplitFields.charAt(i); - if (c == '<') { - numLess++; - } else if (c == '>') { - numLess--; - } else if (c == '(') { - numBracket++; - } else if (c == ')') { - numBracket--; - } else if (c == ',' && numLess == 0 && numBracket == 0) { - return i; - } - } - return commaSplitFields.length(); - } - - public static ColumnType parseType(String columnName, String hiveType) { - String lowerCaseType = hiveType.toLowerCase(); - Type type = Type.UNSUPPORTED; - int length = -1; - int precision = -1; - int scale = -1; - switch (lowerCaseType) { - case "boolean": - type = Type.BOOLEAN; - break; - case "tinyint": - type = Type.TINYINT; - break; - case "smallint": - type = Type.SMALLINT; - break; - case "int": - type = Type.INT; - break; - case "bigint": - type = Type.BIGINT; - break; - case "largeint": - type = Type.LARGEINT; - break; - case "float": - type = Type.FLOAT; - break; - case "double": - type = Type.DOUBLE; - break; - case "date": - type = Type.DATEV2; - break; - case "binary": - case "bytes": - type = Type.BINARY; - break; - case "string": - type = Type.STRING; - break; - default: - if (lowerCaseType.startsWith("timestamp")) { - type = Type.DATETIMEV2; - precision = 6; // default - Matcher match = digitPattern.matcher(lowerCaseType); - if (match.find()) { - precision = Integer.parseInt(match.group(1).trim()); - } - } else if (lowerCaseType.startsWith("char")) { - Matcher match = digitPattern.matcher(lowerCaseType); - if (match.find()) { - type = Type.CHAR; - length = Integer.parseInt(match.group(1).trim()); - } - } else if (lowerCaseType.startsWith("varchar")) { - Matcher match = digitPattern.matcher(lowerCaseType); - if (match.find()) { - type = Type.VARCHAR; - length = Integer.parseInt(match.group(1).trim()); - } - } else if (lowerCaseType.startsWith("decimal")) { - int s = lowerCaseType.indexOf('('); - int e = lowerCaseType.indexOf(')'); - if (s != -1 && e != -1) { - String[] ps = lowerCaseType.substring(s + 1, e).split(","); - precision = Integer.parseInt(ps[0].trim()); - scale = Integer.parseInt(ps[1].trim()); - if (lowerCaseType.startsWith("decimalv2")) { - type = Type.DECIMALV2; - } else if (lowerCaseType.startsWith("decimal32")) { - type = Type.DECIMAL32; - } else if (lowerCaseType.startsWith("decimal64")) { - type = Type.DECIMAL64; - } else if (lowerCaseType.startsWith("decimal128")) { - type = Type.DECIMAL128; - } else { - if (precision <= MAX_DECIMAL32_PRECISION) { - type = Type.DECIMAL32; - } else if (precision <= MAX_DECIMAL64_PRECISION) { - type = Type.DECIMAL64; - } else { - type = Type.DECIMAL128; - } - } - } - } else if (lowerCaseType.startsWith("array")) { - if (lowerCaseType.indexOf("<") == 5 - && lowerCaseType.lastIndexOf(">") == lowerCaseType.length() - 1) { - ColumnType nestedType = parseType("element", - lowerCaseType.substring(6, lowerCaseType.length() - 1)); - ColumnType arrayType = new ColumnType(columnName, Type.ARRAY); - arrayType.setChildTypes(Collections.singletonList(nestedType)); - return arrayType; - } - } else if 
(lowerCaseType.startsWith("map")) { - if (lowerCaseType.indexOf("<") == 3 - && lowerCaseType.lastIndexOf(">") == lowerCaseType.length() - 1) { - String keyValue = lowerCaseType.substring(4, lowerCaseType.length() - 1); - int index = findNextNestedField(keyValue); - if (index != keyValue.length() && index != 0) { - ColumnType keyType = parseType("key", keyValue.substring(0, index)); - ColumnType valueType = parseType("value", keyValue.substring(index + 1)); - ColumnType mapType = new ColumnType(columnName, Type.MAP); - mapType.setChildTypes(Arrays.asList(keyType, valueType)); - return mapType; - } - } - } else if (lowerCaseType.startsWith("struct")) { - if (lowerCaseType.indexOf("<") == 6 - && lowerCaseType.lastIndexOf(">") == lowerCaseType.length() - 1) { - String listFields = lowerCaseType.substring(7, lowerCaseType.length() - 1); - ArrayList fields = new ArrayList<>(); - ArrayList names = new ArrayList<>(); - while (listFields.length() > 0) { - int index = findNextNestedField(listFields); - int pivot = listFields.indexOf(':'); - if (pivot > 0 && pivot < listFields.length() - 1) { - fields.add(parseType(listFields.substring(0, pivot), - listFields.substring(pivot + 1, index))); - names.add(listFields.substring(0, pivot)); - listFields = listFields.substring(Math.min(index + 1, listFields.length())); - } else { - break; - } - } - if (listFields.isEmpty()) { - ColumnType structType = new ColumnType(columnName, Type.STRUCT); - structType.setChildTypes(fields); - structType.setChildNames(names); - return structType; - } - } - } - break; - } - return new ColumnType(columnName, type, length, precision, scale); - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnValue.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnValue.java deleted file mode 100644 index 0d1c522f9cbf06..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnValue.java +++ /dev/null @@ -1,71 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
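A small sketch of the depth-aware splitting used by findNextNestedField above: a comma separates nested fields only when no '<...>' or '(...)' is open. NestedSplitDemo is an illustrative name.

public class NestedSplitDemo {
    static int findNextNestedField(String s) {
        int angle = 0;
        int paren = 0;
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c == '<') {
                angle++;
            } else if (c == '>') {
                angle--;
            } else if (c == '(') {
                paren++;
            } else if (c == ')') {
                paren--;
            } else if (c == ',' && angle == 0 && paren == 0) {
                // Only a top-level comma ends the current field.
                return i;
            }
        }
        return s.length();
    }

    public static void main(String[] args) {
        String kv = "string,array<int>";
        int i = findNextNestedField(kv);
        System.out.println(kv.substring(0, i) + " | " + kv.substring(i + 1)); // string | array<int>
    }
}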
- -package org.apache.doris.common.jni.vec; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.List; - -/** - * Column value in vector column - */ -public interface ColumnValue { - // Get bytes directly when reading string value to avoid decoding&encoding - boolean canGetStringAsBytes(); - - boolean isNull(); - - boolean getBoolean(); - - // tinyint - byte getByte(); - - // smallint - short getShort(); - - int getInt(); - - float getFloat(); - - // bigint - long getLong(); - - double getDouble(); - - BigInteger getBigInteger(); - - BigDecimal getDecimal(); - - String getString(); - - byte[] getStringAsBytes(); - - LocalDate getDate(); - - LocalDateTime getDateTime(); - - byte[] getBytes(); - - void unpackArray(List values); - - void unpackMap(List keys, List values); - - void unpackStruct(List structFieldIndex, List values); -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/NativeColumnValue.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/NativeColumnValue.java deleted file mode 100644 index 8a0b4d2244cddd..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/NativeColumnValue.java +++ /dev/null @@ -1,48 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni.vec; - -/** - * Native types of data that can be directly copied. - */ -public interface NativeColumnValue { - public static class NativeValue { - public final Object baseObject; - public final long offset; - public final int length; - - public NativeValue(Object baseObject, long offset) { - this.baseObject = baseObject; - this.offset = offset; - this.length = -1; - } - - public NativeValue(Object baseObject, long offset, int length) { - this.baseObject = baseObject; - this.offset = offset; - this.length = length; - } - } - - boolean isNull(); - - /** - * Return null if the type can't be copied directly - */ - NativeValue getNativeValue(ColumnType.Type type); -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ScanPredicate.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ScanPredicate.java deleted file mode 100644 index e82f05c7d0a367..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ScanPredicate.java +++ /dev/null @@ -1,298 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni.vec; - - -import org.apache.doris.common.jni.utils.OffHeap; -import org.apache.doris.common.jni.utils.TypeNativeBytes; -import org.apache.doris.common.jni.vec.ColumnType.Type; - -import org.apache.commons.lang3.StringUtils; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Reference to doris::JniConnector::ScanPredicate - */ -public class ScanPredicate { - public enum FilterOp { - FILTER_LARGER(">"), - FILTER_LARGER_OR_EQUAL(">="), - FILTER_LESS("<"), - FILTER_LESS_OR_EQUAL("<="), - FILTER_IN("in"), - FILTER_NOT_IN("not in"); - - public final String op; - - FilterOp(String op) { - this.op = op; - } - } - - private static FilterOp parseFilterOp(int op) { - switch (op) { - case 0: - return FilterOp.FILTER_LARGER; - case 1: - return FilterOp.FILTER_LARGER_OR_EQUAL; - case 2: - return FilterOp.FILTER_LESS; - case 3: - return FilterOp.FILTER_LESS_OR_EQUAL; - case 4: - return FilterOp.FILTER_IN; - default: - return FilterOp.FILTER_NOT_IN; - } - } - - public static class PredicateValue implements ColumnValue { - private final byte[] valueBytes; - private final ColumnType.Type type; - private final int scale; - - public PredicateValue(byte[] valueBytes, ColumnType.Type type, int scale) { - this.valueBytes = valueBytes; - this.type = type; - this.scale = scale; - } - - private Object inspectObject() { - ByteBuffer byteBuffer = ByteBuffer.wrap( - TypeNativeBytes.convertByteOrder(Arrays.copyOf(valueBytes, valueBytes.length))); - switch (type) { - case BOOLEAN: - return byteBuffer.get() == 1; - case TINYINT: - return byteBuffer.get(); - case SMALLINT: - return byteBuffer.getShort(); - case INT: - return byteBuffer.getInt(); - case BIGINT: - return byteBuffer.getLong(); - case LARGEINT: - return TypeNativeBytes.getBigInteger(Arrays.copyOf(valueBytes, valueBytes.length)); - case FLOAT: - return byteBuffer.getFloat(); - case DOUBLE: - return byteBuffer.getDouble(); - case DECIMALV2: - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: - return TypeNativeBytes.getDecimal(Arrays.copyOf(valueBytes, valueBytes.length), scale); - case CHAR: - case VARCHAR: - case STRING: - return new String(valueBytes, StandardCharsets.UTF_8); - case BINARY: - return valueBytes; - default: - return new Object(); - } - } - - @Override - public boolean canGetStringAsBytes() { - return false; - } - - @Override - public String toString() { - return inspectObject().toString(); - } - - @Override - public boolean isNull() { - return false; - } - - @Override - public boolean getBoolean() { - return (boolean) inspectObject(); - } - - @Override - public byte getByte() { - return (byte) inspectObject(); - } - - @Override - public short getShort() { - return (short) 
inspectObject(); - } - - @Override - public int getInt() { - return (int) inspectObject(); - } - - @Override - public float getFloat() { - return (float) inspectObject(); - } - - @Override - public long getLong() { - return (long) inspectObject(); - } - - @Override - public double getDouble() { - return (double) inspectObject(); - } - - @Override - public BigInteger getBigInteger() { - return (BigInteger) inspectObject(); - } - - @Override - public BigDecimal getDecimal() { - return (BigDecimal) inspectObject(); - } - - @Override - public String getString() { - return toString(); - } - - @Override - public byte[] getStringAsBytes() { - throw new UnsupportedOperationException(); - } - - @Override - public LocalDate getDate() { - return LocalDate.now(); - } - - @Override - public LocalDateTime getDateTime() { - return LocalDateTime.now(); - } - - @Override - public byte[] getBytes() { - return (byte[]) inspectObject(); - } - - @Override - public void unpackArray(List values) { - - } - - @Override - public void unpackMap(List keys, List values) { - - } - - @Override - public void unpackStruct(List structFieldIndex, List values) { - - } - } - - private final long bytesLength; - public final String columName; - public final ColumnType.Type type; - public final FilterOp op; - private final byte[][] values; - public final int scale; - - private ScanPredicate(long predicateAddress, Map nameToType) { - long address = predicateAddress; - int length = OffHeap.getInt(null, address); - address += 4; - byte[] nameBytes = new byte[length]; - OffHeap.copyMemory(null, address, nameBytes, OffHeap.BYTE_ARRAY_OFFSET, length); - columName = new String(nameBytes, StandardCharsets.UTF_8); - type = nameToType.getOrDefault(columName, Type.UNSUPPORTED); - address += length; - op = parseFilterOp(OffHeap.getInt(null, address)); - address += 4; - scale = OffHeap.getInt(null, address); - address += 4; - int numValues = OffHeap.getInt(null, address); - address += 4; - values = new byte[numValues][]; - for (int i = 0; i < numValues; i++) { - int valueLength = OffHeap.getInt(null, address); - address += 4; - byte[] valueBytes = new byte[valueLength]; - OffHeap.copyMemory(null, address, valueBytes, OffHeap.BYTE_ARRAY_OFFSET, valueLength); - address += valueLength; - values[i] = valueBytes; - } - bytesLength = address - predicateAddress; - } - - public PredicateValue[] predicateValues() { - PredicateValue[] result = new PredicateValue[values.length]; - for (int i = 0; i < values.length; i++) { - result[i] = new PredicateValue(values[i], type, scale); - } - return result; - } - - public static ScanPredicate[] parseScanPredicates(long predicatesAddress, ColumnType[] types) { - Map nameToType = new HashMap<>(); - for (ColumnType columnType : types) { - nameToType.put(columnType.getName(), columnType.getType()); - } - int numPredicates = OffHeap.getInt(null, predicatesAddress); - long nextPredicateAddress = predicatesAddress + 4; - ScanPredicate[] predicates = new ScanPredicate[numPredicates]; - for (int i = 0; i < numPredicates; i++) { - predicates[i] = new ScanPredicate(nextPredicateAddress, nameToType); - nextPredicateAddress += predicates[i].bytesLength; - } - return predicates; - } - - public void dump(StringBuilder sb) { - sb.append(columName).append(' ').append(op.op).append(' '); - if (op == FilterOp.FILTER_IN || op == FilterOp.FILTER_NOT_IN) { - sb.append('(').append(StringUtils.join(predicateValues(), ", ")).append(')'); - } else { - sb.append(predicateValues()[0]); - } - } - - public static String 
dump(ScanPredicate[] scanPredicates) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < scanPredicates.length; i++) { - if (i != 0) { - sb.append(" and "); - } - scanPredicates[i].dump(sb); - } - return sb.toString(); - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/TableSchema.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/TableSchema.java deleted file mode 100644 index 421feb55a3fdd0..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/TableSchema.java +++ /dev/null @@ -1,83 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni.vec; - -import org.apache.doris.thrift.TPrimitiveType; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -import java.io.IOException; -import java.util.List; - -/** - * Used to parse the file structure of table-value-function type. - * like avro file. - */ -public class TableSchema { - private final List schemaColumns; - private final ObjectMapper objectMapper; - - public TableSchema(List schemaColumns) { - this.schemaColumns = schemaColumns; - this.objectMapper = new ObjectMapper(); - } - - public String getTableSchema() throws IOException { - try { - return objectMapper.writeValueAsString(schemaColumns); - } catch (JsonProcessingException e) { - throw new IOException(e); - } - } - - public static class SchemaColumn { - private String name; - private int type; - private SchemaColumn childColumn; - - public SchemaColumn() { - - } - - public String getName() { - return name; - } - - public SchemaColumn getChildColumn() { - return childColumn; - } - - public int getType() { - return type; - } - - public void setName(String name) { - this.name = name; - } - - public void setType(TPrimitiveType type) { - this.type = type.getValue(); - } - - public void addChildColumn(SchemaColumn childColumn) { - this.childColumn = childColumn; - } - } - -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java deleted file mode 100644 index 3998a1a3270aff..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java +++ /dev/null @@ -1,702 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
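A minimal Jackson sketch of the schema-serialization approach used by TableSchema above, assuming jackson-databind is on the classpath; the SchemaJsonDemo class and its column names and type codes are illustrative placeholders.

import com.fasterxml.jackson.databind.ObjectMapper;

import java.util.Arrays;
import java.util.List;

public class SchemaJsonDemo {
    public static class Col {
        public String name;
        public int type;

        public Col(String name, int type) {
            this.name = name;
            this.type = type;
        }
    }

    public static void main(String[] args) throws Exception {
        // Serialize a small column list to a JSON string, as getTableSchema() does above.
        List<Col> cols = Arrays.asList(new Col("id", 5), new Col("name", 23));
        System.out.println(new ObjectMapper().writeValueAsString(cols));
    }
}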
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni.vec; - - -import org.apache.doris.common.jni.utils.OffHeap; -import org.apache.doris.common.jni.utils.TypeNativeBytes; -import org.apache.doris.common.jni.vec.ColumnType.Type; -import org.apache.doris.common.jni.vec.NativeColumnValue.NativeValue; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.charset.StandardCharsets; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.List; - -/** - * Reference to Apache Spark - * see WritableColumnVector - */ -public class VectorColumn { - // String is stored as array - // The default string length to initialize the capacity. - private static final int DEFAULT_STRING_LENGTH = 4; - - // NullMap column address - private long nullMap; - // Data column address - private long data; - - // For String / Array / Map. - private long offsets; - // Number of elements in vector column - private int capacity; - // Upper limit for the maximum capacity for this column. - private static final int MAX_CAPACITY = Integer.MAX_VALUE - 15; - private final ColumnType columnType; - - private int numNulls; - - private int appendIndex; - - // For nested column type: String / Array/ Map / Struct - private VectorColumn[] childColumns; - - public VectorColumn(ColumnType columnType, int capacity) { - this.columnType = columnType; - this.capacity = 0; - this.nullMap = 0; - this.data = 0; - this.offsets = 0; - this.numNulls = 0; - this.appendIndex = 0; - if (columnType.isComplexType()) { - List children = columnType.getChildTypes(); - childColumns = new VectorColumn[children.size()]; - for (int i = 0; i < children.size(); ++i) { - childColumns[i] = new VectorColumn(children.get(i), capacity); - } - } else if (columnType.isStringType()) { - childColumns = new VectorColumn[1]; - childColumns[0] = new VectorColumn(new ColumnType("#stringBytes", Type.BYTE), - capacity * DEFAULT_STRING_LENGTH); - } - - reserveCapacity(capacity); - } - - // restore the child of string column & restore meta column - public VectorColumn(long address, int capacity, ColumnType columnType) { - this.columnType = columnType; - this.capacity = capacity; - this.nullMap = 0; - this.data = address; - this.offsets = 0; - this.numNulls = 0; - this.appendIndex = capacity; - } - - // restore block column - public VectorColumn(ColumnType columnType, int numRows, long columnMetaAddress) { - if (columnType.isUnsupported()) { - throw new RuntimeException("Unsupported type for column: " + columnType.getName()); - } - long address = columnMetaAddress; - this.capacity = numRows; - this.columnType = columnType; - this.nullMap = OffHeap.getLong(null, address); - address += 8; - this.numNulls = 0; - if (this.nullMap != 0) { - for (int i = 0; i < numRows; ++i) { - if (isNullAt(i)) { - this.numNulls++; - } - } - } - this.appendIndex = numRows; - - if (columnType.isComplexType()) { - // todo: support complex type - throw new RuntimeException("Unhandled type: 
" + columnType); - } else if (columnType.isStringType()) { - this.offsets = OffHeap.getLong(null, address); - address += 8; - this.data = 0; - int length = OffHeap.getInt(null, this.offsets + (numRows - 1) * 4L); - childColumns = new VectorColumn[1]; - childColumns[0] = new VectorColumn(OffHeap.getLong(null, address), length, - new ColumnType("#stringBytes", Type.BYTE)); - } else { - this.data = OffHeap.getLong(null, address); - this.offsets = 0; - } - } - - public long nullMapAddress() { - return nullMap; - } - - public long dataAddress() { - return data; - } - - public long offsetAddress() { - return offsets; - } - - public ColumnType.Type getColumnTyp() { - return columnType.getType(); - } - - /** - * Release columns and meta information - */ - public void close() { - if (childColumns != null) { - for (int i = 0; i < childColumns.length; i++) { - childColumns[i].close(); - childColumns[i] = null; - } - childColumns = null; - } - - if (nullMap != 0) { - OffHeap.freeMemory(nullMap); - } - if (data != 0) { - OffHeap.freeMemory(data); - } - if (offsets != 0) { - OffHeap.freeMemory(offsets); - } - nullMap = 0; - data = 0; - offsets = 0; - capacity = 0; - numNulls = 0; - appendIndex = 0; - } - - private void throwReserveException(int requiredCapacity, Throwable cause) { - String message = "Cannot reserve enough bytes in off heap memory (" - + (requiredCapacity >= 0 ? "requested " + requiredCapacity + " bytes" : "integer overflow)."); - throw new RuntimeException(message, cause); - } - - private void reserve(int requiredCapacity) { - if (requiredCapacity < 0) { - throwReserveException(requiredCapacity, null); - } else if (requiredCapacity > capacity) { - int newCapacity = (int) Math.min(MAX_CAPACITY, requiredCapacity * 2L); - if (requiredCapacity <= newCapacity) { - try { - reserveCapacity(newCapacity); - } catch (OutOfMemoryError outOfMemoryError) { - throwReserveException(requiredCapacity, outOfMemoryError); - } - } else { - // overflow - throwReserveException(requiredCapacity, null); - } - } - } - - private void reserveCapacity(int newCapacity) { - long oldCapacity = capacity; - long oldOffsetSize = capacity * 4L; - long newOffsetSize = newCapacity * 4L; - long typeSize = columnType.getTypeSize(); - if (columnType.isUnsupported()) { - // do nothing - return; - } else if (typeSize != -1) { - this.data = OffHeap.reallocateMemory(data, oldCapacity * typeSize, newCapacity * typeSize); - } else if (columnType.isStringType()) { - this.offsets = OffHeap.reallocateMemory(offsets, oldOffsetSize, newOffsetSize); - } else { - throw new RuntimeException("Unhandled type: " + columnType); - } - // todo: support complex type - if (!"#stringBytes".equals(columnType.getName())) { - this.nullMap = OffHeap.reallocateMemory(nullMap, oldCapacity, newCapacity); - OffHeap.setMemory(nullMap + oldCapacity, (byte) 0, newCapacity - oldCapacity); - } - capacity = newCapacity; - } - - public void reset() { - if (childColumns != null) { - for (VectorColumn c : childColumns) { - c.reset(); - } - } - appendIndex = 0; - if (numNulls > 0) { - putNotNulls(0, capacity); - numNulls = 0; - } - } - - public boolean isNullAt(int rowId) { - if (nullMap == 0) { - return false; - } else { - return OffHeap.getByte(null, nullMap + rowId) == 1; - } - } - - public boolean hasNull() { - return numNulls > 0; - } - - private void putNotNulls(int rowId, int count) { - if (!hasNull()) { - return; - } - long offset = nullMap + rowId; - for (int i = 0; i < count; ++i, ++offset) { - OffHeap.putByte(null, offset, (byte) 0); - } - } - - public 
int appendNull(ColumnType.Type typeValue) { - reserve(appendIndex + 1); - putNull(appendIndex); - // append default value - switch (typeValue) { - case BOOLEAN: - return appendBoolean(false); - case TINYINT: - return appendByte((byte) 0); - case SMALLINT: - return appendShort((short) 0); - case INT: - return appendInt(0); - case BIGINT: - return appendLong(0); - case LARGEINT: - return appendBigInteger(BigInteger.ZERO); - case FLOAT: - return appendFloat(0); - case DOUBLE: - return appendDouble(0); - case DECIMALV2: - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: - return appendDecimal(new BigDecimal(0)); - case DATEV2: - return appendDate(LocalDate.MIN); - case DATETIMEV2: - return appendDateTime(LocalDateTime.MIN); - case CHAR: - case VARCHAR: - case STRING: - case BINARY: - return appendBytesAndOffset(new byte[0]); - default: - throw new RuntimeException("Unknown type value: " + typeValue); - } - } - - private void putNull(int rowId) { - OffHeap.putByte(null, nullMap + rowId, (byte) 1); - ++numNulls; - } - - public int appendBoolean(boolean v) { - reserve(appendIndex + 1); - putBoolean(appendIndex, v); - return appendIndex++; - } - - private void putBoolean(int rowId, boolean value) { - OffHeap.putByte(null, data + rowId, (byte) ((value) ? 1 : 0)); - } - - public boolean getBoolean(int rowId) { - return OffHeap.getByte(null, data + rowId) == 1; - } - - public int appendByte(byte v) { - reserve(appendIndex + 1); - putByte(appendIndex, v); - return appendIndex++; - } - - public void putByte(int rowId, byte value) { - OffHeap.putByte(null, data + (long) rowId, value); - } - - public byte getByte(int rowId) { - return OffHeap.getByte(null, data + (long) rowId); - } - - public int appendShort(short v) { - reserve(appendIndex + 1); - putShort(appendIndex, v); - return appendIndex++; - } - - private void putShort(int rowId, short value) { - OffHeap.putShort(null, data + 2L * rowId, value); - } - - public short getShort(int rowId) { - return OffHeap.getShort(null, data + 2L * rowId); - } - - public int appendInt(int v) { - reserve(appendIndex + 1); - putInt(appendIndex, v); - return appendIndex++; - } - - private void putInt(int rowId, int value) { - OffHeap.putInt(null, data + 4L * rowId, value); - } - - public int getInt(int rowId) { - return OffHeap.getInt(null, data + 4L * rowId); - } - - public int appendFloat(float v) { - reserve(appendIndex + 1); - putFloat(appendIndex, v); - return appendIndex++; - } - - private void putFloat(int rowId, float value) { - OffHeap.putFloat(null, data + rowId * 4L, value); - } - - public float getFloat(int rowId) { - return OffHeap.getFloat(null, data + rowId * 4L); - } - - public int appendLong(long v) { - reserve(appendIndex + 1); - putLong(appendIndex, v); - return appendIndex++; - } - - private void putLong(int rowId, long value) { - OffHeap.putLong(null, data + 8L * rowId, value); - } - - public long getLong(int rowId) { - return OffHeap.getLong(null, data + 8L * rowId); - } - - public int appendDouble(double v) { - reserve(appendIndex + 1); - putDouble(appendIndex, v); - return appendIndex++; - } - - private void putDouble(int rowId, double value) { - OffHeap.putDouble(null, data + rowId * 8L, value); - } - - public double getDouble(int rowId) { - return OffHeap.getDouble(null, data + rowId * 8L); - } - - public int appendBigInteger(BigInteger v) { - reserve(appendIndex + 1); - putBigInteger(appendIndex, v); - return appendIndex++; - } - - private void putBigInteger(int rowId, BigInteger v) { - int typeSize = columnType.getTypeSize(); - byte[] 
bytes = TypeNativeBytes.getBigIntegerBytes(v); - OffHeap.copyMemory(bytes, OffHeap.BYTE_ARRAY_OFFSET, null, data + (long) rowId * typeSize, typeSize); - } - - public byte[] getBigIntegerBytes(int rowId) { - int typeSize = columnType.getTypeSize(); - byte[] bytes = new byte[typeSize]; - OffHeap.copyMemory(null, data + (long) rowId * typeSize, bytes, OffHeap.BYTE_ARRAY_OFFSET, typeSize); - return bytes; - } - - public BigInteger getBigInteger(int rowId) { - return TypeNativeBytes.getBigInteger(getBigIntegerBytes(rowId)); - } - - public int appendDecimal(BigDecimal v) { - reserve(appendIndex + 1); - putDecimal(appendIndex, v); - return appendIndex++; - } - - private void putDecimal(int rowId, BigDecimal v) { - int typeSize = columnType.getTypeSize(); - byte[] bytes = TypeNativeBytes.getDecimalBytes(v, columnType.getScale(), typeSize); - OffHeap.copyMemory(bytes, OffHeap.BYTE_ARRAY_OFFSET, null, data + (long) rowId * typeSize, typeSize); - } - - public byte[] getDecimalBytes(int rowId) { - int typeSize = columnType.getTypeSize(); - byte[] bytes = new byte[typeSize]; - OffHeap.copyMemory(null, data + (long) rowId * typeSize, bytes, OffHeap.BYTE_ARRAY_OFFSET, typeSize); - return bytes; - } - - public BigDecimal getDecimal(int rowId) { - return TypeNativeBytes.getDecimal(getDecimalBytes(rowId), columnType.getScale()); - } - - public int appendDate(LocalDate v) { - reserve(appendIndex + 1); - putDate(appendIndex, v); - return appendIndex++; - } - - private void putDate(int rowId, LocalDate v) { - int date = TypeNativeBytes.convertToDateV2(v.getYear(), v.getMonthValue(), v.getDayOfMonth()); - OffHeap.putInt(null, data + rowId * 4L, date); - } - - public LocalDate getDate(int rowId) { - int date = OffHeap.getInt(null, data + rowId * 4L); - return TypeNativeBytes.convertToJavaDate(date); - } - - public int appendDateTime(LocalDateTime v) { - reserve(appendIndex + 1); - putDateTime(appendIndex, v); - return appendIndex++; - } - - public LocalDateTime getDateTime(int rowId) { - long time = OffHeap.getLong(null, data + rowId * 8L); - return TypeNativeBytes.convertToJavaDateTime(time); - } - - private void putDateTime(int rowId, LocalDateTime v) { - long time = TypeNativeBytes.convertToDateTimeV2(v.getYear(), v.getMonthValue(), v.getDayOfMonth(), v.getHour(), - v.getMinute(), v.getSecond(), v.getNano() / 1000); - OffHeap.putLong(null, data + rowId * 8L, time); - } - - private void putBytes(int rowId, byte[] src, int offset, int length) { - OffHeap.copyMemory(src, OffHeap.BYTE_ARRAY_OFFSET + offset, null, data + rowId, length); - } - - private byte[] getBytes(int rowId, int length) { - byte[] array = new byte[length]; - OffHeap.copyMemory(null, data + rowId, array, OffHeap.BYTE_ARRAY_OFFSET, length); - return array; - } - - public int appendBytes(byte[] src, int offset, int length) { - reserve(appendIndex + length); - int result = appendIndex; - putBytes(appendIndex, src, offset, length); - appendIndex += length; - return result; - } - - public int appendString(String str) { - byte[] bytes = str.getBytes(StandardCharsets.UTF_8); - return appendBytes(bytes, 0, bytes.length); - } - - public int appendBytesAndOffset(byte[] src) { - return appendBytesAndOffset(src, 0, src.length); - } - - public int appendBytesAndOffset(byte[] src, int offset, int length) { - int startOffset = childColumns[0].appendBytes(src, offset, length); - reserve(appendIndex + 1); - OffHeap.putInt(null, offsets + 4L * appendIndex, startOffset + length); - return appendIndex++; - } - - public int appendStringAndOffset(String str) { - 
byte[] bytes = str.getBytes(StandardCharsets.UTF_8); - return appendBytesAndOffset(bytes, 0, bytes.length); - } - - public byte[] getBytesWithOffset(int rowId) { - long endOffsetAddress = offsets + 4L * rowId; - int startOffset = rowId == 0 ? 0 : OffHeap.getInt(null, endOffsetAddress - 4); - int endOffset = OffHeap.getInt(null, endOffsetAddress); - return childColumns[0].getBytes(startOffset, endOffset - startOffset); - } - - public String getStringWithOffset(int rowId) { - byte[] bytes = getBytesWithOffset(rowId); - return new String(bytes, StandardCharsets.UTF_8); - } - - public void updateMeta(VectorColumn meta) { - if (columnType.isUnsupported()) { - meta.appendLong(0); - } else if (columnType.isStringType()) { - meta.appendLong(nullMap); - meta.appendLong(offsets); - meta.appendLong(childColumns[0].data); - } else if (columnType.isComplexType()) { - meta.appendLong(nullMap); - if (columnType.isArray() || columnType.isMap()) { - meta.appendLong(offsets); - } - for (VectorColumn c : childColumns) { - c.updateMeta(meta); - } - } else { - meta.appendLong(nullMap); - meta.appendLong(data); - } - } - - public void appendNativeValue(NativeColumnValue o) { - ColumnType.Type typeValue = columnType.getType(); - if (o == null || o.isNull()) { - appendNull(typeValue); - return; - } - NativeValue nativeValue = o.getNativeValue(typeValue); - if (nativeValue == null) { - // can't get native value, fall back to materialized value - appendValue((ColumnValue) o); - return; - } - if (nativeValue.length == -1) { - // java origin types - long typeSize = typeValue.size; - reserve(appendIndex + 1); - OffHeap.copyMemory(nativeValue.baseObject, nativeValue.offset, - null, data + typeSize * appendIndex, typeSize); - appendIndex++; - } else { - int byteLength = nativeValue.length; - VectorColumn bytesColumn = childColumns[0]; - int startOffset = bytesColumn.appendIndex; - bytesColumn.reserve(startOffset + byteLength); - OffHeap.copyMemory(nativeValue.baseObject, nativeValue.offset, - null, bytesColumn.data + startOffset, byteLength); - bytesColumn.appendIndex += byteLength; - OffHeap.putInt(null, offsets + 4L * appendIndex, startOffset + byteLength); - appendIndex++; - } - } - - public void appendValue(ColumnValue o) { - ColumnType.Type typeValue = columnType.getType(); - if (o == null || o.isNull()) { - appendNull(typeValue); - return; - } - - switch (typeValue) { - case BOOLEAN: - appendBoolean(o.getBoolean()); - break; - case TINYINT: - appendByte(o.getByte()); - break; - case SMALLINT: - appendShort(o.getShort()); - break; - case INT: - appendInt(o.getInt()); - break; - case BIGINT: - appendLong(o.getLong()); - break; - case LARGEINT: - appendBigInteger(o.getBigInteger()); - break; - case FLOAT: - appendFloat(o.getFloat()); - break; - case DOUBLE: - appendDouble(o.getDouble()); - break; - case DECIMALV2: - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: - appendDecimal(o.getDecimal()); - break; - case DATEV2: - appendDate(o.getDate()); - break; - case DATETIMEV2: - appendDateTime(o.getDateTime()); - break; - case CHAR: - case VARCHAR: - case STRING: - if (o.canGetStringAsBytes()) { - appendBytesAndOffset(o.getStringAsBytes()); - } else { - appendStringAndOffset(o.getString()); - } - break; - case BINARY: - appendBytesAndOffset(o.getBytes()); - break; - default: - throw new RuntimeException("Unknown type value: " + typeValue); - } - } - - // for test only. 
- public void dump(StringBuilder sb, int i) { - if (isNullAt(i)) { - sb.append("NULL"); - return; - } - - ColumnType.Type typeValue = columnType.getType(); - switch (typeValue) { - case BOOLEAN: - sb.append(getBoolean(i)); - break; - case TINYINT: - sb.append(getByte(i)); - break; - case SMALLINT: - sb.append(getShort(i)); - break; - case INT: - sb.append(getInt(i)); - break; - case BIGINT: - sb.append(getLong(i)); - break; - case LARGEINT: - sb.append(getBigInteger(i)); - break; - case FLOAT: - sb.append(getFloat(i)); - break; - case DOUBLE: - sb.append(getDouble(i)); - break; - case DECIMALV2: - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: - sb.append(getDecimal(i)); - break; - case DATEV2: - sb.append(getDate(i)); - break; - case DATETIMEV2: - sb.append(getDateTime(i)); - break; - case CHAR: - case VARCHAR: - case STRING: - case BINARY: - sb.append(getStringWithOffset(i)); - break; - default: - throw new RuntimeException("Unknown type value: " + typeValue); - } - } -} diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java deleted file mode 100644 index 63b6f1ac2a9038..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java +++ /dev/null @@ -1,151 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.common.jni.vec; - - -import org.apache.doris.common.jni.utils.OffHeap; -import org.apache.doris.common.jni.vec.ColumnType.Type; - -/** - * Store a batch of data as vector table. 
- */ -public class VectorTable { - private final VectorColumn[] columns; - private final ColumnType[] columnTypes; - private final String[] fields; - private final ScanPredicate[] predicates; - private final VectorColumn meta; - private int numRows; - - private final boolean isRestoreTable; - - public VectorTable(ColumnType[] types, String[] fields, ScanPredicate[] predicates, int capacity) { - this.columnTypes = types; - this.fields = fields; - this.columns = new VectorColumn[types.length]; - this.predicates = predicates; - int metaSize = 1; // number of rows - for (int i = 0; i < types.length; i++) { - columns[i] = new VectorColumn(types[i], capacity); - metaSize += types[i].metaSize(); - } - this.meta = new VectorColumn(new ColumnType("#meta", Type.BIGINT), metaSize); - this.numRows = 0; - this.isRestoreTable = false; - } - - public VectorTable(ColumnType[] types, String[] fields, long metaAddress) { - long address = metaAddress; - this.columnTypes = types; - this.fields = fields; - this.columns = new VectorColumn[types.length]; - this.predicates = new ScanPredicate[0]; - - this.numRows = (int) OffHeap.getLong(null, address); - address += 8; - int metaSize = 1; // number of rows - for (int i = 0; i < types.length; i++) { - columns[i] = new VectorColumn(types[i], numRows, address); - metaSize += types[i].metaSize(); - address += types[i].metaSize() * 8L; - } - this.meta = new VectorColumn(metaAddress, metaSize, new ColumnType("#meta", Type.BIGINT)); - this.isRestoreTable = true; - } - - public void appendNativeData(int fieldId, NativeColumnValue o) { - assert (!isRestoreTable); - columns[fieldId].appendNativeValue(o); - } - - public void appendData(int fieldId, ColumnValue o) { - assert (!isRestoreTable); - columns[fieldId].appendValue(o); - } - - public VectorColumn[] getColumns() { - return columns; - } - - public VectorColumn getColumn(int fieldId) { - return columns[fieldId]; - } - - public ColumnType[] getColumnTypes() { - return columnTypes; - } - - public String[] getFields() { - return fields; - } - - public void releaseColumn(int fieldId) { - assert (!isRestoreTable); - columns[fieldId].close(); - } - - public void setNumRows(int numRows) { - this.numRows = numRows; - } - - public int getNumRows() { - return this.numRows; - } - - public long getMetaAddress() { - if (!isRestoreTable) { - meta.reset(); - meta.appendLong(numRows); - for (VectorColumn c : columns) { - c.updateMeta(meta); - } - } - return meta.dataAddress(); - } - - public void reset() { - assert (!isRestoreTable); - for (VectorColumn column : columns) { - column.reset(); - } - meta.reset(); - } - - public void close() { - assert (!isRestoreTable); - for (int i = 0; i < columns.length; i++) { - releaseColumn(i); - } - meta.close(); - } - - // for test only. - public String dump(int rowLimit) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < rowLimit && i < numRows; i++) { - for (int j = 0; j < columns.length; j++) { - if (j != 0) { - sb.append(", "); - } - columns[j].dump(sb, i); - } - sb.append('\n'); - } - return sb.toString(); - } -} diff --git a/fe/be-java-extensions/java-common/src/main/resources/log4j.properties b/fe/be-java-extensions/java-common/src/main/resources/log4j.properties deleted file mode 100644 index 42de6dad482ad6..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/resources/log4j.properties +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -log4j.rootLogger=INFO, RollingFile -log4j.appender.RollingFile=org.apache.log4j.RollingFileAppender -log4j.appender.RollingFile.Threshold=INFO -log4j.appender.RollingFile.File=${logPath} -log4j.appender.RollingFile.Append=true -log4j.appender.RollingFile.MaxFileSize=10MB -log4j.appender.RollingFile.MaxBackupIndex=5 -log4j.appender.RollingFile.layout=org.apache.log4j.PatternLayout -log4j.appender.RollingFile.layout.ConversionPattern= %d{yyyy-MM-dd HH:mm:ss} %5p %t %-5l - %m%n \ No newline at end of file diff --git a/fe/be-java-extensions/java-common/src/main/resources/package.xml b/fe/be-java-extensions/java-common/src/main/resources/package.xml deleted file mode 100644 index 4bbb2610603363..00000000000000 --- a/fe/be-java-extensions/java-common/src/main/resources/package.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - jar-with-dependencies - - jar - - false - - - / - true - true - runtime - - - **/Log4j2Plugins.dat - - - - - diff --git a/fe/be-java-extensions/java-common/src/test/java/org/apache/doris/common/jni/JniScannerTest.java b/fe/be-java-extensions/java-common/src/test/java/org/apache/doris/common/jni/JniScannerTest.java deleted file mode 100644 index e8a6d49df796ee..00000000000000 --- a/fe/be-java-extensions/java-common/src/test/java/org/apache/doris/common/jni/JniScannerTest.java +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.common.jni; - - -import org.apache.doris.common.jni.utils.OffHeap; -import org.apache.doris.common.jni.vec.VectorTable; - -import org.junit.Assert; -import org.junit.Test; - -import java.io.IOException; -import java.util.HashMap; - -public class JniScannerTest { - @Test - public void testMockJniScanner() throws IOException { - OffHeap.setTesting(); - MockJniScanner scanner = new MockJniScanner(32, new HashMap() { - { - put("mock_rows", "128"); - put("required_fields", "boolean,tinyint,smallint,int,bigint,largeint,float,double," - + "date,timestamp,char,varchar,string,decimalv2,decimal64"); - put("columns_types", "boolean#tinyint#smallint#int#bigint#largeint#float#double#" - + "date#timestamp#char(10)#varchar(10)#string#decimalv2(12,4)#decimal64(10,3)"); - } - }); - scanner.open(); - long metaAddress = 0; - do { - metaAddress = scanner.getNextBatchMeta(); - if (metaAddress != 0) { - long rows = OffHeap.getLong(null, metaAddress); - Assert.assertEquals(32, rows); - - VectorTable restoreTable = new VectorTable(scanner.getTable().getColumnTypes(), - scanner.getTable().getFields(), metaAddress); - System.out.println(restoreTable.dump((int) rows)); - // Restored table is release by the origin table. - } - scanner.resetTable(); - } while (metaAddress != 0); - scanner.releaseTable(); - scanner.close(); - } -} diff --git a/fe/be-java-extensions/java-udf/pom.xml b/fe/be-java-extensions/java-udf/pom.xml deleted file mode 100644 index 67921aa2cf76af..00000000000000 --- a/fe/be-java-extensions/java-udf/pom.xml +++ /dev/null @@ -1,80 +0,0 @@ - - - - - be-java-extensions - org.apache.doris - ${revision} - - 4.0.0 - - java-udf - - - 8 - 8 - - - - - - org.apache.doris - java-common - ${project.version} - - - - com.esotericsoftware - reflectasm - 1.11.9 - - - - - java-udf - - - org.apache.maven.plugins - maven-assembly-plugin - - - src/main/resources/package.xml - - - - - - - - - - make-assembly - package - - single - - - - - - - diff --git a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java b/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java deleted file mode 100644 index 70c6dcb9371044..00000000000000 --- a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java +++ /dev/null @@ -1,1091 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.udf; - -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.catalog.Type; -import org.apache.doris.common.exception.InternalException; -import org.apache.doris.common.exception.UdfRuntimeException; -import org.apache.doris.common.jni.utils.JNINativeMethod; -import org.apache.doris.common.jni.utils.JavaUdfDataType; -import org.apache.doris.common.jni.utils.UdfUtils; -import org.apache.doris.thrift.TFunction; -import org.apache.doris.thrift.TJavaUdfExecutorCtorParams; - -import com.esotericsoftware.reflectasm.MethodAccess; -import com.google.common.base.Preconditions; -import org.apache.log4j.Logger; -import org.apache.thrift.TDeserializer; -import org.apache.thrift.TException; -import org.apache.thrift.protocol.TBinaryProtocol; - -import java.io.IOException; -import java.lang.reflect.Array; -import java.lang.reflect.Method; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.math.RoundingMode; -import java.net.URLClassLoader; -import java.nio.charset.StandardCharsets; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; - -public abstract class BaseExecutor { - private static final Logger LOG = Logger.getLogger(BaseExecutor.class); - - // By convention, the function in the class must be called evaluate() - public static final String UDF_FUNCTION_NAME = "evaluate"; - public static final String UDAF_CREATE_FUNCTION = "create"; - public static final String UDAF_DESTROY_FUNCTION = "destroy"; - public static final String UDAF_ADD_FUNCTION = "add"; - public static final String UDAF_RESET_FUNCTION = "reset"; - public static final String UDAF_SERIALIZE_FUNCTION = "serialize"; - public static final String UDAF_DESERIALIZE_FUNCTION = "deserialize"; - public static final String UDAF_MERGE_FUNCTION = "merge"; - public static final String UDAF_RESULT_FUNCTION = "getValue"; - - // Object to deserialize ctor params from BE. - protected static final TBinaryProtocol.Factory PROTOCOL_FACTORY = new TBinaryProtocol.Factory(); - - protected Object udf; - // setup by init() and cleared by close() - protected URLClassLoader classLoader; - - // Return and argument types of the function inferred from the udf method - // signature. - // The JavaUdfDataType enum maps it to corresponding primitive type. - protected JavaUdfDataType[] argTypes; - protected JavaUdfDataType retType; - protected Class[] argClass; - protected MethodAccess methodAccess; - protected TFunction fn; - - /** - * Create a UdfExecutor, using parameters from a serialized thrift object. Used - * by - * the backend. 
- */ - - public BaseExecutor(byte[] thriftParams) throws Exception { - TJavaUdfExecutorCtorParams request = new TJavaUdfExecutorCtorParams(); - TDeserializer deserializer = new TDeserializer(PROTOCOL_FACTORY); - try { - deserializer.deserialize(request, thriftParams); - } catch (TException e) { - throw new InternalException(e.getMessage()); - } - Type[] parameterTypes = new Type[request.fn.arg_types.size()]; - for (int i = 0; i < request.fn.arg_types.size(); ++i) { - parameterTypes[i] = Type.fromThrift(request.fn.arg_types.get(i)); - } - fn = request.fn; - String jarFile = request.location; - Type funcRetType = Type.fromThrift(request.fn.ret_type); - init(request, jarFile, funcRetType, parameterTypes); - } - - public String debugString() { - String res = ""; - for (JavaUdfDataType type : argTypes) { - res = res + type.toString(); - if (type.getItemType() != null) { - res = res + " item: " + type.getItemType().toString() + " sql: " + type.getItemType().toSql(); - } - if (type.getKeyType() != null) { - res = res + " key: " + type.getKeyType().toString() + " sql: " + type.getKeyType().toSql(); - } - if (type.getValueType() != null) { - res = res + " key: " + type.getValueType().toString() + " sql: " + type.getValueType().toSql(); - } - } - res = res + " return type: " + retType.toString(); - if (retType.getItemType() != null) { - res = res + " item: " + retType.getItemType().toString() + " sql: " + retType.getItemType().toSql(); - } - if (retType.getKeyType() != null) { - res = res + " key: " + retType.getKeyType().toString() + " sql: " + retType.getKeyType().toSql(); - } - if (retType.getValueType() != null) { - res = res + " key: " + retType.getValueType().toString() + " sql: " + retType.getValueType().toSql(); - } - res = res + " methodAccess: " + methodAccess.toString(); - res = res + " fn.toString(): " + fn.toString(); - return res; - } - - protected abstract void init(TJavaUdfExecutorCtorParams request, String jarPath, - Type funcRetType, Type... parameterTypes) throws UdfRuntimeException; - - /** - * Close the class loader we may have created. - */ - public void close() { - if (classLoader != null) { - try { - classLoader.close(); - } catch (IOException e) { - // Log and ignore. - LOG.debug("Error closing the URLClassloader.", e); - } - } - // We are now un-usable (because the class loader has been - // closed), so null out method_ and classLoader_. - classLoader = null; - } - - public void copyTupleBasicResult(Object obj, long row, Class retClass, - long outputBufferBase, long charsAddress, long offsetsAddr, JavaUdfDataType retType) - throws UdfRuntimeException { - switch (retType.getPrimitiveType()) { - case BOOLEAN: { - boolean val = (boolean) obj; - UdfUtils.UNSAFE.putByte(outputBufferBase + row * retType.getLen(), - val ? 
(byte) 1 : 0); - break; - } - case TINYINT: { - UdfUtils.UNSAFE.putByte(outputBufferBase + row * retType.getLen(), - (byte) obj); - break; - } - case SMALLINT: { - UdfUtils.UNSAFE.putShort(outputBufferBase + row * retType.getLen(), - (short) obj); - break; - } - case INT: { - UdfUtils.UNSAFE.putInt(outputBufferBase + row * retType.getLen(), - (int) obj); - break; - } - case BIGINT: { - UdfUtils.UNSAFE.putLong(outputBufferBase + row * retType.getLen(), - (long) obj); - break; - } - case FLOAT: { - UdfUtils.UNSAFE.putFloat(outputBufferBase + row * retType.getLen(), - (float) obj); - break; - } - case DOUBLE: { - UdfUtils.UNSAFE.putDouble(outputBufferBase + row * retType.getLen(), - (double) obj); - break; - } - case DATE: { - long time = UdfUtils.convertToDate(obj, retClass); - UdfUtils.UNSAFE.putLong(outputBufferBase + row * retType.getLen(), time); - break; - } - case DATETIME: { - long time = UdfUtils.convertToDateTime(obj, retClass); - UdfUtils.UNSAFE.putLong(outputBufferBase + row * retType.getLen(), time); - break; - } - case DATEV2: { - int time = UdfUtils.convertToDateV2(obj, retClass); - UdfUtils.UNSAFE.putInt(outputBufferBase + row * retType.getLen(), time); - break; - } - case DATETIMEV2: { - long time = UdfUtils.convertToDateTimeV2(obj, retClass); - UdfUtils.UNSAFE.putLong(outputBufferBase + row * retType.getLen(), time); - break; - } - case LARGEINT: { - BigInteger data = (BigInteger) obj; - byte[] bytes = UdfUtils.convertByteOrder(data.toByteArray()); - - // here value is 16 bytes, so if result data greater than the maximum of 16 - // bytesit will return a wrong num to backend; - byte[] value = new byte[16]; - // check data is negative - if (data.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - for (int index = 0; index < Math.min(bytes.length, value.length); ++index) { - value[index] = bytes[index]; - } - - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, - outputBufferBase + row * retType.getLen(), value.length); - break; - } - case DECIMALV2: { - BigDecimal retValue = ((BigDecimal) obj).setScale(9, RoundingMode.HALF_EVEN); - BigInteger data = retValue.unscaledValue(); - byte[] bytes = UdfUtils.convertByteOrder(data.toByteArray()); - // TODO: here is maybe overflow also, and may find a better way to handle - byte[] value = new byte[16]; - if (data.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - - for (int index = 0; index < Math.min(bytes.length, value.length); ++index) { - value[index] = bytes[index]; - } - - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, - outputBufferBase + row * retType.getLen(), value.length); - break; - } - case DECIMAL32: - case DECIMAL64: - case DECIMAL128I: { - BigDecimal retValue = ((BigDecimal) obj).setScale(retType.getScale(), RoundingMode.HALF_EVEN); - BigInteger data = retValue.unscaledValue(); - byte[] bytes = UdfUtils.convertByteOrder(data.toByteArray()); - // TODO: here is maybe overflow also, and may find a better way to handle - byte[] value = new byte[retType.getLen()]; - if (data.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - - for (int index = 0; index < Math.min(bytes.length, value.length); ++index) { - value[index] = bytes[index]; - } - - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, - outputBufferBase + row * retType.getLen(), value.length); - break; - } - case CHAR: - case VARCHAR: - case STRING: { - byte[] bytes = ((String) obj).getBytes(StandardCharsets.UTF_8); - long offset = UdfUtils.UNSAFE.getInt(null, offsetsAddr + 4L * (row - 1)); - int needLen = (int) (offset 
+ bytes.length); - outputBufferBase = JNINativeMethod.resizeStringColumn(charsAddress, needLen); - offset += bytes.length; - UdfUtils.UNSAFE.putInt(null, offsetsAddr + 4L * row, Integer.parseUnsignedInt(String.valueOf(offset))); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, outputBufferBase + offset - bytes.length, - bytes.length); - break; - } - case ARRAY: - default: - throw new UdfRuntimeException("Unsupported return type: " + retType); - } - } - - public Object[] convertBasicArg(boolean isUdf, int argIdx, boolean isNullable, int rowStart, int rowEnd, - long nullMapAddr, long columnAddr, long strOffsetAddr) { - switch (argTypes[argIdx].getPrimitiveType()) { - case BOOLEAN: - return UdfConvert.convertBooleanArg(isNullable, rowStart, rowEnd, nullMapAddr, columnAddr); - case TINYINT: - return UdfConvert.convertTinyIntArg(isNullable, rowStart, rowEnd, nullMapAddr, columnAddr); - case SMALLINT: - return UdfConvert.convertSmallIntArg(isNullable, rowStart, rowEnd, nullMapAddr, columnAddr); - case INT: - return UdfConvert.convertIntArg(isNullable, rowStart, rowEnd, nullMapAddr, columnAddr); - case BIGINT: - return UdfConvert.convertBigIntArg(isNullable, rowStart, rowEnd, nullMapAddr, columnAddr); - case LARGEINT: - return UdfConvert.convertLargeIntArg(isNullable, rowStart, rowEnd, nullMapAddr, columnAddr); - case FLOAT: - return UdfConvert.convertFloatArg(isNullable, rowStart, rowEnd, nullMapAddr, columnAddr); - case DOUBLE: - return UdfConvert.convertDoubleArg(isNullable, rowStart, rowEnd, nullMapAddr, columnAddr); - case CHAR: - case VARCHAR: - case STRING: - return UdfConvert - .convertStringArg(isNullable, rowStart, rowEnd, nullMapAddr, columnAddr, strOffsetAddr); - case DATE: // udaf maybe argClass[i + argClassOffset] need add +1 - return UdfConvert - .convertDateArg(isUdf ? argClass[argIdx] : argClass[argIdx + 1], isNullable, rowStart, rowEnd, - nullMapAddr, columnAddr); - case DATETIME: - return UdfConvert - .convertDateTimeArg(isUdf ? argClass[argIdx] : argClass[argIdx + 1], isNullable, rowStart, - rowEnd, nullMapAddr, columnAddr); - case DATEV2: - return UdfConvert - .convertDateV2Arg(isUdf ? argClass[argIdx] : argClass[argIdx + 1], isNullable, rowStart, rowEnd, - nullMapAddr, columnAddr); - case DATETIMEV2: - return UdfConvert - .convertDateTimeV2Arg(isUdf ? 
argClass[argIdx] : argClass[argIdx + 1], isNullable, rowStart, - rowEnd, nullMapAddr, columnAddr); - case DECIMALV2: - case DECIMAL128I: - return UdfConvert - .convertDecimalArg(argTypes[argIdx].getScale(), 16L, isNullable, rowStart, rowEnd, nullMapAddr, - columnAddr); - case DECIMAL32: - return UdfConvert - .convertDecimalArg(argTypes[argIdx].getScale(), 4L, isNullable, rowStart, rowEnd, nullMapAddr, - columnAddr); - case DECIMAL64: - return UdfConvert - .convertDecimalArg(argTypes[argIdx].getScale(), 8L, isNullable, rowStart, rowEnd, nullMapAddr, - columnAddr); - default: { - LOG.info("Not support type: " + argTypes[argIdx].toString()); - Preconditions.checkState(false, "Not support type: " + argTypes[argIdx].toString()); - break; - } - } - return null; - } - - public Object[] convertArrayArg(int argIdx, boolean isNullable, int rowStart, int rowEnd, long nullMapAddr, - long offsetsAddr, long nestedNullMapAddr, long dataAddr, long strOffsetAddr) { - Object[] argument = (Object[]) Array.newInstance(ArrayList.class, rowEnd - rowStart); - for (int row = rowStart; row < rowEnd; ++row) { - long offsetStart = UdfUtils.UNSAFE.getLong(null, offsetsAddr + 8L * (row - 1)); - long offsetEnd = UdfUtils.UNSAFE.getLong(null, offsetsAddr + 8L * (row)); - int currentRowNum = (int) (offsetEnd - offsetStart); - switch (argTypes[argIdx].getItemType().getPrimitiveType()) { - case BOOLEAN: { - argument[row - rowStart] = UdfConvert - .convertArrayBooleanArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case TINYINT: { - argument[row - rowStart] = UdfConvert - .convertArrayTinyIntArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case SMALLINT: { - argument[row - rowStart] = UdfConvert - .convertArraySmallIntArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case INT: { - argument[row - rowStart] = UdfConvert - .convertArrayIntArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case BIGINT: { - argument[row - rowStart] = UdfConvert - .convertArrayBigIntArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case LARGEINT: { - argument[row - rowStart] = UdfConvert - .convertArrayLargeIntArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case FLOAT: { - argument[row - rowStart] = UdfConvert - .convertArrayFloatArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case DOUBLE: { - argument[row - rowStart] = UdfConvert - .convertArrayDoubleArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case CHAR: - case VARCHAR: - case STRING: { - argument[row - rowStart] = UdfConvert - .convertArrayStringArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr, strOffsetAddr); - break; - } - case DATE: { - argument[row - rowStart] = UdfConvert - .convertArrayDateArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case DATETIME: { - argument[row - rowStart] = UdfConvert - .convertArrayDateTimeArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case DATEV2: { - argument[row - rowStart] = UdfConvert - .convertArrayDateV2Arg(row, 
currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case DATETIMEV2: { - argument[row - rowStart] = UdfConvert - .convertArrayDateTimeV2Arg(row, currentRowNum, offsetStart, isNullable, - nullMapAddr, nestedNullMapAddr, dataAddr); - break; - } - case DECIMALV2: - case DECIMAL128: { - argument[row - rowStart] = UdfConvert - .convertArrayDecimalArg(argTypes[argIdx].getScale(), 16L, row, currentRowNum, - offsetStart, isNullable, nullMapAddr, nestedNullMapAddr, dataAddr); - break; - } - case DECIMAL32: { - argument[row - rowStart] = UdfConvert - .convertArrayDecimalArg(argTypes[argIdx].getScale(), 4L, row, currentRowNum, - offsetStart, isNullable, nullMapAddr, nestedNullMapAddr, dataAddr); - break; - } - case DECIMAL64: { - argument[row - rowStart] = UdfConvert - .convertArrayDecimalArg(argTypes[argIdx].getScale(), 8L, row, currentRowNum, - offsetStart, isNullable, nullMapAddr, nestedNullMapAddr, dataAddr); - break; - } - default: { - LOG.info("Not support: " + argTypes[argIdx]); - Preconditions.checkState(false, "Not support type " + argTypes[argIdx].toString()); - break; - } - } - } - return argument; - } - - public Object[] convertMapArg(PrimitiveType type, int argIdx, boolean isNullable, int rowStart, int rowEnd, - long nullMapAddr, - long offsetsAddr, long nestedNullMapAddr, long dataAddr, long strOffsetAddr, int scale) { - Object[] argument = (Object[]) Array.newInstance(ArrayList.class, rowEnd - rowStart); - for (int row = rowStart; row < rowEnd; ++row) { - long offsetStart = UdfUtils.UNSAFE.getLong(null, offsetsAddr + 8L * (row - 1)); - long offsetEnd = UdfUtils.UNSAFE.getLong(null, offsetsAddr + 8L * (row)); - int currentRowNum = (int) (offsetEnd - offsetStart); - switch (type) { - case BOOLEAN: { - argument[row - - rowStart] = UdfConvert - .convertArrayBooleanArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case TINYINT: { - argument[row - rowStart] = UdfConvert - .convertArrayTinyIntArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case SMALLINT: { - argument[row - rowStart] = UdfConvert - .convertArraySmallIntArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case INT: { - argument[row - rowStart] = UdfConvert - .convertArrayIntArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case BIGINT: { - argument[row - rowStart] = UdfConvert - .convertArrayBigIntArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case LARGEINT: { - argument[row - rowStart] = UdfConvert - .convertArrayLargeIntArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case FLOAT: { - argument[row - rowStart] = UdfConvert - .convertArrayFloatArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case DOUBLE: { - argument[row - rowStart] = UdfConvert - .convertArrayDoubleArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case CHAR: - case VARCHAR: - case STRING: { - argument[row - rowStart] = UdfConvert - .convertArrayStringArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr, strOffsetAddr); - break; - } - case DATE: { - argument[row - rowStart] = UdfConvert - 
.convertArrayDateArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case DATETIME: { - argument[row - rowStart] = UdfConvert - .convertArrayDateTimeArg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case DATEV2: { - argument[row - rowStart] = UdfConvert - .convertArrayDateV2Arg(row, currentRowNum, offsetStart, isNullable, nullMapAddr, - nestedNullMapAddr, dataAddr); - break; - } - case DATETIMEV2: { - argument[row - rowStart] = UdfConvert - .convertArrayDateTimeV2Arg(row, currentRowNum, offsetStart, isNullable, - nullMapAddr, nestedNullMapAddr, dataAddr); - break; - } - case DECIMALV2: - case DECIMAL128: { - argument[row - rowStart] = UdfConvert - .convertArrayDecimalArg(scale, 16L, row, currentRowNum, - offsetStart, isNullable, nullMapAddr, nestedNullMapAddr, dataAddr); - break; - } - case DECIMAL32: { - argument[row - rowStart] = UdfConvert - .convertArrayDecimalArg(scale, 4L, row, currentRowNum, - offsetStart, isNullable, nullMapAddr, nestedNullMapAddr, dataAddr); - break; - } - case DECIMAL64: { - argument[row - rowStart] = UdfConvert - .convertArrayDecimalArg(scale, 8L, row, currentRowNum, - offsetStart, isNullable, nullMapAddr, nestedNullMapAddr, dataAddr); - break; - } - default: { - LOG.info("Not support: " + argTypes[argIdx]); - Preconditions.checkState(false, "Not support type " + argTypes[argIdx].toString()); - break; - } - } - } - return argument; - } - - public Object[] buildHashMap(PrimitiveType keyType, PrimitiveType valueType, Object[] keyCol, Object[] valueCol) { - switch (keyType) { - case BOOLEAN: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - case TINYINT: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - case SMALLINT: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - case INT: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - case BIGINT: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - case LARGEINT: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - case FLOAT: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - case DOUBLE: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - case CHAR: - case VARCHAR: - case STRING: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - case DATEV2: - case DATE: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - case DATETIMEV2: - case DATETIME: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - case DECIMAL32: - case DECIMAL64: - case DECIMALV2: - case DECIMAL128: { - return new HashMapBuilder().get(keyCol, valueCol, valueType); - } - default: { - LOG.info("Not support: " + keyType); - Preconditions.checkState(false, "Not support type " + keyType.toString()); - break; - } - } - return null; - } - - public static class HashMapBuilder { - public Object[] get(Object[] keyCol, Object[] valueCol, PrimitiveType valueType) { - switch (valueType) { - case BOOLEAN: { - return new BuildMapFromType().get(keyCol, valueCol); - } - case TINYINT: { - return new BuildMapFromType().get(keyCol, valueCol); - } - case SMALLINT: { - return new BuildMapFromType().get(keyCol, valueCol); - } - case INT: { - return new BuildMapFromType().get(keyCol, valueCol); - } - case BIGINT: { - return new BuildMapFromType().get(keyCol, valueCol); - } - case LARGEINT: { - return new BuildMapFromType().get(keyCol, valueCol); 
- } - case FLOAT: { - return new BuildMapFromType().get(keyCol, valueCol); - } - case DOUBLE: { - return new BuildMapFromType().get(keyCol, valueCol); - } - case CHAR: - case VARCHAR: - case STRING: { - return new BuildMapFromType().get(keyCol, valueCol); - } - case DATEV2: - case DATE: { - return new BuildMapFromType().get(keyCol, valueCol); - } - case DATETIMEV2: - case DATETIME: { - return new BuildMapFromType().get(keyCol, valueCol); - } - case DECIMAL32: - case DECIMAL64: - case DECIMALV2: - case DECIMAL128: { - return new BuildMapFromType().get(keyCol, valueCol); - } - default: { - LOG.info("Not support: " + valueType); - Preconditions.checkState(false, "Not support type " + valueType.toString()); - break; - } - } - return null; - } - } - - public static class BuildMapFromType { - public Object[] get(Object[] keyCol, Object[] valueCol) { - Object[] retHashMap = new HashMap[keyCol.length]; - for (int colIdx = 0; colIdx < keyCol.length; colIdx++) { - HashMap hashMap = new HashMap<>(); - ArrayList keys = (ArrayList) (keyCol[colIdx]); - ArrayList values = (ArrayList) (valueCol[colIdx]); - for (int i = 0; i < keys.size(); i++) { - T1 key = keys.get(i); - T2 value = values.get(i); - if (!hashMap.containsKey(key)) { - hashMap.put(key, value); - } - } - retHashMap[colIdx] = hashMap; - } - return retHashMap; - } - } - - public void copyBatchBasicResultImpl(boolean isNullable, int numRows, Object[] result, long nullMapAddr, - long resColumnAddr, long strOffsetAddr, Method method) { - switch (retType.getPrimitiveType()) { - case BOOLEAN: { - UdfConvert.copyBatchBooleanResult(isNullable, numRows, (Boolean[]) result, nullMapAddr, resColumnAddr); - break; - } - case TINYINT: { - UdfConvert.copyBatchTinyIntResult(isNullable, numRows, (Byte[]) result, nullMapAddr, resColumnAddr); - break; - } - case SMALLINT: { - UdfConvert.copyBatchSmallIntResult(isNullable, numRows, (Short[]) result, nullMapAddr, resColumnAddr); - break; - } - case INT: { - UdfConvert.copyBatchIntResult(isNullable, numRows, (Integer[]) result, nullMapAddr, resColumnAddr); - break; - } - case BIGINT: { - UdfConvert.copyBatchBigIntResult(isNullable, numRows, (Long[]) result, nullMapAddr, resColumnAddr); - break; - } - case LARGEINT: { - UdfConvert.copyBatchLargeIntResult(isNullable, numRows, (BigInteger[]) result, nullMapAddr, - resColumnAddr); - break; - } - case FLOAT: { - UdfConvert.copyBatchFloatResult(isNullable, numRows, (Float[]) result, nullMapAddr, resColumnAddr); - break; - } - case DOUBLE: { - UdfConvert.copyBatchDoubleResult(isNullable, numRows, (Double[]) result, nullMapAddr, resColumnAddr); - break; - } - case CHAR: - case VARCHAR: - case STRING: { - UdfConvert.copyBatchStringResult(isNullable, numRows, (String[]) result, nullMapAddr, resColumnAddr, - strOffsetAddr); - break; - } - case DATE: { - UdfConvert.copyBatchDateResult(method.getReturnType(), isNullable, numRows, result, - nullMapAddr, resColumnAddr); - break; - } - case DATETIME: { - UdfConvert - .copyBatchDateTimeResult(method.getReturnType(), isNullable, numRows, result, - nullMapAddr, - resColumnAddr); - break; - } - case DATEV2: { - UdfConvert.copyBatchDateV2Result(method.getReturnType(), isNullable, numRows, result, - nullMapAddr, - resColumnAddr); - break; - } - case DATETIMEV2: { - UdfConvert.copyBatchDateTimeV2Result(method.getReturnType(), isNullable, numRows, - result, nullMapAddr, - resColumnAddr); - break; - } - case DECIMALV2: - case DECIMAL128I: { - UdfConvert.copyBatchDecimal128Result(retType.getScale(), isNullable, numRows, (BigDecimal[]) 
result, - nullMapAddr, - resColumnAddr); - break; - } - case DECIMAL32: { - UdfConvert.copyBatchDecimal32Result(retType.getScale(), isNullable, numRows, (BigDecimal[]) result, - nullMapAddr, - resColumnAddr); - break; - } - case DECIMAL64: { - UdfConvert.copyBatchDecimal64Result(retType.getScale(), isNullable, numRows, (BigDecimal[]) result, - nullMapAddr, - resColumnAddr); - break; - } - default: { - LOG.info("Not support return type: " + retType); - Preconditions.checkState(false, "Not support type: " + retType.toString()); - break; - } - } - } - - public void copyBatchArrayResultImpl(boolean isNullable, int numRows, Object[] result, long nullMapAddr, - long offsetsAddr, long nestedNullMapAddr, long dataAddr, long strOffsetAddr, - PrimitiveType type, int scale) { - long hasPutElementNum = 0; - for (int row = 0; row < numRows; ++row) { - hasPutElementNum = copyTupleArrayResultImpl(hasPutElementNum, isNullable, row, result[row], nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr, strOffsetAddr, type, scale); - } - } - - public long copyTupleArrayResultImpl(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, - long offsetsAddr, long nestedNullMapAddr, long dataAddr, long strOffsetAddr, - PrimitiveType type, int scale) { - switch (type) { - case BOOLEAN: { - hasPutElementNum = UdfConvert - .copyBatchArrayBooleanResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case TINYINT: { - hasPutElementNum = UdfConvert - .copyBatchArrayTinyIntResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case SMALLINT: { - hasPutElementNum = UdfConvert - .copyBatchArraySmallIntResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case INT: { - hasPutElementNum = UdfConvert - .copyBatchArrayIntResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case BIGINT: { - hasPutElementNum = UdfConvert - .copyBatchArrayBigIntResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case LARGEINT: { - hasPutElementNum = UdfConvert - .copyBatchArrayLargeIntResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case FLOAT: { - hasPutElementNum = UdfConvert - .copyBatchArrayFloatResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case DOUBLE: { - hasPutElementNum = UdfConvert - .copyBatchArrayDoubleResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case CHAR: - case VARCHAR: - case STRING: { - hasPutElementNum = UdfConvert - .copyBatchArrayStringResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr, strOffsetAddr); - break; - } - case DATE: { - hasPutElementNum = UdfConvert - .copyBatchArrayDateResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case DATETIME: { - hasPutElementNum = UdfConvert - .copyBatchArrayDateTimeResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case DATEV2: { - hasPutElementNum = UdfConvert - 
.copyBatchArrayDateV2Result(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case DATETIMEV2: { - hasPutElementNum = UdfConvert - .copyBatchArrayDateTimeV2Result(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case DECIMALV2: { - hasPutElementNum = UdfConvert - .copyBatchArrayDecimalResult(hasPutElementNum, isNullable, row, result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case DECIMAL32: { - hasPutElementNum = UdfConvert - .copyBatchArrayDecimalV3Result(scale, 4L, hasPutElementNum, isNullable, row, - result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case DECIMAL64: { - hasPutElementNum = UdfConvert - .copyBatchArrayDecimalV3Result(scale, 8L, hasPutElementNum, isNullable, row, - result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - case DECIMAL128: { - hasPutElementNum = UdfConvert - .copyBatchArrayDecimalV3Result(scale, 16L, hasPutElementNum, isNullable, row, - result, nullMapAddr, - offsetsAddr, nestedNullMapAddr, dataAddr); - break; - } - default: { - Preconditions.checkState(false, "Not support type in array: " + retType); - break; - } - } - return hasPutElementNum; - } - - public void buildArrayListFromHashMap(Object[] result, PrimitiveType keyType, PrimitiveType valueType, - Object[] keyCol, Object[] valueCol) { - switch (keyType) { - case BOOLEAN: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - case TINYINT: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - case SMALLINT: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - case INT: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - case BIGINT: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - case LARGEINT: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - case FLOAT: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - case DOUBLE: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - case CHAR: - case VARCHAR: - case STRING: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - case DATEV2: - case DATE: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - case DATETIMEV2: - case DATETIME: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - case DECIMAL32: - case DECIMAL64: - case DECIMALV2: - case DECIMAL128: { - new ArrayListBuilder().get(result, keyCol, valueCol, valueType); - break; - } - default: { - LOG.info("Not support: " + keyType); - Preconditions.checkState(false, "Not support type " + keyType.toString()); - break; - } - } - } - - public static class ArrayListBuilder { - public void get(Object[] map, Object[] keyCol, Object[] valueCol, PrimitiveType valueType) { - switch (valueType) { - case BOOLEAN: { - new BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - case TINYINT: { - new BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - case SMALLINT: { - new BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - case INT: { - new BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - case BIGINT: { - new BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - case LARGEINT: { - new 
BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - case FLOAT: { - new BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - case DOUBLE: { - new BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - case CHAR: - case VARCHAR: - case STRING: { - new BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - case DATEV2: - case DATE: { - new BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - case DATETIMEV2: - case DATETIME: { - new BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - case DECIMAL32: - case DECIMAL64: - case DECIMALV2: - case DECIMAL128: { - new BuildArrayFromType().get(map, keyCol, valueCol); - break; - } - default: { - LOG.info("Not support: " + valueType); - Preconditions.checkState(false, "Not support type " + valueType.toString()); - break; - } - } - } - } - - public static class BuildArrayFromType { - public void get(Object[] map, Object[] keyCol, Object[] valueCol) { - for (int colIdx = 0; colIdx < map.length; colIdx++) { - HashMap hashMap = (HashMap) map[colIdx]; - ArrayList keys = new ArrayList<>(); - ArrayList values = new ArrayList<>(); - for (Map.Entry entry : hashMap.entrySet()) { - keys.add(entry.getKey()); - values.add(entry.getValue()); - } - keyCol[colIdx] = keys; - valueCol[colIdx] = values; - } - } - } -} diff --git a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdafExecutor.java b/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdafExecutor.java deleted file mode 100644 index 1679125b6eeadc..00000000000000 --- a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdafExecutor.java +++ /dev/null @@ -1,428 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.udf; - -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.catalog.Type; -import org.apache.doris.common.Pair; -import org.apache.doris.common.exception.UdfRuntimeException; -import org.apache.doris.common.jni.utils.JavaUdfDataType; -import org.apache.doris.common.jni.utils.UdfUtils; -import org.apache.doris.thrift.TJavaUdfExecutorCtorParams; - -import com.esotericsoftware.reflectasm.MethodAccess; -import com.google.common.base.Joiner; -import com.google.common.collect.Lists; -import org.apache.log4j.Logger; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.lang.reflect.Constructor; -import java.lang.reflect.Method; -import java.net.MalformedURLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; - -/** - * udaf executor. 
- */ -public class UdafExecutor extends BaseExecutor { - - private static final Logger LOG = Logger.getLogger(UdafExecutor.class); - - private HashMap allMethods; - private HashMap stateObjMap; - private Class retClass; - private int addIndex; - - /** - * Constructor to create an object. - */ - public UdafExecutor(byte[] thriftParams) throws Exception { - super(thriftParams); - } - - /** - * close and invoke destroy function. - */ - @Override - public void close() { - allMethods = null; - super.close(); - } - - public Object[] convertBasicArguments(int argIdx, boolean isNullable, int rowStart, int rowEnd, long nullMapAddr, - long columnAddr, long strOffsetAddr) { - return convertBasicArg(false, argIdx, isNullable, rowStart, rowEnd, nullMapAddr, columnAddr, strOffsetAddr); - } - - public Object[] convertArrayArguments(int argIdx, boolean isNullable, int rowStart, int rowEnd, long nullMapAddr, - long offsetsAddr, long nestedNullMapAddr, long dataAddr, long strOffsetAddr) { - return convertArrayArg(argIdx, isNullable, rowStart, rowEnd, nullMapAddr, offsetsAddr, nestedNullMapAddr, - dataAddr, strOffsetAddr); - } - - public Object[] convertMapArguments(int argIdx, boolean isNullable, int rowStart, int rowEnd, long nullMapAddr, - long offsetsAddr, long keyNestedNullMapAddr, long keyDataAddr, long keyStrOffsetAddr, - long valueNestedNullMapAddr, long valueDataAddr, long valueStrOffsetAddr) { - PrimitiveType keyType = argTypes[argIdx].getKeyType().getPrimitiveType(); - PrimitiveType valueType = argTypes[argIdx].getValueType().getPrimitiveType(); - Object[] keyCol = convertMapArg(keyType, argIdx, isNullable, rowStart, rowEnd, nullMapAddr, offsetsAddr, - keyNestedNullMapAddr, keyDataAddr, - keyStrOffsetAddr, argTypes[argIdx].getKeyScale()); - Object[] valueCol = convertMapArg(valueType, argIdx, isNullable, rowStart, rowEnd, nullMapAddr, offsetsAddr, - valueNestedNullMapAddr, - valueDataAddr, - valueStrOffsetAddr, argTypes[argIdx].getValueScale()); - return buildHashMap(keyType, valueType, keyCol, valueCol); - } - - public void addBatch(boolean isSinglePlace, int rowStart, int rowEnd, long placeAddr, int offset, Object[] column) - throws UdfRuntimeException { - if (isSinglePlace) { - addBatchSingle(rowStart, rowEnd, placeAddr, column); - } else { - addBatchPlaces(rowStart, rowEnd, placeAddr, offset, column); - } - } - - public void addBatchSingle(int rowStart, int rowEnd, long placeAddr, Object[] column) throws UdfRuntimeException { - try { - Long curPlace = placeAddr; - Object[] inputArgs = new Object[argTypes.length + 1]; - Object state = stateObjMap.get(curPlace); - if (state != null) { - inputArgs[0] = state; - } else { - Object newState = createAggState(); - stateObjMap.put(curPlace, newState); - inputArgs[0] = newState; - } - - Object[][] inputs = (Object[][]) column; - for (int i = 0; i < (rowEnd - rowStart); ++i) { - for (int j = 0; j < column.length; ++j) { - inputArgs[j + 1] = inputs[j][i]; - } - methodAccess.invoke(udf, addIndex, inputArgs); - } - } catch (Exception e) { - LOG.info("evaluate exception debug: " + debugString()); - LOG.info("invoke add function meet some error: " + e.getCause().toString()); - throw new UdfRuntimeException("UDAF failed to addBatchSingle: ", e); - } - } - - public void addBatchPlaces(int rowStart, int rowEnd, long placeAddr, int offset, Object[] column) - throws UdfRuntimeException { - try { - Object[][] inputs = (Object[][]) column; - ArrayList placeState = new ArrayList<>(rowEnd - rowStart); - for (int row = rowStart; row < rowEnd; ++row) { - Long curPlace = 
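
addBatchSingle and addBatchPlaces both key aggregation state off a backend-provided place address, creating the state object lazily the first time a place is seen. A simplified sketch of that place-to-state bookkeeping (the long[] counter stands in for a user UDAF's state object; names are illustrative):

    import java.util.HashMap;
    import java.util.Map;

    public class AggStateSketch {
        // One state object per aggregation place, created on first use -- the same pattern
        // the deleted stateObjMap lookups follow before invoking the user's add method.
        static final Map<Long, long[]> states = new HashMap<>();

        static void add(long place, long value) {
            long[] state = states.computeIfAbsent(place, p -> new long[1]); // create lazily
            state[0] += value;
        }

        public static void main(String[] args) {
            add(0x10L, 5);
            add(0x10L, 7);
            add(0x20L, 1);
            System.out.println(states.get(0x10L)[0] + " " + states.get(0x20L)[0]); // 12 1
        }
    }
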
UdfUtils.UNSAFE.getLong(null, placeAddr + (8L * row)) + offset; - Object state = stateObjMap.get(curPlace); - if (state != null) { - placeState.add(state); - } else { - Object newState = createAggState(); - stateObjMap.put(curPlace, newState); - placeState.add(newState); - } - } - //spilt into two for loop - - Object[] inputArgs = new Object[argTypes.length + 1]; - for (int row = 0; row < (rowEnd - rowStart); ++row) { - inputArgs[0] = placeState.get(row); - for (int j = 0; j < column.length; ++j) { - inputArgs[j + 1] = inputs[j][row]; - } - methodAccess.invoke(udf, addIndex, inputArgs); - } - } catch (Exception e) { - LOG.info("evaluate exception debug: " + debugString()); - LOG.info("invoke add function meet some error: " + Arrays.toString(e.getStackTrace())); - throw new UdfRuntimeException("UDAF failed to addBatchPlaces: ", e); - } - } - - /** - * invoke user create function to get obj. - */ - public Object createAggState() throws UdfRuntimeException { - try { - return allMethods.get(UDAF_CREATE_FUNCTION).invoke(udf, null); - } catch (Exception e) { - LOG.warn("invoke createAggState function meet some error: " + e.getCause().toString()); - throw new UdfRuntimeException("UDAF failed to create: ", e); - } - } - - /** - * invoke destroy before colse. Here we destroy all data at once - */ - public void destroy() throws UdfRuntimeException { - try { - for (Object obj : stateObjMap.values()) { - allMethods.get(UDAF_DESTROY_FUNCTION).invoke(udf, obj); - } - stateObjMap.clear(); - } catch (Exception e) { - LOG.warn("invoke destroy function meet some error: " + e.getCause().toString()); - throw new UdfRuntimeException("UDAF failed to destroy: ", e); - } - } - - /** - * invoke serialize function and return byte[] to backends. - */ - public byte[] serialize(long place) throws UdfRuntimeException { - try { - Object[] args = new Object[2]; - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - args[0] = stateObjMap.get((Long) place); - args[1] = new DataOutputStream(baos); - allMethods.get(UDAF_SERIALIZE_FUNCTION).invoke(udf, args); - return baos.toByteArray(); - } catch (Exception e) { - LOG.info("evaluate exception debug: " + debugString()); - LOG.warn("invoke serialize function meet some error: " + e.getCause().toString()); - throw new UdfRuntimeException("UDAF failed to serialize: ", e); - } - } - - /* - * invoke reset function and reset the state to init. - */ - public void reset(long place) throws UdfRuntimeException { - try { - Object[] args = new Object[1]; - args[0] = stateObjMap.get((Long) place); - if (args[0] == null) { - return; - } - allMethods.get(UDAF_RESET_FUNCTION).invoke(udf, args); - } catch (Exception e) { - LOG.info("evaluate exception debug: " + debugString()); - LOG.warn("invoke reset function meet some error: " + e.getCause().toString()); - throw new UdfRuntimeException("UDAF failed to reset: ", e); - } - } - - /** - * invoke merge function and it's have done deserialze. - * here call deserialize first, and call merge. 
- */ - public void merge(long place, byte[] data) throws UdfRuntimeException { - try { - Object[] args = new Object[2]; - ByteArrayInputStream bins = new ByteArrayInputStream(data); - args[0] = createAggState(); - args[1] = new DataInputStream(bins); - allMethods.get(UDAF_DESERIALIZE_FUNCTION).invoke(udf, args); - args[1] = args[0]; - Long curPlace = place; - Object state = stateObjMap.get(curPlace); - if (state != null) { - args[0] = state; - } else { - Object newState = createAggState(); - stateObjMap.put(curPlace, newState); - args[0] = newState; - } - allMethods.get(UDAF_MERGE_FUNCTION).invoke(udf, args); - } catch (Exception e) { - LOG.info("evaluate exception debug: " + debugString()); - LOG.warn("invoke merge function meet some error: " + e.getCause().toString()); - throw new UdfRuntimeException("UDAF failed to merge: ", e); - } - } - - /** - * invoke getValue to return finally result. - */ - - public Object getValue(long place) throws UdfRuntimeException { - try { - if (stateObjMap.get(place) == null) { - stateObjMap.put(place, createAggState()); - } - return allMethods.get(UDAF_RESULT_FUNCTION).invoke(udf, stateObjMap.get((Long) place)); - } catch (Exception e) { - LOG.info("evaluate exception debug: " + debugString()); - LOG.warn("invoke getValue function meet some error: " + e.getCause().toString()); - throw new UdfRuntimeException("UDAF failed to result", e); - } - } - - public void copyTupleBasicResult(Object result, int row, long outputNullMapPtr, long outputBufferBase, - long charsAddress, - long offsetsAddr) throws UdfRuntimeException { - if (result == null) { - // put null obj - if (outputNullMapPtr == -1) { - throw new UdfRuntimeException("UDAF failed to store null data to not null column"); - } else { - UdfUtils.UNSAFE.putByte(outputNullMapPtr + row, (byte) 1); - } - return; - } - try { - if (outputNullMapPtr != -1) { - UdfUtils.UNSAFE.putByte(outputNullMapPtr + row, (byte) 0); - } - copyTupleBasicResult(result, row, retClass, outputBufferBase, charsAddress, - offsetsAddr, retType); - } catch (UdfRuntimeException e) { - LOG.info(e.toString()); - } - } - - public void copyTupleArrayResult(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, - long offsetsAddr, long nestedNullMapAddr, long dataAddr, long strOffsetAddr) throws UdfRuntimeException { - if (nullMapAddr > 0) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 0); - } - copyTupleArrayResultImpl(hasPutElementNum, isNullable, row, result, nullMapAddr, offsetsAddr, nestedNullMapAddr, - dataAddr, strOffsetAddr, retType.getItemType().getPrimitiveType(), retType.getScale()); - } - - public void copyTupleMapResult(long hasPutElementNum, boolean isNullable, int row, Object result, long nullMapAddr, - long offsetsAddr, - long keyNsestedNullMapAddr, long keyDataAddr, - long keyStrOffsetAddr, - long valueNsestedNullMapAddr, long valueDataAddr, long valueStrOffsetAddr) throws UdfRuntimeException { - if (nullMapAddr > 0) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 0); - } - PrimitiveType keyType = retType.getKeyType().getPrimitiveType(); - PrimitiveType valueType = retType.getValueType().getPrimitiveType(); - Object[] keyCol = new Object[1]; - Object[] valueCol = new Object[1]; - Object[] resultArr = new Object[1]; - resultArr[0] = result; - buildArrayListFromHashMap(resultArr, keyType, valueType, keyCol, valueCol); - copyTupleArrayResultImpl(hasPutElementNum, isNullable, row, - valueCol[0], nullMapAddr, offsetsAddr, - valueNsestedNullMapAddr, valueDataAddr, valueStrOffsetAddr, 
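
serialize() hands the user's UDAF a DataOutputStream over a byte buffer, and merge() replays those bytes through a DataInputStream into a freshly created state before combining it with the local place. A tiny round-trip sketch of that wire path, with a plain long counter standing in for real UDAF state:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    public class SerializeMergeSketch {
        public static void main(String[] args) throws IOException {
            long localState = 42;

            // serialize() side: state -> bytes shipped between backends
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            new DataOutputStream(baos).writeLong(localState);
            byte[] wire = baos.toByteArray();

            // merge() side: bytes -> shipped state, then combine with the local place's state
            long shipped = new DataInputStream(new ByteArrayInputStream(wire)).readLong();
            long merged = 8 + shipped;
            System.out.println(merged); // 50
        }
    }
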
valueType, retType.getKeyScale()); - copyTupleArrayResultImpl(hasPutElementNum, isNullable, row, keyCol[0], nullMapAddr, offsetsAddr, - keyNsestedNullMapAddr, keyDataAddr, keyStrOffsetAddr, keyType, retType.getValueScale()); - } - - @Override - protected void init(TJavaUdfExecutorCtorParams request, String jarPath, Type funcRetType, - Type... parameterTypes) throws UdfRuntimeException { - String className = request.fn.aggregate_fn.symbol; - allMethods = new HashMap<>(); - stateObjMap = new HashMap<>(); - - ArrayList signatures = Lists.newArrayList(); - try { - ClassLoader loader; - if (jarPath != null) { - ClassLoader parent = getClass().getClassLoader(); - classLoader = UdfUtils.getClassLoader(jarPath, parent); - loader = classLoader; - } else { - // for test - loader = ClassLoader.getSystemClassLoader(); - } - Class c = Class.forName(className, true, loader); - methodAccess = MethodAccess.get(c); - Constructor ctor = c.getConstructor(); - udf = ctor.newInstance(); - Method[] methods = c.getDeclaredMethods(); - int idx = 0; - for (idx = 0; idx < methods.length; ++idx) { - signatures.add(methods[idx].toGenericString()); - switch (methods[idx].getName()) { - case UDAF_DESTROY_FUNCTION: - case UDAF_CREATE_FUNCTION: - case UDAF_MERGE_FUNCTION: - case UDAF_SERIALIZE_FUNCTION: - case UDAF_RESET_FUNCTION: - case UDAF_DESERIALIZE_FUNCTION: { - allMethods.put(methods[idx].getName(), methods[idx]); - break; - } - case UDAF_RESULT_FUNCTION: { - allMethods.put(methods[idx].getName(), methods[idx]); - Pair returnType = UdfUtils.setReturnType(funcRetType, - methods[idx].getReturnType()); - if (!returnType.first) { - LOG.debug("result function set return parameterTypes has error"); - } else { - retType = returnType.second; - retClass = methods[idx].getReturnType(); - } - break; - } - case UDAF_ADD_FUNCTION: { - allMethods.put(methods[idx].getName(), methods[idx]); - addIndex = methodAccess.getIndex(UDAF_ADD_FUNCTION); - argClass = methods[idx].getParameterTypes(); - if (argClass.length != parameterTypes.length + 1) { - LOG.debug("add function parameterTypes length not equal " + argClass.length + " " - + parameterTypes.length + " " + methods[idx].getName()); - } - if (!(parameterTypes.length == 0)) { - Pair inputType = UdfUtils.setArgTypes(parameterTypes, - argClass, true); - if (!inputType.first) { - LOG.debug("add function set arg parameterTypes has error"); - } else { - argTypes = inputType.second; - } - } else { - // Special case where the UDF doesn't take any input args - argTypes = new JavaUdfDataType[0]; - } - break; - } - default: - break; - } - } - if (idx == methods.length) { - return; - } - StringBuilder sb = new StringBuilder(); - sb.append("Unable to find evaluate function with the correct signature: ").append(className + ".evaluate(") - .append(Joiner.on(", ").join(parameterTypes)).append(")\n").append("UDF contains: \n ") - .append(Joiner.on("\n ").join(signatures)); - throw new UdfRuntimeException(sb.toString()); - - } catch (MalformedURLException e) { - throw new UdfRuntimeException("Unable to load jar.", e); - } catch (SecurityException e) { - throw new UdfRuntimeException("Unable to load function.", e); - } catch (ClassNotFoundException e) { - throw new UdfRuntimeException("Unable to find class.", e); - } catch (NoSuchMethodException e) { - throw new UdfRuntimeException("Unable to find constructor with no arguments.", e); - } catch (IllegalArgumentException e) { - throw new UdfRuntimeException("Unable to call UDAF constructor with no arguments.", e); - } catch (Exception e) { - throw 
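
The removed init() resolves the UDAF entry points by reflecting over the user class and keying the declared methods by name before invoking them later. A stripped-down sketch of that lookup-and-invoke pattern (MySum is a hypothetical user class, not part of the deleted code):

    import java.lang.reflect.Method;
    import java.util.HashMap;
    import java.util.Map;

    public class MethodLookupSketch {
        public static class MySum {                 // hypothetical stand-in for a user UDAF
            public long create() { return 0; }
            public long add(long state, long v) { return state + v; }
        }

        public static void main(String[] args) throws Exception {
            Map<String, Method> byName = new HashMap<>();
            for (Method m : MySum.class.getDeclaredMethods()) {
                byName.put(m.getName(), m);         // key the entry points by method name
            }
            Object udf = MySum.class.getConstructor().newInstance();
            long state = (long) byName.get("create").invoke(udf);
            state = (long) byName.get("add").invoke(udf, state, 7L);
            System.out.println(state);              // 7
        }
    }
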
new UdfRuntimeException("Unable to call create UDAF instance.", e); - } - } -} diff --git a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfConvert.java b/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfConvert.java deleted file mode 100644 index 7b3a151f0065af..00000000000000 --- a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfConvert.java +++ /dev/null @@ -1,1774 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.udf; - -import org.apache.doris.common.jni.utils.JNINativeMethod; -import org.apache.doris.common.jni.utils.OffHeap; -import org.apache.doris.common.jni.utils.UdfUtils; - -import com.google.common.base.Preconditions; -import org.apache.log4j.Logger; - -import java.lang.reflect.Array; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.math.RoundingMode; -import java.nio.charset.StandardCharsets; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.ArrayList; -import java.util.Arrays; - -public class UdfConvert { - private static final Logger LOG = Logger.getLogger(UdfConvert.class); - - public static Object[] convertBooleanArg(boolean isNullable, int rowsStart, int rowsEnd, long nullMapAddr, - long columnAddr) { - Boolean[] argument = new Boolean[rowsEnd - rowsStart]; - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getBoolean(null, columnAddr + i); - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getBoolean(null, columnAddr + i); - } - } - return argument; - } - - public static Object[] convertTinyIntArg(boolean isNullable, int rowsStart, int rowsEnd, long nullMapAddr, - long columnAddr) { - Byte[] argument = new Byte[rowsEnd - rowsStart]; - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getByte(null, columnAddr + i); - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getByte(null, columnAddr + i); - } - } - return argument; - } - - public static Object[] convertSmallIntArg(boolean isNullable, int rowsStart, int rowsEnd, long nullMapAddr, - long columnAddr) { - Short[] argument = new Short[rowsEnd - rowsStart]; - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getShort(null, columnAddr + (i * 2L)); - } // else is the current row is null - } - } else { - for (int 
i = rowsStart; i < rowsEnd; ++i) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getShort(null, columnAddr + (i * 2L)); - } - } - return argument; - } - - public static Object[] convertIntArg(boolean isNullable, int rowsStart, int rowsEnd, long nullMapAddr, - long columnAddr) { - Integer[] argument = new Integer[rowsEnd - rowsStart]; - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getInt(null, columnAddr + (i * 4L)); - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getInt(null, columnAddr + (i * 4L)); - } - } - return argument; - } - - public static Object[] convertBigIntArg(boolean isNullable, int rowsStart, int rowsEnd, long nullMapAddr, - long columnAddr) { - Long[] argument = new Long[rowsEnd - rowsStart]; - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getLong(null, columnAddr + (i * 8L)); - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getLong(null, columnAddr + (i * 8L)); - } - } - return argument; - } - - public static Object[] convertFloatArg(boolean isNullable, int rowsStart, int rowsEnd, long nullMapAddr, - long columnAddr) { - Float[] argument = new Float[rowsEnd - rowsStart]; - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getFloat(null, columnAddr + (i * 4L)); - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getFloat(null, columnAddr + (i * 4L)); - } - } - return argument; - } - - public static Object[] convertDoubleArg(boolean isNullable, int rowsStart, int rowsEnd, long nullMapAddr, - long columnAddr) { - Double[] argument = new Double[rowsEnd - rowsStart]; - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getDouble(null, columnAddr + (i * 8L)); - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - argument[i - rowsStart] = UdfUtils.UNSAFE.getDouble(null, columnAddr + (i * 8L)); - } - } - return argument; - } - - public static Object[] convertDateArg(Class argTypeClass, boolean isNullable, int rowsStart, int rowsEnd, - long nullMapAddr, long columnAddr) { - Object[] argument = (Object[]) Array.newInstance(argTypeClass, rowsEnd - rowsStart); - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - long value = UdfUtils.UNSAFE.getLong(null, columnAddr + (i * 8L)); - argument[i - rowsStart] = UdfUtils.convertDateToJavaDate(value, argTypeClass); - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - long value = UdfUtils.UNSAFE.getLong(null, columnAddr + (i * 8L)); - argument[i - rowsStart] = UdfUtils.convertDateToJavaDate(value, argTypeClass); - } - } - return argument; - } - - public static Object[] convertDateTimeArg(Class argTypeClass, boolean isNullable, int rowsStart, int rowsEnd, - long nullMapAddr, long columnAddr) { - Object[] argument = (Object[]) 
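
Each convert*Arg helper reads a [rowsStart, rowsEnd) window of a fixed-width column into a boxed array, leaving an entry null wherever the null map flags the row. A self-contained sketch of that windowed read with plain arrays in place of the off-heap buffers:

    import java.util.Arrays;

    public class ArgWindowSketch {
        public static void main(String[] args) {
            byte[] nullMap = {0, 1, 0, 0};   // one byte per row, 1 == null
            int[] column = {10, 0, 30, 40};  // fixed-width data slots
            int rowsStart = 1;
            int rowsEnd = 4;
            Integer[] argument = new Integer[rowsEnd - rowsStart];
            for (int i = rowsStart; i < rowsEnd; ++i) {
                if (nullMap[i] == 0) {
                    argument[i - rowsStart] = column[i]; // non-null row
                }                                        // null row stays as Java null
            }
            System.out.println(Arrays.toString(argument)); // [null, 30, 40]
        }
    }
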
Array.newInstance(argTypeClass, rowsEnd - rowsStart); - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - long value = UdfUtils.UNSAFE.getLong(null, columnAddr + (i * 8L)); - argument[i - rowsStart] = UdfUtils - .convertDateTimeToJavaDateTime(value, argTypeClass); - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - long value = UdfUtils.UNSAFE.getLong(null, columnAddr + (i * 8L)); - argument[i - rowsStart] = UdfUtils.convertDateTimeToJavaDateTime(value, argTypeClass); - } - } - return argument; - } - - public static Object[] convertDateV2Arg(Class argTypeClass, boolean isNullable, int rowsStart, int rowsEnd, - long nullMapAddr, long columnAddr) { - Object[] argument = (Object[]) Array.newInstance(argTypeClass, rowsEnd - rowsStart); - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - int value = UdfUtils.UNSAFE.getInt(null, columnAddr + (i * 4L)); - argument[i - rowsStart] = UdfUtils.convertDateV2ToJavaDate(value, argTypeClass); - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - int value = UdfUtils.UNSAFE.getInt(null, columnAddr + (i * 4L)); - argument[i - rowsStart] = UdfUtils.convertDateV2ToJavaDate(value, argTypeClass); - } - } - return argument; - } - - public static Object[] convertDateTimeV2Arg(Class argTypeClass, boolean isNullable, int rowsStart, int rowsEnd, - long nullMapAddr, long columnAddr) { - Object[] argument = (Object[]) Array.newInstance(argTypeClass, rowsEnd - rowsStart); - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(null, nullMapAddr + i) == 0) { - long value = UdfUtils.UNSAFE.getLong(columnAddr + (i * 8L)); - argument[i - rowsStart] = UdfUtils - .convertDateTimeV2ToJavaDateTime(value, argTypeClass); - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - long value = UdfUtils.UNSAFE.getLong(null, columnAddr + (i * 8L)); - argument[i - rowsStart] = UdfUtils - .convertDateTimeV2ToJavaDateTime(value, argTypeClass); - } - } - return argument; - } - - public static Object[] convertLargeIntArg(boolean isNullable, int rowsStart, int rowsEnd, long nullMapAddr, - long columnAddr) { - BigInteger[] argument = new BigInteger[rowsEnd - rowsStart]; - byte[] bytes = new byte[16]; - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - UdfUtils.copyMemory(null, columnAddr + (i * 16L), bytes, UdfUtils.BYTE_ARRAY_OFFSET, 16); - argument[i - rowsStart] = new BigInteger(UdfUtils.convertByteOrder(bytes)); - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - UdfUtils.copyMemory(null, columnAddr + (i * 16L), bytes, UdfUtils.BYTE_ARRAY_OFFSET, 16); - argument[i - rowsStart] = new BigInteger(UdfUtils.convertByteOrder(bytes)); - } - } - return argument; - } - - public static Object[] convertDecimalArg(int scale, long typeLen, boolean isNullable, int rowsStart, int rowsEnd, - long nullMapAddr, long columnAddr) { - BigDecimal[] argument = new BigDecimal[rowsEnd - rowsStart]; - byte[] bytes = new byte[(int) typeLen]; - if (isNullable) { - for (int i = rowsStart; i < rowsEnd; ++i) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + i) == 0) { - UdfUtils.copyMemory(null, columnAddr + (i * typeLen), bytes, UdfUtils.BYTE_ARRAY_OFFSET, typeLen); - BigInteger 
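
convertLargeIntArg and convertDecimalArg rebuild a BigInteger from a fixed-width little-endian two's-complement slot by reversing the byte order first (UdfUtils.convertByteOrder appears to perform exactly that reversal). A standalone sketch of the decode step for a 16-byte LARGEINT slot:

    import java.math.BigInteger;

    public class LargeIntDecodeSketch {
        public static void main(String[] args) {
            // Little-endian two's-complement encoding of -150 in 16 bytes.
            byte[] le = new byte[16];
            le[0] = (byte) 0x6A;
            for (int i = 1; i < 16; i++) {
                le[i] = (byte) 0xFF;
            }
            byte[] be = new byte[16];
            for (int i = 0; i < 16; i++) {
                be[i] = le[15 - i];             // reverse to the big-endian order BigInteger expects
            }
            System.out.println(new BigInteger(be)); // -150
        }
    }
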
bigInteger = new BigInteger(UdfUtils.convertByteOrder(bytes)); - argument[i - rowsStart] = new BigDecimal(bigInteger, scale); //show to pass scale info - } // else is the current row is null - } - } else { - for (int i = rowsStart; i < rowsEnd; ++i) { - UdfUtils.copyMemory(null, columnAddr + (i * typeLen), bytes, UdfUtils.BYTE_ARRAY_OFFSET, typeLen); - BigInteger bigInteger = new BigInteger(UdfUtils.convertByteOrder(bytes)); - argument[i - rowsStart] = new BigDecimal(bigInteger, scale); - } - } - return argument; - } - - public static Object[] convertStringArg(boolean isNullable, int rowsStart, int rowsEnd, long nullMapAddr, - long charsAddr, long offsetsAddr) { - String[] argument = new String[rowsEnd - rowsStart]; - Preconditions.checkState(UdfUtils.UNSAFE.getInt(null, offsetsAddr + 4L * (0 - 1)) == 0, - "offsetsAddr[-1] should be 0;"); - final int totalLen = UdfUtils.UNSAFE.getInt(null, offsetsAddr + (rowsEnd - 1) * 4L); - byte[] bytes = new byte[totalLen]; - UdfUtils.copyMemory(null, charsAddr, bytes, UdfUtils.BYTE_ARRAY_OFFSET, totalLen); - if (isNullable) { - for (int row = rowsStart; row < rowsEnd; ++row) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - int prevOffset = UdfUtils.UNSAFE.getInt(null, offsetsAddr + 4L * (row - 1)); - int currOffset = UdfUtils.UNSAFE.getInt(null, offsetsAddr + row * 4L); - argument[row - rowsStart] = new String(bytes, prevOffset, currOffset - prevOffset, - StandardCharsets.UTF_8); - } // else is the current row is null - } - } else { - for (int row = rowsStart; row < rowsEnd; ++row) { - int prevOffset = UdfUtils.UNSAFE.getInt(null, offsetsAddr + 4L * (row - 1)); - int currOffset = UdfUtils.UNSAFE.getInt(null, offsetsAddr + 4L * row); - argument[row - rowsStart] = new String(bytes, prevOffset, currOffset - prevOffset, - StandardCharsets.UTF_8); - } - } - return argument; - } - - /////////////////////////////////////////copyBatch////////////////////////////////////////////////////////////// - public static void copyBatchBooleanResult(boolean isNullable, int numRows, Boolean[] result, long nullMapAddr, - long resColumnAddr) { - byte[] dataArr = new byte[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - dataArr[i] = result[i] ? (byte) 1 : 0; - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - dataArr[i] = result[i] ? 
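
convertStringArg decodes each row's string from one contiguous UTF-8 buffer plus a per-row end-offset array, treating offset[-1] as 0 (the precondition checked above). A minimal sketch of that decode with in-heap arrays instead of raw addresses:

    import java.nio.charset.StandardCharsets;

    public class StringOffsetsSketch {
        public static void main(String[] args) {
            byte[] chars = "hiDoris!".getBytes(StandardCharsets.UTF_8);
            int[] endOffsets = {2, 7, 8};        // row i occupies [endOffsets[i-1], endOffsets[i])
            int prev = 0;                        // the implicit offset[-1] == 0
            for (int row = 0; row < endOffsets.length; row++) {
                int curr = endOffsets[row];
                System.out.println(new String(chars, prev, curr - prev, StandardCharsets.UTF_8));
                prev = curr;
            }
        }
    }
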
(byte) 1 : 0; - } - } - UdfUtils.copyMemory(dataArr, UdfUtils.BYTE_ARRAY_OFFSET, null, resColumnAddr, numRows); - } - - public static void copyBatchTinyIntResult(boolean isNullable, int numRows, Byte[] result, long nullMapAddr, - long resColumnAddr) { - byte[] dataArr = new byte[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(dataArr, UdfUtils.BYTE_ARRAY_OFFSET, null, resColumnAddr, numRows); - } - - public static void copyBatchSmallIntResult(boolean isNullable, int numRows, Short[] result, long nullMapAddr, - long resColumnAddr) { - short[] dataArr = new short[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(dataArr, OffHeap.SHORT_ARRAY_OFFSET, null, resColumnAddr, numRows * 2L); - } - - public static void copyBatchIntResult(boolean isNullable, int numRows, Integer[] result, long nullMapAddr, - long resColumnAddr) { - int[] dataArr = new int[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(dataArr, UdfUtils.INT_ARRAY_OFFSET, null, resColumnAddr, numRows * 4L); - } - - public static void copyBatchBigIntResult(boolean isNullable, int numRows, Long[] result, long nullMapAddr, - long resColumnAddr) { - long[] dataArr = new long[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(dataArr, OffHeap.LONG_ARRAY_OFFSET, null, resColumnAddr, numRows * 8L); - } - - public static void copyBatchFloatResult(boolean isNullable, int numRows, Float[] result, long nullMapAddr, - long resColumnAddr) { - float[] dataArr = new float[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(dataArr, OffHeap.FLOAT_ARRAY_OFFSET, null, resColumnAddr, numRows * 4L); - } - - public static void copyBatchDoubleResult(boolean isNullable, int numRows, Double[] result, long nullMapAddr, - long resColumnAddr) { - double[] dataArr = new double[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else 
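
The scalar copyBatch*Result helpers all follow the same two-buffer contract: a byte-per-row null map plus a fixed-width data buffer, where a null row only sets its null-map byte and leaves the data slot untouched. A compact sketch of that contract with plain arrays:

    import java.util.Arrays;

    public class NullMapSketch {
        public static void main(String[] args) {
            Integer[] result = {7, null, 9};
            byte[] nullMap = new byte[result.length];
            int[] data = new int[result.length];
            for (int i = 0; i < result.length; i++) {
                if (result[i] == null) {
                    nullMap[i] = 1;          // mark null; data[i] stays 0 and is never read
                } else {
                    data[i] = result[i];
                }
            }
            System.out.println(Arrays.toString(nullMap) + " " + Arrays.toString(data)); // [0, 1, 0] [7, 0, 9]
        }
    }
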
{ - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - dataArr[i] = result[i]; - } - } - UdfUtils.copyMemory(dataArr, OffHeap.DOUBLE_ARRAY_OFFSET, null, resColumnAddr, numRows * 8L); - } - - public static void copyBatchDateResult(Class retClass, boolean isNullable, int numRows, Object[] result, - long nullMapAddr, - long resColumnAddr) { - long[] dataArr = new long[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - dataArr[i] = UdfUtils.convertToDate(result[i], retClass); - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - dataArr[i] = UdfUtils.convertToDate(result[i], retClass); - } - } - UdfUtils.copyMemory(dataArr, OffHeap.LONG_ARRAY_OFFSET, null, resColumnAddr, numRows * 8L); - } - - public static void copyBatchDateV2Result(Class retClass, boolean isNullable, int numRows, Object[] result, - long nullMapAddr, - long resColumnAddr) { - int[] dataArr = new int[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - dataArr[i] = UdfUtils.convertToDateV2(result[i], retClass); - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - dataArr[i] = UdfUtils.convertToDateV2(result[i], retClass); - } - } - UdfUtils.copyMemory(dataArr, OffHeap.INT_ARRAY_OFFSET, null, resColumnAddr, numRows * 4L); - } - - public static void copyBatchDateTimeResult(Class retClass, boolean isNullable, int numRows, Object[] result, - long nullMapAddr, long resColumnAddr) { - long[] dataArr = new long[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - dataArr[i] = UdfUtils.convertToDateTime(result[i], retClass); - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - dataArr[i] = UdfUtils.convertToDateTime(result[i], retClass); - } - } - UdfUtils.copyMemory(dataArr, OffHeap.LONG_ARRAY_OFFSET, null, resColumnAddr, numRows * 8L); - } - - public static void copyBatchDateTimeV2Result(Class retClass, boolean isNullable, int numRows, - Object[] result, long nullMapAddr, long resColumnAddr) { - long[] dataArr = new long[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - dataArr[i] = UdfUtils.convertToDateTimeV2(result[i], retClass); - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - dataArr[i] = UdfUtils.convertToDateTimeV2(result[i], retClass); - } - } - UdfUtils.copyMemory(dataArr, OffHeap.LONG_ARRAY_OFFSET, null, resColumnAddr, numRows * 8L); - } - - public static void copyBatchLargeIntResult(boolean isNullable, int numRows, BigInteger[] result, long nullMapAddr, - long resColumnAddr) { - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - byte[] bytes = UdfUtils.convertByteOrder(result[i].toByteArray()); - byte[] value = new 
byte[16]; - if (result[i].signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, resColumnAddr + (i * 16L), 16); - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - byte[] bytes = UdfUtils.convertByteOrder(result[i].toByteArray()); - byte[] value = new byte[16]; - if (result[i].signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, resColumnAddr + (i * 16L), 16); - } - } - } - - public static void copyBatchDecimal32Result(int scale, boolean isNullable, int numRows, BigDecimal[] result, - long nullMapAddr, - long columnAddr) { - BigInteger[] data = new BigInteger[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - data[i] = result[i].setScale(scale, RoundingMode.HALF_EVEN).unscaledValue(); - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - data[i] = result[i].setScale(scale, RoundingMode.HALF_EVEN).unscaledValue(); - } - } - copyBatchDecimalResult(4, isNullable, numRows, data, columnAddr); - } - - public static void copyBatchDecimal64Result(int scale, boolean isNullable, int numRows, BigDecimal[] result, - long nullMapAddr, - long columnAddr) { - BigInteger[] data = new BigInteger[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - data[i] = result[i].setScale(scale, RoundingMode.HALF_EVEN).unscaledValue(); - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - data[i] = result[i].setScale(scale, RoundingMode.HALF_EVEN).unscaledValue(); - } - } - copyBatchDecimalResult(8, isNullable, numRows, data, columnAddr); - } - - - public static void copyBatchDecimal128Result(int scale, boolean isNullable, int numRows, BigDecimal[] result, - long nullMapAddr, - long columnAddr) { - BigInteger[] data = new BigInteger[numRows]; - if (isNullable) { - byte[] nulls = new byte[numRows]; - for (int i = 0; i < numRows; i++) { - if (result[i] == null) { - nulls[i] = 1; - } else { - data[i] = result[i].setScale(scale, RoundingMode.HALF_EVEN).unscaledValue(); - } - } - UdfUtils.copyMemory(nulls, UdfUtils.BYTE_ARRAY_OFFSET, null, nullMapAddr, numRows); - } else { - for (int i = 0; i < numRows; i++) { - data[i] = result[i].setScale(scale, RoundingMode.HALF_EVEN).unscaledValue(); - } - } - copyBatchDecimalResult(16, isNullable, numRows, data, columnAddr); - } - - private static void copyBatchDecimalResult(long typeLen, boolean isNullable, int numRows, BigInteger[] result, - long resColumnAddr) { - if (isNullable) { - for (int i = 0; i < numRows; i++) { - if (result[i] != null) { - byte[] bytes = UdfUtils.convertByteOrder(result[i].toByteArray()); - byte[] value = new byte[(int) typeLen]; - if (result[i].signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, resColumnAddr + (i * typeLen), - value.length); - } - } - } 
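
copyBatchDecimal32/64/128Result first rescale each BigDecimal to the column's scale and take the unscaled BigInteger; copyBatchDecimalResult then writes that integer as a fixed-width little-endian two's-complement value, sign-filling the slot for negatives. A self-contained sketch of that encode path (typeLen of 4, 8, or 16 corresponds to the three variants):

    import java.math.BigDecimal;
    import java.math.BigInteger;
    import java.math.RoundingMode;
    import java.util.Arrays;

    public class DecimalBytesSketch {
        static byte[] encode(BigDecimal v, int scale, int typeLen) {
            BigInteger unscaled = v.setScale(scale, RoundingMode.HALF_EVEN).unscaledValue();
            byte[] be = unscaled.toByteArray();      // big-endian two's complement
            byte[] le = new byte[be.length];
            for (int i = 0; i < be.length; i++) {
                le[i] = be[be.length - 1 - i];       // reverse to little-endian
            }
            byte[] out = new byte[typeLen];
            if (unscaled.signum() == -1) {
                Arrays.fill(out, (byte) -1);         // sign-extend negative values
            }
            System.arraycopy(le, 0, out, 0, Math.min(le.length, out.length));
            return out;
        }

        public static void main(String[] args) {
            // -1.50 at scale 2 has unscaled value -150.
            System.out.println(Arrays.toString(encode(new BigDecimal("-1.5"), 2, 8)));
        }
    }
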
else { - for (int i = 0; i < numRows; i++) { - byte[] bytes = UdfUtils.convertByteOrder(result[i].toByteArray()); - byte[] value = new byte[(int) typeLen]; - if (result[i].signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, resColumnAddr + (i * typeLen), - value.length); - } - } - } - - private static final byte[] emptyBytes = new byte[0]; - - public static void copyBatchStringResult(boolean isNullable, int numRows, String[] strResult, long nullMapAddr, - long charsAddr, long offsetsAddr) { - int[] offsets = new int[numRows]; - byte[][] byteRes = new byte[numRows][]; - int offset = 0; - if (isNullable) { - for (int i = 0; i < numRows; i++) { - if (strResult[i] == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - byteRes[i] = ((String) strResult[i]).getBytes(StandardCharsets.UTF_8); - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } else { - for (int i = 0; i < numRows; i++) { - byteRes[i] = ((String) strResult[i]).getBytes(StandardCharsets.UTF_8); - offset += byteRes[i].length; - offsets[i] = offset; - } - } - byte[] bytes = new byte[offsets[numRows - 1]]; - long bytesAddr = JNINativeMethod.resizeStringColumn(charsAddr, offsets[numRows - 1]); - int dst = 0; - for (int i = 0; i < numRows; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, offsetsAddr, numRows * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr, offsets[numRows - 1]); - } - - - //////////////////////////////////// copyBatchArray////////////////////////////////////////////////////////// - - public static long copyBatchArrayBooleanResult(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Boolean value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putByte(dataAddr + ((hasPutElementNum + i)), value ? (byte) 1 : 0); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Boolean value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putByte(dataAddr + ((hasPutElementNum + i)), value ? 
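
copyBatchStringResult goes the other way: it encodes every row to UTF-8, accumulates cumulative end offsets (a null row contributes an empty slice and sets its null-map byte), resizes the destination string column to the total length, and copies back one packed byte buffer plus the offsets. A sketch of the packing step with in-heap arrays:

    import java.nio.charset.StandardCharsets;
    import java.util.Arrays;

    public class StringPackSketch {
        public static void main(String[] args) {
            String[] rows = {"hi", null, "Doris"};
            int[] endOffsets = new int[rows.length];
            byte[][] encoded = new byte[rows.length][];
            int total = 0;
            for (int i = 0; i < rows.length; i++) {
                encoded[i] = rows[i] == null ? new byte[0] : rows[i].getBytes(StandardCharsets.UTF_8);
                total += encoded[i].length;
                endOffsets[i] = total;               // cumulative end offset per row
            }
            byte[] chars = new byte[total];
            int dst = 0;
            for (byte[] e : encoded) {
                System.arraycopy(e, 0, chars, dst, e.length);
                dst += e.length;
            }
            System.out.println(Arrays.toString(endOffsets) + " " + new String(chars, StandardCharsets.UTF_8));
        }
    }
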
(byte) 1 : 0); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayTinyIntResult(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Byte value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putByte(dataAddr + ((hasPutElementNum + i)), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Byte value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putByte(dataAddr + ((hasPutElementNum + i)), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArraySmallIntResult(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Short value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putShort(dataAddr + ((hasPutElementNum + i) * 2L), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Short value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putShort(dataAddr + ((hasPutElementNum + i) * 2L), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayIntResult(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Integer value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putInt(dataAddr + ((hasPutElementNum + i) * 4L), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Integer value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putInt(dataAddr + ((hasPutElementNum + i) * 4L), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayBigIntResult(long hasPutElementNum, boolean isNullable, int 
row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Long value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putLong(dataAddr + ((hasPutElementNum + i) * 8L), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Long value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putLong(dataAddr + ((hasPutElementNum + i) * 8L), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayFloatResult(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Float value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putFloat(dataAddr + ((hasPutElementNum + i) * 4L), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Float value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putFloat(dataAddr + ((hasPutElementNum + i) * 4L), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayDoubleResult(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Double value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putDouble(dataAddr + ((hasPutElementNum + i) * 8L), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - Double value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - UdfUtils.UNSAFE.putDouble(dataAddr + ((hasPutElementNum + i) * 8L), value); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayDateResult(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = 
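
The copyBatchArray*Result family fills three buffers per ARRAY column: per-row end offsets, a per-element nested null map, and the flattened element data. A simplified, self-contained sketch of that three-buffer layout (the indexing here is illustrative and deliberately simpler than the off-heap version above):

    import java.util.Arrays;
    import java.util.List;

    public class NestedNullSketch {
        public static void main(String[] args) {
            List<List<Long>> rows = Arrays.asList(Arrays.asList(1L, null, 3L), Arrays.asList(4L));
            int totalElems = rows.stream().mapToInt(List::size).sum();
            long[] offsets = new long[rows.size()];
            byte[] nestedNulls = new byte[totalElems];
            long[] data = new long[totalElems];
            int written = 0;
            for (int row = 0; row < rows.size(); row++) {
                for (Long v : rows.get(row)) {
                    if (v == null) {
                        nestedNulls[written] = 1;    // element-level null flag
                    } else {
                        data[written] = v;
                    }
                    written++;
                }
                offsets[row] = written;              // end offset of this row's elements
            }
            System.out.println(Arrays.toString(offsets) + " " + Arrays.toString(nestedNulls)
                    + " " + Arrays.toString(data));
        }
    }
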
data.size(); - for (int i = 0; i < num; ++i) { - LocalDate value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - long time = UdfUtils.convertToDate(value, LocalDate.class); - UdfUtils.UNSAFE.putLong(dataAddr + ((hasPutElementNum + i) * 8L), time); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - LocalDate value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - long time = UdfUtils.convertToDate(value, LocalDate.class); - UdfUtils.UNSAFE.putLong(dataAddr + ((hasPutElementNum + i) * 8L), time); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayDateTimeResult(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - LocalDateTime value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - long time = UdfUtils.convertToDateTime(value, LocalDateTime.class); - UdfUtils.UNSAFE.putLong(dataAddr + ((hasPutElementNum + i) * 8L), time); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - LocalDateTime value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - long time = UdfUtils.convertToDateTime(value, LocalDateTime.class); - UdfUtils.UNSAFE.putLong(dataAddr + ((hasPutElementNum + i) * 8L), time); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayDateV2Result(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - LocalDate value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - int time = UdfUtils.convertToDateV2(value, LocalDate.class); - UdfUtils.UNSAFE.putInt(dataAddr + ((hasPutElementNum + i) * 4L), time); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - LocalDate value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - int time = UdfUtils.convertToDateV2(value, LocalDate.class); - UdfUtils.UNSAFE.putInt(dataAddr + ((hasPutElementNum + i) * 4L), time); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayDateTimeV2Result(long hasPutElementNum, boolean isNullable, int row, - Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - 
ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - LocalDateTime value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - long time = UdfUtils.convertToDateTimeV2(value, LocalDateTime.class); - UdfUtils.UNSAFE.putLong(dataAddr + ((hasPutElementNum + i) * 8L), time); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - LocalDateTime value = data.get(i); - if (value == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - long time = UdfUtils.convertToDateTimeV2(value, LocalDateTime.class); - UdfUtils.UNSAFE.putLong(dataAddr + ((hasPutElementNum + i) * 8L), time); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayLargeIntResult(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - BigInteger bigInteger = data.get(i); - if (bigInteger == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - byte[] bytes = UdfUtils.convertByteOrder(bigInteger.toByteArray()); - byte[] value = new byte[16]; - // check data is negative - if (bigInteger.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, - dataAddr + ((hasPutElementNum + i) * 16L), 16); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - BigInteger bigInteger = data.get(i); - if (bigInteger == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - byte[] bytes = UdfUtils.convertByteOrder(bigInteger.toByteArray()); - byte[] value = new byte[16]; - // check data is negative - if (bigInteger.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, - dataAddr + ((hasPutElementNum + i) * 16L), 16); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayDecimalResult(long hasPutElementNum, boolean isNullable, int row, Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - BigDecimal bigDecimal = data.get(i); - if (bigDecimal == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - BigInteger bigInteger = bigDecimal.setScale(9, RoundingMode.HALF_EVEN).unscaledValue(); - byte[] bytes = UdfUtils.convertByteOrder(bigInteger.toByteArray()); - byte[] value = new byte[16]; - // 
check data is negative - if (bigInteger.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, - dataAddr + ((hasPutElementNum + i) * 16L), 16); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - BigDecimal bigDecimal = data.get(i); - if (bigDecimal == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - BigInteger bigInteger = bigDecimal.setScale(9, RoundingMode.HALF_EVEN).unscaledValue(); - byte[] bytes = UdfUtils.convertByteOrder(bigInteger.toByteArray()); - byte[] value = new byte[16]; - // check data is negative - if (bigInteger.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, - dataAddr + ((hasPutElementNum + i) * 16L), 16); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayDecimalV3Result(int scale, long typeLen, long hasPutElementNum, boolean isNullable, - int row, - Object result, - long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + row, (byte) 1); - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - BigDecimal bigDecimal = data.get(i); - if (bigDecimal == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - BigInteger bigInteger = bigDecimal.setScale(scale, RoundingMode.HALF_EVEN).unscaledValue(); - byte[] bytes = UdfUtils.convertByteOrder(bigInteger.toByteArray()); - byte[] value = new byte[(int) typeLen]; - // check data is negative - if (bigInteger.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, - dataAddr + ((hasPutElementNum + i) * typeLen), typeLen); - } - } - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - for (int i = 0; i < num; ++i) { - BigDecimal bigDecimal = data.get(i); - if (bigDecimal == null) { - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - BigInteger bigInteger = bigDecimal.setScale(scale, RoundingMode.HALF_EVEN).unscaledValue(); - byte[] bytes = UdfUtils.convertByteOrder(bigInteger.toByteArray()); - byte[] value = new byte[(int) typeLen]; - // check data is negative - if (bigInteger.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, - dataAddr + ((hasPutElementNum + i) * typeLen), typeLen); - } - } - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - public static long copyBatchArrayStringResult(long hasPutElementNum, boolean isNullable, int row, - Object result, long nullMapAddr, long offsetsAddr, long nestedNullMapAddr, long dataAddr, - long strOffsetAddr) { - ArrayList data = (ArrayList) result; - if (isNullable) { - if (data == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + 
row, (byte) 1); - } else { - int num = data.size(); - int[] offsets = new int[num]; - byte[][] byteRes = new byte[num][]; - int oldOffsetNum = UdfUtils.UNSAFE.getInt(null, strOffsetAddr + ((hasPutElementNum - 1) * 4L)); - int offset = oldOffsetNum; - for (int i = 0; i < num; ++i) { - String value = data.get(i); - if (value == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - byteRes[i] = value.getBytes(StandardCharsets.UTF_8); - } - offset += byteRes[i].length; - offsets[i] = offset; - } - int oldSzie = 0; - if (num > 0) { - oldSzie = offsets[num - 1]; - } - byte[] bytes = new byte[oldSzie - oldOffsetNum]; - long bytesAddr = JNINativeMethod.resizeStringColumn(dataAddr, oldSzie); - int dst = 0; - for (int i = 0; i < num; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, strOffsetAddr + (4L * hasPutElementNum), - num * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr + oldOffsetNum, - oldSzie - oldOffsetNum); - hasPutElementNum = hasPutElementNum + num; - } - } else { - int num = data.size(); - int[] offsets = new int[num]; - byte[][] byteRes = new byte[num][]; - int offset = 0; - for (int i = 0; i < num; ++i) { - String value = data.get(i); - if (value == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nestedNullMapAddr + row, (byte) 1); - } else { - byteRes[i] = value.getBytes(StandardCharsets.UTF_8); - } - offset += byteRes[i].length; - offsets[i] = offset; - } - int oldOffsetNum = UdfUtils.UNSAFE.getInt(null, strOffsetAddr + ((hasPutElementNum - 1) * 4L)); - int oldSzie = 0; - if (num > 0) { - oldSzie = offsets[num - 1]; - } - byte[] bytes = new byte[oldSzie]; - long bytesAddr = JNINativeMethod.resizeStringColumn(dataAddr, oldOffsetNum + oldSzie); - int dst = 0; - for (int i = 0; i < num; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, strOffsetAddr + (4L * oldOffsetNum), - num * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr + oldOffsetNum, - oldSzie); - hasPutElementNum = hasPutElementNum + num; - } - UdfUtils.UNSAFE.putLong(null, offsetsAddr + 8L * row, hasPutElementNum); - return hasPutElementNum; - } - - //////////////////////////////////////////convertArray/////////////////////////////////////////////////////////// - public static ArrayList convertArrayBooleanArg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - boolean value = UdfUtils.UNSAFE.getBoolean(null, dataAddr + (offsetRow + offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - boolean value = UdfUtils.UNSAFE.getBoolean(null, dataAddr + (offsetRow + offsetStart)); - 
data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayTinyIntArg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - byte value = UdfUtils.UNSAFE.getByte(null, dataAddr + (offsetRow + offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - byte value = UdfUtils.UNSAFE.getByte(null, dataAddr + (offsetRow + offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArraySmallIntArg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - short value = UdfUtils.UNSAFE.getShort(null, dataAddr + 2L * (offsetRow + offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - short value = UdfUtils.UNSAFE.getShort(null, dataAddr + 2L * (offsetRow + offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayIntArg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - int value = UdfUtils.UNSAFE.getInt(null, dataAddr + 4L * (offsetRow + offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - int value = UdfUtils.UNSAFE.getInt(null, dataAddr + 4L * (offsetRow + offsetStart)); - 
data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayBigIntArg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - long value = UdfUtils.UNSAFE.getLong(null, dataAddr + 8L * (offsetRow + offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - long value = UdfUtils.UNSAFE.getLong(null, dataAddr + 8L * (offsetRow + offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayFloatArg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - float value = UdfUtils.UNSAFE.getFloat(null, dataAddr + 4L * (offsetRow + offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - float value = UdfUtils.UNSAFE.getFloat(null, dataAddr + 4L * (offsetRow + offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayDoubleArg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - double value = UdfUtils.UNSAFE.getDouble(null, dataAddr + 8L * (offsetRow + offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - double value = UdfUtils.UNSAFE.getDouble(null, dataAddr + 8L * (offsetRow + 
offsetStart)); - data.add(value); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayDateArg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - long value = UdfUtils.UNSAFE.getLong(null, dataAddr + 8L * (offsetRow + offsetStart)); - LocalDate obj = (LocalDate) UdfUtils.convertDateToJavaDate(value, LocalDate.class); - data.add(obj); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - long value = UdfUtils.UNSAFE.getLong(null, dataAddr + 8L * (offsetRow + offsetStart)); - // TODO: now argClass[argIdx + argClassOffset] is java.util.ArrayList, can't get - // nested class type, so don't know the date type class is LocalDate or others - // LocalDate obj = UdfUtils.convertDateToJavaDate(value, argClass[argIdx + - // argClassOffset]); - LocalDate obj = (LocalDate) UdfUtils.convertDateToJavaDate(value, LocalDate.class); - data.add(obj); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayDateTimeArg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - long value = UdfUtils.UNSAFE.getLong(null, dataAddr + 8L * (offsetRow + offsetStart)); - LocalDateTime obj = (LocalDateTime) UdfUtils - .convertDateTimeToJavaDateTime(value, LocalDateTime.class); - data.add(obj); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - long value = UdfUtils.UNSAFE.getLong(null, dataAddr + 8L * (offsetRow + offsetStart)); - LocalDateTime obj = (LocalDateTime) UdfUtils - .convertDateTimeToJavaDateTime(value, LocalDateTime.class); - data.add(obj); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayDateV2Arg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < 
currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - int value = UdfUtils.UNSAFE.getInt(null, dataAddr + 4L * (offsetRow + offsetStart)); - LocalDate obj = (LocalDate) UdfUtils.convertDateV2ToJavaDate(value, LocalDate.class); - data.add(obj); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - int value = UdfUtils.UNSAFE.getInt(null, dataAddr + 4L * (offsetRow + offsetStart)); - LocalDate obj = (LocalDate) UdfUtils.convertDateV2ToJavaDate(value, LocalDate.class); - data.add(obj); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayDateTimeV2Arg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - long value = UdfUtils.UNSAFE.getLong(null, dataAddr + 8L * (offsetRow + offsetStart)); - LocalDateTime obj = (LocalDateTime) UdfUtils - .convertDateTimeV2ToJavaDateTime(value, LocalDateTime.class); - data.add(obj); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - long value = UdfUtils.UNSAFE.getLong(null, dataAddr + 8L * (offsetRow + offsetStart)); - LocalDateTime obj = (LocalDateTime) UdfUtils - .convertDateTimeV2ToJavaDateTime(value, LocalDateTime.class); - data.add(obj); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayLargeIntArg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - byte[] bytes = new byte[16]; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - UdfUtils.copyMemory(null, dataAddr + 16L * (offsetRow + offsetStart), bytes, - UdfUtils.BYTE_ARRAY_OFFSET, 16); - data.add(new BigInteger(UdfUtils.convertByteOrder(bytes))); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - UdfUtils.copyMemory(null, dataAddr + 16L * (offsetRow + offsetStart), bytes, - UdfUtils.BYTE_ARRAY_OFFSET, 
16); - data.add(new BigInteger(UdfUtils.convertByteOrder(bytes))); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayDecimalArg(int scale, long typeLen, int row, int currentRowNum, - long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr) { - ArrayList data = null; - byte[] bytes = new byte[16]; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - UdfUtils.copyMemory(null, dataAddr + typeLen * (offsetRow + offsetStart), bytes, - UdfUtils.BYTE_ARRAY_OFFSET, typeLen); - BigInteger bigInteger = new BigInteger(UdfUtils.convertByteOrder(bytes)); - data.add(new BigDecimal(bigInteger, scale)); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // else is current array row is null - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - UdfUtils.copyMemory(null, dataAddr + typeLen * (offsetRow + offsetStart), bytes, - UdfUtils.BYTE_ARRAY_OFFSET, typeLen); - BigInteger bigInteger = new BigInteger(UdfUtils.convertByteOrder(bytes)); - data.add(new BigDecimal(bigInteger, scale)); - } else { // in the array row, current offset is null - data.add(null); - } - } // for loop - } // end for all current row - return data; - } - - public static ArrayList convertArrayStringArg(int row, int currentRowNum, long offsetStart, - boolean isNullable, long nullMapAddr, long nestedNullMapAddr, long dataAddr, long strOffsetAddr) { - ArrayList data = null; - if (isNullable) { - if (UdfUtils.UNSAFE.getByte(nullMapAddr + row) == 0) { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) - == 0)) { - int offset = UdfUtils.UNSAFE - .getInt(null, strOffsetAddr + (offsetRow + offsetStart) * 4L); - int numBytes = offset - UdfUtils.UNSAFE - .getInt(null, strOffsetAddr + 4L * ((offsetRow + offsetStart) - 1)); - long base = dataAddr + offset - numBytes; - byte[] bytes = new byte[numBytes]; - UdfUtils.copyMemory(null, base, bytes, UdfUtils.BYTE_ARRAY_OFFSET, numBytes); - data.add(new String(bytes, StandardCharsets.UTF_8)); - } else { - data.add(null); - } - } - } - } else { - data = new ArrayList<>(currentRowNum); - for (int offsetRow = 0; offsetRow < currentRowNum; ++offsetRow) { - if ((UdfUtils.UNSAFE.getByte(null, nestedNullMapAddr + (offsetStart + offsetRow)) == 0)) { - int offset = UdfUtils.UNSAFE - .getInt(null, strOffsetAddr + (offsetRow + offsetStart) * 4L); - int numBytes = offset - UdfUtils.UNSAFE - .getInt(null, strOffsetAddr + 4L * ((offsetRow + offsetStart) - 1)); - long base = dataAddr + offset - numBytes; - byte[] bytes = new byte[numBytes]; - UdfUtils.copyMemory(null, base, bytes, UdfUtils.BYTE_ARRAY_OFFSET, numBytes); - data.add(new String(bytes, StandardCharsets.UTF_8)); - } else { - data.add(null); - } - } - } - return data; - } -} diff --git a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java 
b/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java deleted file mode 100644 index 0559b29e9bd92e..00000000000000 --- a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java +++ /dev/null @@ -1,258 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.udf; - -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.catalog.Type; -import org.apache.doris.common.Pair; -import org.apache.doris.common.exception.UdfRuntimeException; -import org.apache.doris.common.jni.utils.JavaUdfDataType; -import org.apache.doris.common.jni.utils.UdfUtils; -import org.apache.doris.thrift.TJavaUdfExecutorCtorParams; - -import com.esotericsoftware.reflectasm.MethodAccess; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import org.apache.log4j.Logger; - -import java.lang.reflect.Array; -import java.lang.reflect.Constructor; -import java.lang.reflect.Method; -import java.net.MalformedURLException; -import java.util.ArrayList; - -public class UdfExecutor extends BaseExecutor { - // private static final java.util.logging.Logger LOG = - // Logger.getLogger(UdfExecutor.class); - public static final Logger LOG = Logger.getLogger(UdfExecutor.class); - // setup by init() and cleared by close() - private Method method; - - private int evaluateIndex; - - /** - * Create a UdfExecutor, using parameters from a serialized thrift object. Used by - * the backend. - */ - public UdfExecutor(byte[] thriftParams) throws Exception { - super(thriftParams); - } - - /** - * Close the class loader we may have created. - */ - @Override - public void close() { - // We are now un-usable (because the class loader has been - // closed), so null out method_ and classLoader_. 
- method = null; - super.close(); - } - - public Object[] convertBasicArguments(int argIdx, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, long strOffsetAddr) { - return convertBasicArg(true, argIdx, isNullable, 0, numRows, nullMapAddr, columnAddr, strOffsetAddr); - } - - public Object[] convertArrayArguments(int argIdx, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long nestedNullMapAddr, long dataAddr, long strOffsetAddr) { - return convertArrayArg(argIdx, isNullable, 0, numRows, nullMapAddr, offsetsAddr, nestedNullMapAddr, dataAddr, - strOffsetAddr); - } - - public Object[] convertMapArguments(int argIdx, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long keyNestedNullMapAddr, long keyDataAddr, long keyStrOffsetAddr, - long valueNestedNullMapAddr, long valueDataAddr, long valueStrOffsetAddr) { - PrimitiveType keyType = argTypes[argIdx].getKeyType().getPrimitiveType(); - PrimitiveType valueType = argTypes[argIdx].getValueType().getPrimitiveType(); - Object[] keyCol = convertMapArg(keyType, argIdx, isNullable, 0, numRows, nullMapAddr, offsetsAddr, - keyNestedNullMapAddr, keyDataAddr, - keyStrOffsetAddr, argTypes[argIdx].getKeyScale()); - Object[] valueCol = convertMapArg(valueType, argIdx, isNullable, 0, numRows, nullMapAddr, offsetsAddr, - valueNestedNullMapAddr, valueDataAddr, - valueStrOffsetAddr, argTypes[argIdx].getValueScale()); - return buildHashMap(keyType, valueType, keyCol, valueCol); - } - - /** - * Evaluates the UDF with 'args' as the input to the UDF. - */ - public Object[] evaluate(int numRows, Object[] column) throws UdfRuntimeException { - try { - Object[] result = (Object[]) Array.newInstance(method.getReturnType(), numRows); - Object[][] inputs = (Object[][]) column; - Object[] parameters = new Object[inputs.length]; - for (int i = 0; i < numRows; ++i) { - for (int j = 0; j < column.length; ++j) { - parameters[j] = inputs[j][i]; - } - result[i] = methodAccess.invoke(udf, evaluateIndex, parameters); - } - return result; - } catch (Exception e) { - LOG.info("evaluate exception: " + debugString()); - LOG.info("evaluate(int numRows, Object[] column) Exception: " + e.toString()); - throw new UdfRuntimeException("UDF failed to evaluate", e); - } - } - - public void copyBatchBasicResult(boolean isNullable, int numRows, Object[] result, long nullMapAddr, - long resColumnAddr, long strOffsetAddr) { - copyBatchBasicResultImpl(isNullable, numRows, result, nullMapAddr, resColumnAddr, strOffsetAddr, getMethod()); - } - - public void copyBatchArrayResult(boolean isNullable, int numRows, Object[] result, long nullMapAddr, - long offsetsAddr, long nestedNullMapAddr, long dataAddr, long strOffsetAddr) { - Preconditions.checkState(result.length == numRows, - "copyBatchArrayResult result size should equal;"); - copyBatchArrayResultImpl(isNullable, numRows, result, nullMapAddr, offsetsAddr, nestedNullMapAddr, dataAddr, - strOffsetAddr, retType.getItemType().getPrimitiveType(), retType.getScale()); - } - - public void copyBatchMapResult(boolean isNullable, int numRows, Object[] result, long nullMapAddr, - long offsetsAddr, long keyNsestedNullMapAddr, long keyDataAddr, long keyStrOffsetAddr, - long valueNsestedNullMapAddr, long valueDataAddr, long valueStrOffsetAddr) { - Preconditions.checkState(result.length == numRows, - "copyBatchMapResult result size should equal;"); - PrimitiveType keyType = retType.getKeyType().getPrimitiveType(); - PrimitiveType valueType = retType.getValueType().getPrimitiveType(); - Object[] 
keyCol = new Object[result.length]; - Object[] valueCol = new Object[result.length]; - buildArrayListFromHashMap(result, keyType, valueType, keyCol, valueCol); - - copyBatchArrayResultImpl(isNullable, numRows, valueCol, nullMapAddr, offsetsAddr, valueNsestedNullMapAddr, - valueDataAddr, - valueStrOffsetAddr, valueType, retType.getKeyScale()); - copyBatchArrayResultImpl(isNullable, numRows, keyCol, nullMapAddr, offsetsAddr, keyNsestedNullMapAddr, - keyDataAddr, - keyStrOffsetAddr, keyType, retType.getValueScale()); - } - - /** - * Evaluates the UDF with 'args' as the input to the UDF. - */ - private Object evaluate(Object... args) throws UdfRuntimeException { - try { - return method.invoke(udf, args); - } catch (Exception e) { - throw new UdfRuntimeException("UDF failed to evaluate", e); - } - } - - public Method getMethod() { - return method; - } - - // Preallocate the input objects that will be passed to the underlying UDF. - // These objects are allocated once and reused across calls to evaluate() - @Override - protected void init(TJavaUdfExecutorCtorParams request, String jarPath, Type funcRetType, - Type... parameterTypes) throws UdfRuntimeException { - String className = request.fn.scalar_fn.symbol; - ArrayList signatures = Lists.newArrayList(); - try { - LOG.debug("Loading UDF '" + className + "' from " + jarPath); - ClassLoader loader; - if (jarPath != null) { - // Save for cleanup. - ClassLoader parent = getClass().getClassLoader(); - classLoader = UdfUtils.getClassLoader(jarPath, parent); - loader = classLoader; - } else { - // for test - loader = ClassLoader.getSystemClassLoader(); - } - Class c = Class.forName(className, true, loader); - methodAccess = MethodAccess.get(c); - Constructor ctor = c.getConstructor(); - udf = ctor.newInstance(); - Method[] methods = c.getMethods(); - for (Method m : methods) { - // By convention, the udf must contain the function "evaluate" - if (!m.getName().equals(UDF_FUNCTION_NAME)) { - continue; - } - signatures.add(m.toGenericString()); - argClass = m.getParameterTypes(); - - // Try to match the arguments - if (argClass.length != parameterTypes.length) { - continue; - } - method = m; - evaluateIndex = methodAccess.getIndex(UDF_FUNCTION_NAME); - Pair returnType; - if (argClass.length == 0 && parameterTypes.length == 0) { - // Special case where the UDF doesn't take any input args - returnType = UdfUtils.setReturnType(funcRetType, m.getReturnType()); - if (!returnType.first) { - continue; - } else { - retType = returnType.second; - } - argTypes = new JavaUdfDataType[0]; - LOG.debug("Loaded UDF '" + className + "' from " + jarPath); - return; - } - returnType = UdfUtils.setReturnType(funcRetType, m.getReturnType()); - if (!returnType.first) { - continue; - } else { - retType = returnType.second; - } - Type keyType = retType.getKeyType(); - Type valueType = retType.getValueType(); - Pair inputType = UdfUtils.setArgTypes(parameterTypes, argClass, false); - if (!inputType.first) { - continue; - } else { - argTypes = inputType.second; - } - LOG.debug("Loaded UDF '" + className + "' from " + jarPath); - retType.setKeyType(keyType); - retType.setValueType(valueType); - return; - } - - StringBuilder sb = new StringBuilder(); - sb.append("Unable to find evaluate function with the correct signature: ") - .append(className + ".evaluate(") - .append(Joiner.on(", ").join(parameterTypes)) - .append(")\n") - .append("UDF contains: \n ") - .append(Joiner.on("\n ").join(signatures)); - throw new UdfRuntimeException(sb.toString()); - } catch (MalformedURLException 
e) { - throw new UdfRuntimeException("Unable to load jar.", e); - } catch (SecurityException e) { - throw new UdfRuntimeException("Unable to load function.", e); - } catch (ClassNotFoundException e) { - throw new UdfRuntimeException("Unable to find class.", e); - } catch (NoSuchMethodException e) { - throw new UdfRuntimeException( - "Unable to find constructor with no arguments.", e); - } catch (IllegalArgumentException e) { - throw new UdfRuntimeException( - "Unable to call UDF constructor with no arguments.", e); - } catch (Exception e) { - throw new UdfRuntimeException("Unable to call create UDF instance.", e); - } - } - -} diff --git a/fe/be-java-extensions/java-udf/src/main/resources/package.xml b/fe/be-java-extensions/java-udf/src/main/resources/package.xml deleted file mode 100644 index 4bbb2610603363..00000000000000 --- a/fe/be-java-extensions/java-udf/src/main/resources/package.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - jar-with-dependencies - - jar - - false - - - / - true - true - runtime - - - **/Log4j2Plugins.dat - - - - - diff --git a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/ConstantOneUdf.java b/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/ConstantOneUdf.java deleted file mode 100644 index 458cdb3cc1aa00..00000000000000 --- a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/ConstantOneUdf.java +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.udf; - -public class ConstantOneUdf { - public int evaluate() { - return 1; - } -} diff --git a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/DateTimeUdf.java b/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/DateTimeUdf.java deleted file mode 100644 index 98eaa35fbe873f..00000000000000 --- a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/DateTimeUdf.java +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.udf; - -import java.time.LocalDateTime; - -public class DateTimeUdf { - /** - * input argument of datetime. - * return year - */ - public int evaluate(LocalDateTime a) { - return a.getYear(); - } -} diff --git a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/DecimalUdf.java b/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/DecimalUdf.java deleted file mode 100644 index 8ec393ef929bc1..00000000000000 --- a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/DecimalUdf.java +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.udf; - -import java.math.BigDecimal; - -public class DecimalUdf { - /** - * a input argument of decimal. - * b input argument of decimal - * sum of a and b - */ - public BigDecimal evaluate(BigDecimal a, BigDecimal b) { - return a.add(b); - } -} diff --git a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/LargeIntUdf.java b/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/LargeIntUdf.java deleted file mode 100644 index 2a12ee043d45bd..00000000000000 --- a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/LargeIntUdf.java +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.udf; - -import java.math.BigInteger; - -public class LargeIntUdf { - /** - * input argument of largeint. 
- * input argument of largeint - * sum of a and b - */ - public BigInteger evaluate(BigInteger a, BigInteger b) { - return a.add(b); - } -} diff --git a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/SimpleAddUdf.java b/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/SimpleAddUdf.java deleted file mode 100644 index 7816ea4ab1201c..00000000000000 --- a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/SimpleAddUdf.java +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.udf; - -public class SimpleAddUdf { - public Integer evaluate(Integer a, int b) { - return a == null ? null : a + b; - } -} diff --git a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/StringConcatUdf.java b/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/StringConcatUdf.java deleted file mode 100644 index 2dd22d20315bf2..00000000000000 --- a/fe/be-java-extensions/java-udf/src/test/java/org/apache/doris/udf/StringConcatUdf.java +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.udf; - -public class StringConcatUdf { - public String evaluate(String a, String b) { - return a == null || b == null ? 
null : a + b; - } -} diff --git a/fe/be-java-extensions/jdbc-scanner/pom.xml b/fe/be-java-extensions/jdbc-scanner/pom.xml deleted file mode 100644 index 0f3eace7dda8d4..00000000000000 --- a/fe/be-java-extensions/jdbc-scanner/pom.xml +++ /dev/null @@ -1,96 +0,0 @@ - - - - - be-java-extensions - org.apache.doris - ${revision} - - 4.0.0 - - jdbc-scanner - - - 8 - 8 - - - - - org.apache.doris - java-common - ${project.version} - - - com.oracle.database.jdbc - ojdbc8 - provided - - - com.alibaba - druid - provided - - - com.clickhouse - clickhouse-jdbc - all - provided - - - com.oracle.ojdbc - orai18n - 19.3.0.0 - provided - - - - - jdbc-scanner - - - org.apache.maven.plugins - maven-assembly-plugin - - - src/main/resources/package.xml - - - - - - - - - - make-assembly - package - - single - - - - - - - - diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSource.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSource.java deleted file mode 100644 index 95b3dac585a0f2..00000000000000 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSource.java +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.jdbc; - -import com.alibaba.druid.pool.DruidDataSource; - -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -public class JdbcDataSource { - private static final JdbcDataSource jdbcDataSource = new JdbcDataSource(); - private final Map sourcesMap = new ConcurrentHashMap<>(); - - public static JdbcDataSource getDataSource() { - return jdbcDataSource; - } - - public DruidDataSource getSource(String jdbcUrl) { - return sourcesMap.get(jdbcUrl); - } - - public void putSource(String jdbcUrl, DruidDataSource ds) { - sourcesMap.put(jdbcUrl, ds); - } - - public Map getSourcesMap() { - return sourcesMap; - } -} diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java deleted file mode 100644 index 74e65c73ab17e2..00000000000000 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java +++ /dev/null @@ -1,2166 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.jdbc; - -import org.apache.doris.common.exception.InternalException; -import org.apache.doris.common.exception.UdfRuntimeException; -import org.apache.doris.common.jni.utils.JNINativeMethod; -import org.apache.doris.common.jni.utils.UdfUtils; -import org.apache.doris.common.jni.vec.ColumnType; -import org.apache.doris.common.jni.vec.VectorColumn; -import org.apache.doris.common.jni.vec.VectorTable; -import org.apache.doris.thrift.TJdbcExecutorCtorParams; -import org.apache.doris.thrift.TJdbcOperation; -import org.apache.doris.thrift.TOdbcTableType; - -import com.alibaba.druid.pool.DruidDataSource; -import com.clickhouse.data.value.UnsignedByte; -import com.clickhouse.data.value.UnsignedInteger; -import com.clickhouse.data.value.UnsignedLong; -import com.clickhouse.data.value.UnsignedShort; -import com.google.common.base.Preconditions; -import com.vesoft.nebula.client.graph.data.ValueWrapper; -import org.apache.log4j.Logger; -import org.apache.thrift.TDeserializer; -import org.apache.thrift.TException; -import org.apache.thrift.protocol.TBinaryProtocol; - -import java.io.FileNotFoundException; -import java.lang.reflect.Array; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.math.RoundingMode; -import java.net.Inet4Address; -import java.net.Inet6Address; -import java.net.InetAddress; -import java.net.MalformedURLException; -import java.nio.charset.StandardCharsets; -import java.sql.Connection; -import java.sql.Date; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Statement; -import java.sql.Timestamp; -import java.sql.Types; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.OffsetDateTime; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.UUID; -import java.util.function.Function; - -public class JdbcExecutor { - private static final Logger LOG = Logger.getLogger(JdbcExecutor.class); - private static final TBinaryProtocol.Factory PROTOCOL_FACTORY = new TBinaryProtocol.Factory(); - private Connection conn = null; - private PreparedStatement preparedStatement = null; - private Statement stmt = null; - private ResultSet resultSet = null; - private ResultSetMetaData resultSetMetaData = null; - private List resultColumnTypeNames = null; - private List block = null; - private int batchSizeNum = 0; - private int curBlockRows = 0; - private static final byte[] emptyBytes = new byte[0]; - private DruidDataSource druidDataSource = null; - private byte[] druidDataSourceLock = new byte[0]; - private int minPoolSize; - private int maxPoolSize; - private int minIdleSize; - private int maxIdleTime; - private int maxWaitTime; - private TOdbcTableType tableType; - - public JdbcExecutor(byte[] thriftParams) throws Exception { - TJdbcExecutorCtorParams request = new TJdbcExecutorCtorParams(); - TDeserializer deserializer = new TDeserializer(PROTOCOL_FACTORY); - try { - 
deserializer.deserialize(request, thriftParams); - } catch (TException e) { - throw new InternalException(e.getMessage()); - } - tableType = request.table_type; - minPoolSize = Integer.valueOf(System.getProperty("JDBC_MIN_POOL", "1")); - maxPoolSize = Integer.valueOf(System.getProperty("JDBC_MAX_POOL", "100")); - maxIdleTime = Integer.valueOf(System.getProperty("JDBC_MAX_IDLE_TIME", "300000")); - maxWaitTime = Integer.valueOf(System.getProperty("JDBC_MAX_WAIT_TIME", "5000")); - minIdleSize = minPoolSize > 0 ? 1 : 0; - LOG.info("JdbcExecutor set minPoolSize = " + minPoolSize - + ", maxPoolSize = " + maxPoolSize - + ", maxIdleTime = " + maxIdleTime - + ", maxWaitTime = " + maxWaitTime - + ", minIdleSize = " + minIdleSize); - init(request.driver_path, request.statement, request.batch_size, request.jdbc_driver_class, - request.jdbc_url, request.jdbc_user, request.jdbc_password, request.op, request.table_type); - } - - public boolean isNebula() { - return tableType == TOdbcTableType.NEBULA; - } - - public void close() throws Exception { - if (resultSet != null) { - resultSet.close(); - } - if (stmt != null) { - stmt.close(); - } - if (conn != null) { - conn.close(); - } - if (minIdleSize == 0) { - // it can be immediately closed if there is no need to maintain the cache of datasource - druidDataSource.close(); - JdbcDataSource.getDataSource().getSourcesMap().clear(); - druidDataSource = null; - } - resultSet = null; - stmt = null; - conn = null; - } - - public int read() throws UdfRuntimeException { - try { - resultSet = ((PreparedStatement) stmt).executeQuery(); - resultSetMetaData = resultSet.getMetaData(); - int columnCount = resultSetMetaData.getColumnCount(); - resultColumnTypeNames = new ArrayList<>(columnCount); - block = new ArrayList<>(columnCount); - if (isNebula()) { - for (int i = 0; i < columnCount; ++i) { - block.add((Object[]) Array.newInstance(Object.class, batchSizeNum)); - } - } else { - for (int i = 0; i < columnCount; ++i) { - resultColumnTypeNames.add(resultSetMetaData.getColumnClassName(i + 1)); - block.add((Object[]) Array.newInstance(Object.class, batchSizeNum)); - } - } - return columnCount; - } catch (SQLException e) { - throw new UdfRuntimeException("JDBC executor sql has error: ", e); - } - } - - public int write(String sql) throws UdfRuntimeException { - try { - return stmt.executeUpdate(sql); - } catch (SQLException e) { - throw new UdfRuntimeException("JDBC executor sql has error: ", e); - } - } - - public int write(Map params) throws UdfRuntimeException { - String[] requiredFields = params.get("required_fields").split(","); - String[] types = params.get("columns_types").split("#"); - long metaAddress = Long.parseLong(params.get("meta_address")); - // Get sql string from configuration map - ColumnType[] columnTypes = new ColumnType[types.length]; - for (int i = 0; i < types.length; i++) { - columnTypes[i] = ColumnType.parseType(requiredFields[i], types[i]); - } - VectorTable batchTable = new VectorTable(columnTypes, requiredFields, metaAddress); - // todo: insert the batch table by PreparedStatement - // Can't release or close batchTable, it's released by c++ - try { - insert(batchTable); - } catch (SQLException e) { - throw new UdfRuntimeException("JDBC executor sql has error: ", e); - } - return batchTable.getNumRows(); - } - - private int insert(VectorTable data) throws SQLException { - for (int i = 0; i < data.getNumRows(); ++i) { - for (int j = 0; j < data.getColumns().length; ++j) { - insertColumn(i, j, data.getColumns()[j]); - } - 
preparedStatement.addBatch(); - } - preparedStatement.executeBatch(); - preparedStatement.clearBatch(); - return data.getNumRows(); - } - - private void insertColumn(int rowIdx, int colIdx, VectorColumn column) throws SQLException { - int parameterIndex = colIdx + 1; - ColumnType.Type dorisType = column.getColumnTyp(); - if (column.isNullAt(rowIdx)) { - insertNullColumn(parameterIndex, dorisType); - return; - } - switch (dorisType) { - case BOOLEAN: - preparedStatement.setBoolean(parameterIndex, column.getBoolean(rowIdx)); - break; - case TINYINT: - preparedStatement.setByte(parameterIndex, column.getByte(rowIdx)); - break; - case SMALLINT: - preparedStatement.setShort(parameterIndex, column.getShort(rowIdx)); - break; - case INT: - preparedStatement.setInt(parameterIndex, column.getInt(rowIdx)); - break; - case BIGINT: - preparedStatement.setLong(parameterIndex, column.getLong(rowIdx)); - break; - case LARGEINT: - preparedStatement.setObject(parameterIndex, column.getBigInteger(rowIdx)); - break; - case FLOAT: - preparedStatement.setFloat(parameterIndex, column.getFloat(rowIdx)); - break; - case DOUBLE: - preparedStatement.setDouble(parameterIndex, column.getDouble(rowIdx)); - break; - case DECIMALV2: - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: - preparedStatement.setBigDecimal(parameterIndex, column.getDecimal(rowIdx)); - break; - case DATEV2: - preparedStatement.setDate(parameterIndex, Date.valueOf(column.getDate(rowIdx))); - break; - case DATETIMEV2: - preparedStatement.setTimestamp(parameterIndex, Timestamp.valueOf(column.getDateTime(rowIdx))); - break; - case CHAR: - case VARCHAR: - case STRING: - case BINARY: - preparedStatement.setString(parameterIndex, column.getStringWithOffset(rowIdx)); - break; - default: - throw new RuntimeException("Unknown type value: " + dorisType); - } - } - - private void insertNullColumn(int parameterIndex, ColumnType.Type dorisType) throws SQLException { - switch (dorisType) { - case BOOLEAN: - preparedStatement.setNull(parameterIndex, Types.BOOLEAN); - break; - case TINYINT: - preparedStatement.setNull(parameterIndex, Types.TINYINT); - break; - case SMALLINT: - preparedStatement.setNull(parameterIndex, Types.SMALLINT); - break; - case INT: - preparedStatement.setNull(parameterIndex, Types.INTEGER); - break; - case BIGINT: - preparedStatement.setNull(parameterIndex, Types.BIGINT); - break; - case LARGEINT: - preparedStatement.setNull(parameterIndex, Types.JAVA_OBJECT); - break; - case FLOAT: - preparedStatement.setNull(parameterIndex, Types.FLOAT); - break; - case DOUBLE: - preparedStatement.setNull(parameterIndex, Types.DOUBLE); - break; - case DECIMALV2: - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: - preparedStatement.setNull(parameterIndex, Types.DECIMAL); - break; - case DATEV2: - preparedStatement.setNull(parameterIndex, Types.DATE); - break; - case DATETIMEV2: - preparedStatement.setNull(parameterIndex, Types.TIMESTAMP); - break; - case CHAR: - case VARCHAR: - case STRING: - case BINARY: - preparedStatement.setNull(parameterIndex, Types.VARCHAR); - break; - default: - throw new RuntimeException("Unknown type value: " + dorisType); - } - } - - public List getResultColumnTypeNames() { - return resultColumnTypeNames; - } - - public void openTrans() throws UdfRuntimeException { - try { - if (conn != null) { - conn.setAutoCommit(false); - } - } catch (SQLException e) { - throw new UdfRuntimeException("JDBC executor open transaction has error: ", e); - } - } - - public void commitTrans() throws UdfRuntimeException { - try { - if 
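
The insertColumn/insertNullColumn mapping above pairs each Doris column type with the matching PreparedStatement setter, uses setNull with the corresponding java.sql.Types constant for NULL cells, and flushes whole rows with addBatch()/executeBatch(). A minimal, self-contained sketch of that pattern on plain JDBC types; the table name and two-column shape are illustrative only, not taken from the patch:

    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.SQLException;
    import java.sql.Types;

    final class BatchInsertSketch {
        // Bind values row by row, using setNull(parameterIndex, sqlType) for NULL cells,
        // then flush the whole batch once with executeBatch().
        static void insertRows(Connection conn, Object[][] rows) throws SQLException {
            try (PreparedStatement ps = conn.prepareStatement("INSERT INTO t VALUES (?, ?)")) {
                for (Object[] row : rows) {
                    if (row[0] == null) {
                        ps.setNull(1, Types.INTEGER);
                    } else {
                        ps.setInt(1, (Integer) row[0]);
                    }
                    if (row[1] == null) {
                        ps.setNull(2, Types.VARCHAR);
                    } else {
                        ps.setString(2, (String) row[1]);
                    }
                    ps.addBatch();
                }
                ps.executeBatch();
                ps.clearBatch();
            }
        }
    }

Batching keeps one round trip per flush instead of one per row, which is why the deleted insert() only calls executeBatch() after binding every row of the VectorTable.
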
(conn != null) { - conn.commit(); - } - } catch (SQLException e) { - throw new UdfRuntimeException("JDBC executor commit transaction has error: ", e); - } - } - - public void rollbackTrans() throws UdfRuntimeException { - try { - if (conn != null) { - conn.rollback(); - } - } catch (SQLException e) { - throw new UdfRuntimeException("JDBC executor rollback transaction has error: ", e); - } - } - - public List getBlock(int batchSize, Object colsArray) throws UdfRuntimeException { - try { - ArrayList colsTypes = (ArrayList) colsArray; - Integer[] colArray = new Integer[colsTypes.size()]; - colArray = colsTypes.toArray(colArray); - int columnCount = resultSetMetaData.getColumnCount(); - curBlockRows = 0; - do { - for (int i = 0; i < columnCount; ++i) { - // colArray[i] > 0, means the type is Hll/Bitmap, we should read it with getBytes - // instead of getObject, as Hll/Bitmap in JDBC will map to String by default. - if (colArray[i] > 0) { - block.get(i)[curBlockRows] = resultSet.getBytes(i + 1); - } else { - block.get(i)[curBlockRows] = resultSet.getObject(i + 1); - } - } - curBlockRows++; - } while (curBlockRows < batchSize && resultSet.next()); - } catch (SQLException e) { - throw new UdfRuntimeException("get next block failed: ", e); - } - return block; - } - - public List getBlock(int batchSize) throws UdfRuntimeException { - try { - int columnCount = resultSetMetaData.getColumnCount(); - curBlockRows = 0; - - if (isNebula()) { - do { - for (int i = 0; i < columnCount; ++i) { - block.get(i)[curBlockRows] = UdfUtils.convertObject((ValueWrapper) resultSet.getObject(i + 1)); - } - curBlockRows++; - } while (curBlockRows < batchSize && resultSet.next()); - } else { - do { - for (int i = 0; i < columnCount; ++i) { - block.get(i)[curBlockRows] = resultSet.getObject(i + 1); - } - curBlockRows++; - } while (curBlockRows < batchSize && resultSet.next()); - } - } catch (SQLException e) { - throw new UdfRuntimeException("get next block failed: ", e); - } - return block; - } - - public int getCurBlockRows() { - return curBlockRows; - } - - public boolean hasNext() throws UdfRuntimeException { - try { - if (resultSet == null) { - return false; - } - return resultSet.next(); - } catch (SQLException e) { - throw new UdfRuntimeException("resultSet to get next error: ", e); - } - } - - private void init(String driverUrl, String sql, int batchSize, String driverClass, String jdbcUrl, String jdbcUser, - String jdbcPassword, TJdbcOperation op, TOdbcTableType tableType) throws UdfRuntimeException { - try { - if (isNebula()) { - batchSizeNum = batchSize; - Class.forName(driverClass); - conn = DriverManager.getConnection(jdbcUrl, jdbcUser, jdbcPassword); - stmt = conn.prepareStatement(sql); - } else { - ClassLoader parent = getClass().getClassLoader(); - ClassLoader classLoader = UdfUtils.getClassLoader(driverUrl, parent); - druidDataSource = JdbcDataSource.getDataSource().getSource(jdbcUrl + jdbcUser + jdbcPassword); - if (druidDataSource == null) { - synchronized (druidDataSourceLock) { - druidDataSource = JdbcDataSource.getDataSource().getSource(jdbcUrl + jdbcUser + jdbcPassword); - if (druidDataSource == null) { - long start = System.currentTimeMillis(); - DruidDataSource ds = new DruidDataSource(); - ds.setDriverClassLoader(classLoader); - ds.setDriverClassName(driverClass); - ds.setUrl(jdbcUrl); - ds.setUsername(jdbcUser); - ds.setPassword(jdbcPassword); - ds.setMinIdle(minIdleSize); - ds.setInitialSize(minPoolSize); - ds.setMaxActive(maxPoolSize); - ds.setMaxWait(maxWaitTime); - ds.setTestWhileIdle(true); 
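
The getBlock() variants above read the current row first and only then advance (do { ... } while (curBlockRows < batchSize && resultSet.next())), so the caller is expected to have positioned the cursor with hasNext() beforehand; rows are buffered column-major, one Object[] per result column. A sketch of that loop using only standard JDBC; the helper name and batch size are illustrative:

    import java.sql.ResultSet;
    import java.sql.ResultSetMetaData;
    import java.sql.SQLException;
    import java.util.ArrayList;
    import java.util.List;

    final class ColumnarBatchSketch {
        // Fill a column-major batch: block.get(col)[row] = value.
        static List<Object[]> fetchBatch(ResultSet rs, int batchSize) throws SQLException {
            ResultSetMetaData meta = rs.getMetaData();
            int columnCount = meta.getColumnCount();
            List<Object[]> block = new ArrayList<>(columnCount);
            for (int c = 0; c < columnCount; c++) {
                block.add(new Object[batchSize]);
            }
            int rows = 0;
            // Caller is expected to have positioned the cursor on the first row (rs.next()).
            do {
                for (int c = 0; c < columnCount; c++) {
                    block.get(c)[rows] = rs.getObject(c + 1); // JDBC columns are 1-based
                }
                rows++;
            } while (rows < batchSize && rs.next());
            return block;
        }
    }
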
- ds.setTestOnBorrow(false); - setValidationQuery(ds, tableType); - ds.setTimeBetweenEvictionRunsMillis(maxIdleTime / 5); - ds.setMinEvictableIdleTimeMillis(maxIdleTime); - druidDataSource = ds; - // here is a cache of datasource, which using the string(jdbcUrl + jdbcUser + - // jdbcPassword) as key. - // and the default datasource init = 1, min = 1, max = 100, if one of connection idle - // time greater than 10 minutes. then connection will be retrieved. - JdbcDataSource.getDataSource().putSource(jdbcUrl + jdbcUser + jdbcPassword, ds); - LOG.info("init datasource [" + (jdbcUrl + jdbcUser) + "] cost: " + ( - System.currentTimeMillis() - start) + " ms"); - } - } - } - - long start = System.currentTimeMillis(); - conn = druidDataSource.getConnection(); - LOG.info("get connection [" + (jdbcUrl + jdbcUser) + "] cost: " + (System.currentTimeMillis() - start) - + " ms"); - if (op == TJdbcOperation.READ) { - conn.setAutoCommit(false); - Preconditions.checkArgument(sql != null); - stmt = conn.prepareStatement(sql, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); - if (tableType == TOdbcTableType.MYSQL) { - stmt.setFetchSize(Integer.MIN_VALUE); - } else { - stmt.setFetchSize(batchSize); - } - batchSizeNum = batchSize; - } else { - LOG.info("insert sql: " + sql); - preparedStatement = conn.prepareStatement(sql); - } - } - } catch (MalformedURLException e) { - throw new UdfRuntimeException("MalformedURLException to load class about " + driverUrl, e); - } catch (SQLException e) { - throw new UdfRuntimeException("Initialize datasource failed: ", e); - } catch (FileNotFoundException e) { - throw new UdfRuntimeException("FileNotFoundException failed: ", e); - } catch (Exception e) { - throw new UdfRuntimeException("Initialize datasource failed: ", e); - } - } - - private void setValidationQuery(DruidDataSource ds, TOdbcTableType tableType) { - if (tableType == TOdbcTableType.ORACLE || tableType == TOdbcTableType.OCEANBASE_ORACLE) { - ds.setValidationQuery("SELECT 1 FROM dual"); - } else if (tableType == TOdbcTableType.SAP_HANA) { - ds.setValidationQuery("SELECT 1 FROM DUMMY"); - } else { - ds.setValidationQuery("SELECT 1"); - } - } - - public void booleanPutToByte(Object[] column, boolean isNullable, int numRows, long nullMapAddr, long columnAddr, - int startRow) { - if (isNullable) { - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putByte(columnAddr + i, (Boolean) column[i] ? (byte) 1 : 0); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putByte(columnAddr + i, (Boolean) column[i] ? 
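
Two backend-specific choices in init() above are easy to miss: MySQL Connector/J only streams rows (instead of buffering the whole result set in the driver) when the fetch size is exactly Integer.MIN_VALUE, and the Druid validation query has to vary per dialect because a bare SELECT 1 is rejected by Oracle and SAP HANA. A condensed sketch of just those two decisions; the String dbType stands in for the TOdbcTableType enum used by the deleted code:

    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;

    final class ReadStatementSketch {
        // Prepare a forward-only, read-only cursor and pick the fetch size per backend.
        static PreparedStatement prepareRead(Connection conn, String sql, String dbType, int batchSize)
                throws SQLException {
            PreparedStatement stmt =
                    conn.prepareStatement(sql, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
            if ("MYSQL".equals(dbType)) {
                // MySQL Connector/J streams rows only when fetch size is exactly Integer.MIN_VALUE.
                stmt.setFetchSize(Integer.MIN_VALUE);
            } else {
                stmt.setFetchSize(batchSize);
            }
            return stmt;
        }

        // Validation query differs per dialect; "SELECT 1" needs a FROM clause on Oracle and SAP HANA.
        static String validationQuery(String dbType) {
            switch (dbType) {
                case "ORACLE":
                case "OCEANBASE_ORACLE":
                    return "SELECT 1 FROM dual";
                case "SAP_HANA":
                    return "SELECT 1 FROM DUMMY";
                default:
                    return "SELECT 1";
            }
        }
    }
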
(byte) 1 : 0); - } - } - } - - public void copyBatchBooleanResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof Boolean) { - booleanPutToByte(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Integer) { - integerPutToByte(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Byte) { - bytePutToByte(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } - } - - private void bigDecimalPutToByte(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - Short res = ((BigDecimal) column[i]).shortValueExact(); - UdfUtils.UNSAFE.putByte(columnAddr + i, res.byteValue()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - Short res = ((BigDecimal) column[i]).shortValueExact(); - UdfUtils.UNSAFE.putByte(columnAddr + i, res.byteValue()); - } - } - } - - private void integerPutToByte(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putByte(columnAddr + i, ((Integer) column[i]).byteValue()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putByte(columnAddr + i, ((Integer) column[i]).byteValue()); - } - } - } - - private void shortPutToByte(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putByte(columnAddr + i, ((Short) column[i]).byteValue()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putByte(columnAddr + i, ((Short) column[i]).byteValue()); - } - } - } - - private void bytePutToByte(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putByte(columnAddr + i, (Byte) column[i]); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putByte(columnAddr + i, (Byte) column[i]); - } - } - } - - private void objectPutToByte(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - String columnStr = String.valueOf(column[i]); - int columnInt = Integer.parseInt(columnStr); - UdfUtils.UNSAFE.putByte(columnAddr + i, (byte) columnInt); - } - } - } else { - for (int i = 0; i < 
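
Every copyBatch*Result helper above writes into two BE-owned native buffers: a one-byte-per-row null map at nullMapAddr and a fixed-stride value buffer at columnAddr, both addressed directly through Unsafe. A minimal sketch of that layout for a 4-byte INT column; obtaining Unsafe reflectively here is illustrative, the deleted code goes through UdfUtils.UNSAFE:

    import java.lang.reflect.Field;
    import sun.misc.Unsafe;

    final class OffHeapColumnSketch {
        private static final Unsafe UNSAFE = loadUnsafe();

        // Mark nulls in the one-byte-per-row null map, and store values
        // at a fixed 4-byte stride in the off-heap column buffer.
        static void copyIntColumn(Object[] column, boolean isNullable, int numRows,
                                  long nullMapAddr, long columnAddr) {
            for (int i = 0; i < numRows; i++) {
                if (isNullable && column[i] == null) {
                    UNSAFE.putByte(nullMapAddr + i, (byte) 1);
                } else {
                    UNSAFE.putInt(columnAddr + i * 4L, (Integer) column[i]);
                }
            }
        }

        private static Unsafe loadUnsafe() {
            try {
                Field f = Unsafe.class.getDeclaredField("theUnsafe");
                f.setAccessible(true);
                return (Unsafe) f.get(null);
            } catch (ReflectiveOperationException e) {
                throw new IllegalStateException("Unsafe unavailable", e);
            }
        }
    }

The per-type helpers only differ in the stride (1, 2, 4, 8, or 16 bytes) and in how the boxed JDBC value is narrowed or widened before the write.
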
numRows; i++) { - String columnStr = String.valueOf(column[i]); - int columnInt = Integer.parseInt(columnStr); - UdfUtils.UNSAFE.putByte(columnAddr + i, (byte) columnInt); - } - } - } - - public void copyBatchTinyIntResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof BigDecimal) { - bigDecimalPutToByte(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Integer) { - integerPutToByte(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Short) { - shortPutToByte(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Byte) { - bytePutToByte(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof java.lang.Object) { - objectPutToByte(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } - } - - private void bigDecimalPutToShort(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putShort(columnAddr + (i * 2L), ((BigDecimal) column[i]).shortValueExact()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putShort(columnAddr + (i * 2L), ((BigDecimal) column[i]).shortValueExact()); - } - } - } - - private void integerPutToShort(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putShort(columnAddr + (i * 2L), ((Integer) column[i]).shortValue()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putShort(columnAddr + (i * 2L), ((Integer) column[i]).shortValue()); - } - } - } - - private void shortPutToShort(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putShort(columnAddr + (i * 2L), (Short) column[i]); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putShort(columnAddr + (i * 2L), (Short) column[i]); - } - } - } - - public void clickHouseUInt8ToInt(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putShort(columnAddr + (i * 2L), (short) ((UnsignedByte) column[i]).intValue()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putShort(columnAddr + (i * 2L), (short) ((UnsignedByte) column[i]).intValue()); - } - } - 
} - - public void copyBatchSmallIntResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof BigDecimal) { - bigDecimalPutToShort(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Integer) { - integerPutToShort(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Short) { - shortPutToShort(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof com.clickhouse.data.value.UnsignedByte) { - clickHouseUInt8ToInt(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } - } - - private void bigDecimalPutToInt(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), ((BigDecimal) column[i]).intValueExact()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), ((BigDecimal) column[i]).intValueExact()); - } - } - } - - private void integerPutToInt(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), (Integer) column[i]); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), (Integer) column[i]); - } - } - } - - private void longPutToInt(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), ((Long) column[i]).intValue()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), ((Long) column[i]).intValue()); - } - } - } - - public void clickHouseUInt16ToInt(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), ((UnsignedShort) column[i]).intValue()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), ((UnsignedShort) column[i]).intValue()); - } - } - } - - public void copyBatchIntResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; 
- } - if (column[firstNotNullIndex] instanceof BigDecimal) { - bigDecimalPutToInt(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Integer) { - integerPutToInt(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof java.lang.Long) { - // For mysql view. But don't worry about overflow - longPutToInt(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof com.clickhouse.data.value.UnsignedShort) { - clickHouseUInt16ToInt(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } - } - - private void bigDecimalPutToLong(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), ((BigDecimal) column[i]).longValueExact()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), ((BigDecimal) column[i]).longValueExact()); - } - } - } - - private void longPutToLong(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), (Long) column[i]); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), (Long) column[i]); - } - } - } - - private void clickHouseUInt32ToLong(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), ((UnsignedInteger) column[i]).longValue()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), ((UnsignedInteger) column[i]).longValue()); - } - } - } - - public void copyBatchBigIntResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof BigDecimal) { - bigDecimalPutToLong(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Long) { - longPutToLong(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof com.clickhouse.data.value.UnsignedInteger) { - clickHouseUInt32ToLong(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } - } - - private void bigDecimalPutToBigInteger(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - BigInteger[] data = new BigInteger[numRows]; - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - data[i] = null; - 
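
The copyBatchSmallIntResult/copyBatchIntResult/copyBatchBigIntResult dispatchers above also accept ClickHouse's unsigned wrapper values and widen them into the next larger signed Doris type so the full unsigned range is preserved (UInt8 into SMALLINT, UInt16 into INT, UInt32 into BIGINT). A small sketch of that widening, assuming the com.clickhouse.data.value wrappers the deleted code imports:

    import com.clickhouse.data.value.UnsignedByte;
    import com.clickhouse.data.value.UnsignedInteger;
    import com.clickhouse.data.value.UnsignedShort;

    final class UnsignedWideningSketch {
        // UInt8 fits in a Doris SMALLINT, UInt16 in an INT, UInt32 in a BIGINT.
        static short uint8ToShort(UnsignedByte v) {
            return (short) v.intValue();
        }

        static int uint16ToInt(UnsignedShort v) {
            return v.intValue();
        }

        static long uint32ToLong(UnsignedInteger v) {
            return v.longValue();
        }
    }
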
UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - data[i] = ((BigDecimal) column[i]).toBigInteger(); - } - } - copyBatchDecimalResult(data, isNullable, numRows, columnAddr, 16, startRowForNullable); - } - - private void bigIntegerPutToByte(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - BigInteger columnValue = (BigInteger) column[i]; - byte[] bytes = UdfUtils.convertByteOrder(columnValue.toByteArray()); - byte[] value = new byte[16]; - if (columnValue.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - for (int index = 0; index < Math.min(bytes.length, value.length); ++index) { - value[index] = bytes[index]; - } - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, columnAddr + (i * 16L), 16); - } - } - } else { - for (int i = 0; i < numRows; i++) { - BigInteger columnValue = (BigInteger) column[i]; - byte[] bytes = UdfUtils.convertByteOrder(columnValue.toByteArray()); - byte[] value = new byte[16]; - if (columnValue.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - for (int index = 0; index < Math.min(bytes.length, value.length); ++index) { - value[index] = bytes[index]; - } - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, columnAddr + (i * 16L), 16); - } - } - } - - private void stringPutToBigInteger(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - BigInteger[] data = new BigInteger[numRows]; - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - data[i] = null; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - data[i] = new BigInteger((String) column[i]); - } - } - copyBatchDecimalResult(data, isNullable, numRows, columnAddr, 16, startRowForNullable); - } - - private void clickHouseUInt64ToByte(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UnsignedLong columnValue = (UnsignedLong) column[i]; - BigInteger bigIntValue = columnValue.bigIntegerValue(); - byte[] bytes = UdfUtils.convertByteOrder(bigIntValue.toByteArray()); - byte[] value = new byte[16]; - if (bigIntValue.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, columnAddr + (i * 16L), 16); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UnsignedLong columnValue = (UnsignedLong) column[i]; - BigInteger bigIntValue = columnValue.bigIntegerValue(); - byte[] bytes = UdfUtils.convertByteOrder(bigIntValue.toByteArray()); - byte[] value = new byte[16]; - if (bigIntValue.signum() == -1) { - Arrays.fill(value, (byte) -1); - } - System.arraycopy(bytes, 0, value, 0, Math.min(bytes.length, value.length)); - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, columnAddr + (i * 16L), 16); - } - } - } - - public void copyBatchLargeIntResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, 
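
bigIntegerPutToByte and clickHouseUInt64ToByte above share one encoding for LARGEINT: take the value's two's-complement big-endian bytes, reverse them to little-endian, copy them into a 16-byte slot, and pre-fill the slot with 0xFF when the value is negative so the sign extends across the unused high bytes. A self-contained sketch of that encoding; the byte-order reversal is inlined here instead of calling UdfUtils.convertByteOrder:

    import java.math.BigInteger;
    import java.util.Arrays;

    final class LargeIntEncodeSketch {
        // Encode a BigInteger as a 16-byte little-endian two's-complement value.
        static byte[] toLittleEndian16(BigInteger v) {
            byte[] bigEndian = v.toByteArray();          // two's complement, big-endian, minimal length
            byte[] out = new byte[16];
            if (v.signum() == -1) {
                Arrays.fill(out, (byte) -1);             // sign-extend the unused high bytes
            }
            // Reverse into little-endian and keep at most the lowest 16 bytes.
            int n = Math.min(bigEndian.length, out.length);
            for (int i = 0; i < n; i++) {
                out[i] = bigEndian[bigEndian.length - 1 - i];
            }
            return out;
        }
    }
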
numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof BigDecimal) { - bigDecimalPutToBigInteger(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof BigInteger) { - bigIntegerPutToByte(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof String) { - stringPutToBigInteger(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof com.clickhouse.data.value.UnsignedLong) { - clickHouseUInt64ToByte(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } - } - - public void copyBatchFloatResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) { - Object[] column = (Object[]) columnObj; - if (isNullable) { - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putFloat(columnAddr + (i * 4L), (Float) column[i]); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putFloat(columnAddr + (i * 4L), (Float) column[i]); - } - } - } - - private void bigDecimalPutToDouble(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putDouble(columnAddr + (i * 8L), ((BigDecimal) column[i]).doubleValue()); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putDouble(columnAddr + (i * 8L), ((BigDecimal) column[i]).doubleValue()); - } - } - } - - private void doublePutToDouble(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - UdfUtils.UNSAFE.putDouble(columnAddr + (i * 8L), (Double) column[i]); - } - } - } else { - for (int i = 0; i < numRows; i++) { - UdfUtils.UNSAFE.putDouble(columnAddr + (i * 8L), (Double) column[i]); - } - } - } - - public void copyBatchDoubleResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof BigDecimal) { - bigDecimalPutToDouble(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Double) { - doublePutToDouble(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } - } - - private void localDatePutToLong(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - LocalDate date = (LocalDate) column[i]; - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTime(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), 
0, 0, 0, true)); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDate date = (LocalDate) column[i]; - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTime(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), 0, 0, 0, true)); - } - } - } - - private void datePutToLong(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - LocalDate date = ((Date) column[i]).toLocalDate(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTime(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), 0, 0, 0, true)); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDate date = ((Date) column[i]).toLocalDate(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTime(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), 0, 0, 0, true)); - } - } - } - - public void copyBatchDateResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof LocalDate) { - localDatePutToLong(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Date) { - datePutToLong(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } - } - - private void localDatePutToInt(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - LocalDate date = (LocalDate) column[i]; - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), - UdfUtils.convertToDateV2(date.getYear(), date.getMonthValue(), - date.getDayOfMonth())); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDate date = (LocalDate) column[i]; - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), - UdfUtils.convertToDateV2(date.getYear(), date.getMonthValue(), - date.getDayOfMonth())); - } - } - } - - private void datePutToInt(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - LocalDate date = ((Date) column[i]).toLocalDate(); - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), - UdfUtils.convertToDateV2(date.getYear(), date.getMonthValue(), date.getDayOfMonth())); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDate date = ((Date) column[i]).toLocalDate(); - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), - UdfUtils.convertToDateV2(date.getYear(), date.getMonthValue(), date.getDayOfMonth())); - } - } - } - - private void timestampPutToInt(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 
1); - } else { - LocalDateTime date = ((java.sql.Timestamp) column[i]).toLocalDateTime(); - UdfUtils.UNSAFE.putInt(columnAddr + (i * 4L), - UdfUtils.convertToDateV2(date.getYear(), date.getMonthValue(), - date.getDayOfMonth())); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDateTime date = ((java.sql.Timestamp) column[i]).toLocalDateTime(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 4L), - UdfUtils.convertToDateV2(date.getYear(), date.getMonthValue(), - date.getDayOfMonth())); - } - } - } - - public void copyBatchDateV2Result(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof LocalDate) { - localDatePutToInt(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Date) { - datePutToInt(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof Timestamp) { - timestampPutToInt(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } - } - - private void localDateTimePutToLong(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - LocalDateTime date = (LocalDateTime) column[i]; - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTime(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), date.getHour(), date.getMinute(), - date.getSecond(), false)); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDateTime date = (LocalDateTime) column[i]; - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTime(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), date.getHour(), date.getMinute(), - date.getSecond(), false)); - } - } - } - - private void timestampPutToLong(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - LocalDateTime date = ((java.sql.Timestamp) column[i]).toLocalDateTime(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTime(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), date.getHour(), date.getMinute(), date.getSecond(), false)); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDateTime date = ((java.sql.Timestamp) column[i]).toLocalDateTime(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTime(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), - date.getHour(), date.getMinute(), date.getSecond(), false)); - } - } - } - - private void oracleTimetampPutToLong(Object[] column, boolean isNullable, int numRows, - long nullMapAddr, - long columnAddr, int startRowForNullable) throws SQLException { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - LocalDateTime date = ((oracle.sql.TIMESTAMP) 
column[i]).timestampValue().toLocalDateTime(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTime(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), date.getHour(), date.getMinute(), date.getSecond(), false)); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDateTime date = ((oracle.sql.TIMESTAMP) column[i]).timestampValue().toLocalDateTime(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTime(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), - date.getHour(), date.getMinute(), date.getSecond(), false)); - } - } - } - - public void copyBatchDateTimeResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) throws SQLException { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof LocalDateTime) { - localDateTimePutToLong(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof java.sql.Timestamp) { - timestampPutToLong(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof oracle.sql.TIMESTAMP) { - oracleTimetampPutToLong(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } - } - - private void localDateTimePutToLongV2(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - LocalDateTime date = (LocalDateTime) column[i]; - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTimeV2(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), date.getHour(), date.getMinute(), - date.getSecond(), date.getNano() / 1000)); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDateTime date = (LocalDateTime) column[i]; - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTimeV2(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), date.getHour(), date.getMinute(), - date.getSecond(), date.getNano() / 1000)); - } - } - } - - private void timestampPutToLongV2(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - LocalDateTime date = ((java.sql.Timestamp) column[i]).toLocalDateTime(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTimeV2(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), date.getHour(), date.getMinute(), - date.getSecond(), date.getNano() / 1000)); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDateTime date = ((java.sql.Timestamp) column[i]).toLocalDateTime(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTimeV2(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), - date.getHour(), date.getMinute(), date.getSecond(), date.getNano() / 1000)); - } - } - } - - private void oracleTimetampPutToLongV2(Object[] column, boolean isNullable, int numRows, - long nullMapAddr, long columnAddr, int 
startRowForNullable) throws SQLException { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - LocalDateTime date = ((oracle.sql.TIMESTAMP) column[i]).timestampValue().toLocalDateTime(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTimeV2(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), date.getHour(), date.getMinute(), - date.getSecond(), date.getNano() / 1000)); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDateTime date = ((oracle.sql.TIMESTAMP) column[i]).timestampValue().toLocalDateTime(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTimeV2(date.getYear(), date.getMonthValue(), date.getDayOfMonth(), - date.getHour(), date.getMinute(), date.getSecond(), date.getNano() / 1000)); - } - } - } - - private void offsetDateTimePutToLongV2(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] == null) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - LocalDateTime date = ((OffsetDateTime) column[i]).toLocalDateTime(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTimeV2(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), date.getHour(), date.getMinute(), - date.getSecond(), date.getNano() / 1000)); - } - } - } else { - for (int i = 0; i < numRows; i++) { - LocalDateTime date = ((OffsetDateTime) column[i]).toLocalDateTime(); - UdfUtils.UNSAFE.putLong(columnAddr + (i * 8L), - UdfUtils.convertToDateTimeV2(date.getYear(), date.getMonthValue(), - date.getDayOfMonth(), date.getHour(), date.getMinute(), - date.getSecond(), date.getNano() / 1000)); - } - } - } - - public void copyBatchDateTimeV2Result(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) throws SQLException { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof LocalDateTime) { - localDateTimePutToLongV2(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof java.sql.Timestamp) { - timestampPutToLongV2(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof OffsetDateTime) { - offsetDateTimePutToLongV2(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } else if (column[firstNotNullIndex] instanceof oracle.sql.TIMESTAMP) { - oracleTimetampPutToLongV2(column, isNullable, numRows, nullMapAddr, columnAddr, firstNotNullIndex); - } - } - - public String trimSpaces(String str) { - int end = str.length() - 1; - while (end >= 0 && str.charAt(end) == ' ') { - end--; - } - return str.substring(0, end + 1); - } - - public void copyBatchCharResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr, boolean needTrimSpaces) { - if (needTrimSpaces == true) { - Object[] column = (Object[]) columnObj; - for (int i = 0; i < numRows; i++) { - if (column[i] != null) { - column[i] = trimSpaces((String) column[i]); - } - } - copyBatchStringResult(column, isNullable, numRows, nullMapAddr, 
offsetsAddr, charsAddr); - } else { - copyBatchStringResult(columnObj, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } - } - - private void hllPutToString(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - int[] offsets = new int[numRows]; - byte[][] byteRes = new byte[numRows][]; - int offset = 0; - if (isNullable == true) { - // Here can not loop from startRowForNullable, - // because byteRes will be used later - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - byteRes[i] = (byte[]) column[i]; - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } else { - for (int i = 0; i < numRows; i++) { - byteRes[i] = (byte[]) column[i]; - offset += byteRes[i].length; - offsets[i] = offset; - } - } - byte[] bytes = new byte[offsets[numRows - 1]]; - long bytesAddr = JNINativeMethod.resizeStringColumn(charsAddr, offsets[numRows - 1]); - int dst = 0; - for (int i = 0; i < numRows; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, offsetsAddr, numRows * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr, offsets[numRows - 1]); - } - - private void bitMapPutToString(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - int[] offsets = new int[numRows]; - byte[][] byteRes = new byte[numRows][]; - int offset = 0; - if (isNullable == true) { - // Here can not loop from startRowForNullable, - // because byteRes will be used later - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - byteRes[i] = (byte[]) column[i]; - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } else { - for (int i = 0; i < numRows; i++) { - byteRes[i] = (byte[]) column[i]; - offset += byteRes[i].length; - offsets[i] = offset; - } - } - byte[] bytes = new byte[offsets[numRows - 1]]; - long bytesAddr = JNINativeMethod.resizeStringColumn(charsAddr, offsets[numRows - 1]); - int dst = 0; - for (int i = 0; i < numRows; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, offsetsAddr, numRows * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr, offsets[numRows - 1]); - } - - public void copyBatchHllResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - hllPutToString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } - - public void copyBatchBitMapResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - bitMapPutToString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } - - private static String simplifyIPv6Address(String 
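
The HLL and Bitmap copies above, and the string paths that follow, use one variable-length protocol: build a cumulative offsets array where offsets[i] is the end position of row i, ask the BE to grow its char buffer to the total length via JNINativeMethod.resizeStringColumn, then bulk-copy the offsets and the concatenated bytes. A sketch of the Java-side bookkeeping only, with the native resize and copyMemory steps noted in comments because they live on the BE side:

    import java.nio.charset.StandardCharsets;

    final class StringColumnLayoutSketch {
        static final byte[] EMPTY = new byte[0];

        // Returns {int[] offsets, byte[] concatenatedBytes}; offsets[i] is the cumulative end of row i.
        static Object[] layout(Object[] column, int numRows) {
            int[] offsets = new int[numRows];
            byte[][] rows = new byte[numRows][];
            int total = 0;
            for (int i = 0; i < numRows; i++) {
                rows[i] = column[i] == null ? EMPTY
                        : ((String) column[i]).getBytes(StandardCharsets.UTF_8);
                total += rows[i].length;
                offsets[i] = total;
            }
            byte[] chars = new byte[total];
            int dst = 0;
            for (int i = 0; i < numRows; i++) {
                System.arraycopy(rows[i], 0, chars, dst, rows[i].length);
                dst += rows[i].length;
            }
            // The deleted code then resizes the BE-owned buffer to `total` and copies
            // offsets/chars into native memory with UdfUtils.copyMemory.
            return new Object[] {offsets, chars};
        }
    }

Null rows are kept as empty byte arrays rather than skipped so that the offsets array stays aligned with the null map.
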
address) { - // Replace longest sequence of zeros with "::" - String[] parts = address.split(":"); - int longestSeqStart = -1; - int longestSeqLen = 0; - int curSeqStart = -1; - int curSeqLen = 0; - for (int i = 0; i < parts.length; i++) { - if (parts[i].equals("0")) { - if (curSeqStart == -1) { - curSeqStart = i; - } - curSeqLen++; - if (curSeqLen > longestSeqLen) { - longestSeqStart = curSeqStart; - longestSeqLen = curSeqLen; - } - } else { - curSeqStart = -1; - curSeqLen = 0; - } - } - if (longestSeqLen <= 1) { - return address; // No sequences of zeros to replace - } - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < longestSeqStart; i++) { - sb.append(parts[i]).append(':'); - } - sb.append(':'); - for (int i = longestSeqStart + longestSeqLen; i < parts.length; i++) { - sb.append(parts[i]); - if (i < parts.length - 1) { - sb.append(':'); - } - } - return sb.toString(); - } - - private void ipPutToString(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - int[] offsets = new int[numRows]; - byte[][] byteRes = new byte[numRows][]; - int offset = 0; - if (isNullable) { - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - String ip = ((java.net.InetAddress) column[i]).getHostAddress(); - if (column[i] instanceof java.net.Inet6Address) { - ip = simplifyIPv6Address(ip); - } - byteRes[i] = ip.getBytes(StandardCharsets.UTF_8); - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } else { - for (int i = 0; i < numRows; i++) { - String ip = ((java.net.InetAddress) column[i]).getHostAddress(); - if (column[i] instanceof java.net.Inet6Address) { - ip = simplifyIPv6Address(ip); - } - byteRes[i] = ip.getBytes(StandardCharsets.UTF_8); - offset += byteRes[i].length; - offsets[i] = offset; - } - } - byte[] bytes = new byte[offsets[numRows - 1]]; - long bytesAddr = JNINativeMethod.resizeStringColumn(charsAddr, offsets[numRows - 1]); - int dst = 0; - for (int i = 0; i < numRows; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, offsetsAddr, numRows * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr, offsets[numRows - 1]); - } - - private void oracleClobToString(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - int[] offsets = new int[numRows]; - byte[][] byteRes = new byte[numRows][]; - int offset = 0; - if (isNullable) { - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - try { - oracle.sql.CLOB clob = (oracle.sql.CLOB) column[i]; - String result = clob.getSubString(1, (int) clob.length()); - byteRes[i] = result.getBytes(StandardCharsets.UTF_8); - } catch (Exception e) { - LOG.info("clobToString have error when convert " + e.getMessage()); - } - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } else { - for (int i = 0; i < numRows; i++) { - try { - oracle.sql.CLOB clob = (oracle.sql.CLOB) column[i]; - String result = clob.getSubString(1, (int) clob.length()); - byteRes[i] = result.getBytes(StandardCharsets.UTF_8); - } catch (Exception e) { - LOG.info("clobToString have error when convert " + e.getMessage()); - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } - byte[] bytes = new 
byte[offsets[numRows - 1]]; - long bytesAddr = JNINativeMethod.resizeStringColumn(charsAddr, offsets[numRows - 1]); - int dst = 0; - for (int i = 0; i < numRows; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, offsetsAddr, numRows * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr, offsets[numRows - 1]); - } - - private void objectPutToString(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - int[] offsets = new int[numRows]; - byte[][] byteRes = new byte[numRows][]; - int offset = 0; - if (isNullable) { - // Here can not loop from startRowForNullable, - // because byteRes will be used later - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - String result = column[i].toString(); - if (column[i] instanceof java.sql.Time) { - // the default toString() method doesn't format the milliseconds in Time. - long milliseconds = ((java.sql.Time) column[i]).getTime() % 1000L; - if (milliseconds > 0) { - result = String.format("%s.%03d", column[i].toString(), milliseconds); - } - } - byteRes[i] = result.getBytes(StandardCharsets.UTF_8); - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } else { - boolean isTime = numRows > 0 && column[0] instanceof java.sql.Time; - for (int i = 0; i < numRows; i++) { - String result = column[i].toString(); - if (isTime) { - // Doc https://dev.mysql.com/doc/connector-j/8.0/en/connector-j-reference-type-conversions.html - // shows that jdbc API use java.sql.Time to hold the TIME type, - // but java.sql.Time can only have millisecond precision. - // the default toString() method doesn't format the milliseconds in Time. 
- // Doc https://dev.mysql.com/doc/refman/8.0/en/time.html shows that MySQL supports time[0~6], - // so time[4~6] will lose precision - long milliseconds = ((java.sql.Time) column[i]).getTime() % 1000L; - if (milliseconds > 0) { - result = String.format("%s.%03d", column[i].toString(), milliseconds); - } - } - byteRes[i] = result.getBytes(StandardCharsets.UTF_8); - offset += byteRes[i].length; - offsets[i] = offset; - } - } - byte[] bytes = new byte[offsets[numRows - 1]]; - long bytesAddr = JNINativeMethod.resizeStringColumn(charsAddr, offsets[numRows - 1]); - int dst = 0; - for (int i = 0; i < numRows; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, offsetsAddr, numRows * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr, offsets[numRows - 1]); - } - - private void stringPutToString(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - int[] offsets = new int[numRows]; - byte[][] byteRes = new byte[numRows][]; - int offset = 0; - if (isNullable) { - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - byteRes[i] = ((String) column[i]).getBytes(StandardCharsets.UTF_8); - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } else { - for (int i = 0; i < numRows; i++) { - byteRes[i] = ((String) column[i]).getBytes(StandardCharsets.UTF_8); - offset += byteRes[i].length; - offsets[i] = offset; - } - } - byte[] bytes = new byte[offsets[numRows - 1]]; - long bytesAddr = JNINativeMethod.resizeStringColumn(charsAddr, offsets[numRows - 1]); - int dst = 0; - for (int i = 0; i < numRows; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, offsetsAddr, numRows * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr, offsets[numRows - 1]); - } - - private void byteaPutToHexString(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - int[] offsets = new int[numRows]; - byte[][] byteRes = new byte[numRows][]; - int offset = 0; - if (isNullable) { - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - byteRes[i] = byteArrayToHexString((byte[]) column[i]).getBytes(StandardCharsets.UTF_8); - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } else { - for (int i = 0; i < numRows; i++) { - byteRes[i] = byteArrayToHexString((byte[]) column[i]).getBytes(StandardCharsets.UTF_8); - offset += byteRes[i].length; - offsets[i] = offset; - } - } - byte[] bytes = new byte[offsets[numRows - 1]]; - long bytesAddr = JNINativeMethod.resizeStringColumn(charsAddr, offsets[numRows - 1]); - int dst = 0; - for (int i = 0; i < numRows; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, offsetsAddr, numRows * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr, offsets[numRows - 1]); - } - - private static String byteArrayToHexString(byte[] bytes) { - StringBuilder hexString = new StringBuilder("\\x"); - for (byte b : bytes) { - hexString.append(String.format("%02x", b & 0xff)); - } - return 
hexString.toString(); - } - - private void byteaPutToMySQLString(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - int[] offsets = new int[numRows]; - byte[][] byteRes = new byte[numRows][]; - int offset = 0; - if (isNullable) { - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - byteRes[i] = mysqlByteArrayToHexString((byte[]) column[i]).getBytes(StandardCharsets.UTF_8); - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } else { - for (int i = 0; i < numRows; i++) { - byteRes[i] = mysqlByteArrayToHexString((byte[]) column[i]).getBytes(StandardCharsets.UTF_8); - offset += byteRes[i].length; - offsets[i] = offset; - } - } - byte[] bytes = new byte[offsets[numRows - 1]]; - long bytesAddr = JNINativeMethod.resizeStringColumn(charsAddr, offsets[numRows - 1]); - int dst = 0; - for (int i = 0; i < numRows; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, offsetsAddr, numRows * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr, offsets[numRows - 1]); - } - - private static String mysqlByteArrayToHexString(byte[] bytes) { - StringBuilder hexString = new StringBuilder("0x"); - for (byte b : bytes) { - String hex = Integer.toHexString(0xFF & b); - if (hex.length() == 1) { - hexString.append('0'); - } - hexString.append(hex.toUpperCase()); - } - return hexString.toString(); - } - - public void copyBatchStringResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof String) { - stringPutToString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } else if (column[firstNotNullIndex] instanceof byte[] && tableType == TOdbcTableType.POSTGRESQL) { - // for postgresql bytea type - byteaPutToHexString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } else if ((column[firstNotNullIndex] instanceof java.net.Inet4Address - || column[firstNotNullIndex] instanceof java.net.Inet6Address) - && tableType == TOdbcTableType.CLICKHOUSE) { - // for clickhouse ipv4 and ipv6 type - ipPutToString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } else if (column[firstNotNullIndex] instanceof byte[] && (tableType == TOdbcTableType.MYSQL - || tableType == TOdbcTableType.OCEANBASE)) { - // for mysql bytea type - byteaPutToMySQLString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } else if (column[firstNotNullIndex] instanceof oracle.sql.CLOB && tableType == TOdbcTableType.ORACLE) { - // for oracle clob type - oracleClobToString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } else { - // object like in pg type point, polygon, jsonb..... get object is - // org.postgresql.util.PGobject..... 
- // here object put to string, so the object must have impl toString() function - objectPutToString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } - } - - public void copyBatchDecimalV2Result(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr) { - Object[] column = (Object[]) columnObj; - BigInteger[] data = new BigInteger[numRows]; - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - data[i] = null; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - data[i] = ((BigDecimal) column[i]).setScale(9, RoundingMode.HALF_EVEN).unscaledValue(); - } - } - copyBatchDecimalResult(data, isNullable, numRows, columnAddr, 16, 0); - } - - public void copyBatchDecimal32Result(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int scale) { - Object[] column = (Object[]) columnObj; - BigInteger[] data = new BigInteger[numRows]; - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - data[i] = null; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - data[i] = ((BigDecimal) column[i]).setScale(scale, RoundingMode.HALF_EVEN).unscaledValue(); - } - } - copyBatchDecimalResult(data, isNullable, numRows, columnAddr, 4, 0); - } - - public void copyBatchDecimal64Result(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int scale) { - Object[] column = (Object[]) columnObj; - BigInteger[] data = new BigInteger[numRows]; - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - data[i] = null; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - data[i] = ((BigDecimal) column[i]).setScale(scale, RoundingMode.HALF_EVEN).unscaledValue(); - } - } - copyBatchDecimalResult(data, isNullable, numRows, columnAddr, 8, 0); - } - - public void copyBatchDecimal128Result(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long columnAddr, int scale) { - Object[] column = (Object[]) columnObj; - BigInteger[] data = new BigInteger[numRows]; - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - data[i] = null; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - data[i] = ((BigDecimal) column[i]).setScale(scale, RoundingMode.HALF_EVEN).unscaledValue(); - } - } - copyBatchDecimalResult(data, isNullable, numRows, columnAddr, 16, 0); - } - - private void copyBatchDecimalResult(BigInteger[] column, boolean isNullable, int numRows, - long columnAddr, int typeLen, int startRowForNullable) { - if (isNullable) { - for (int i = startRowForNullable; i < numRows; i++) { - if (column[i] != null) { - byte[] bytes = UdfUtils.convertByteOrder(column[i].toByteArray()); - byte[] value = new byte[typeLen]; - if (column[i].signum() == -1) { - Arrays.fill(value, (byte) -1); - } - for (int index = 0; index < Math.min(bytes.length, value.length); ++index) { - value[index] = bytes[index]; - } - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, columnAddr + ((long) i * typeLen), - typeLen); - } - } - } else { - for (int i = 0; i < numRows; i++) { - byte[] bytes = UdfUtils.convertByteOrder(column[i].toByteArray()); - byte[] value = new byte[typeLen]; - if (column[i].signum() == -1) { - Arrays.fill(value, (byte) -1); - } - for (int index = 0; index < Math.min(bytes.length, value.length); ++index) { - value[index] = bytes[index]; - } - UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET, null, columnAddr + ((long) i * typeLen), - typeLen); - } - } - } - - private static final Map, Function> 
CK_ARRAY_CONVERTERS = new HashMap<>(); - - static { - CK_ARRAY_CONVERTERS.put(String[].class, res -> Arrays.toString((String[]) res)); - CK_ARRAY_CONVERTERS.put(boolean[].class, res -> Arrays.toString((boolean[]) res)); - CK_ARRAY_CONVERTERS.put(byte[].class, res -> Arrays.toString((byte[]) res)); - CK_ARRAY_CONVERTERS.put(Byte[].class, res -> Arrays.toString((Byte[]) res)); - CK_ARRAY_CONVERTERS.put(LocalDate[].class, res -> Arrays.toString((LocalDate[]) res)); - CK_ARRAY_CONVERTERS.put(LocalDateTime[].class, res -> Arrays.toString((LocalDateTime[]) res)); - CK_ARRAY_CONVERTERS.put(float[].class, res -> Arrays.toString((float[]) res)); - CK_ARRAY_CONVERTERS.put(double[].class, res -> Arrays.toString((double[]) res)); - CK_ARRAY_CONVERTERS.put(short[].class, res -> Arrays.toString((short[]) res)); - CK_ARRAY_CONVERTERS.put(int[].class, res -> Arrays.toString((int[]) res)); - CK_ARRAY_CONVERTERS.put(long[].class, res -> Arrays.toString((long[]) res)); - CK_ARRAY_CONVERTERS.put(BigInteger[].class, res -> Arrays.toString((BigInteger[]) res)); - CK_ARRAY_CONVERTERS.put(BigDecimal[].class, res -> Arrays.toString((BigDecimal[]) res)); - CK_ARRAY_CONVERTERS.put(Inet4Address[].class, res -> Arrays.toString(Arrays.stream((Inet4Address[]) res) - .map(InetAddress::getHostAddress).toArray(String[]::new))); - CK_ARRAY_CONVERTERS.put(Inet6Address[].class, res -> Arrays.toString(Arrays.stream((Inet6Address[]) res) - .map(addr -> simplifyIPv6Address(addr.getHostAddress())).toArray(String[]::new))); - CK_ARRAY_CONVERTERS.put(UUID[].class, res -> Arrays.toString((UUID[]) res)); - } - - public static Object convertClickHouseArray(Object obj) { - Function converter = CK_ARRAY_CONVERTERS.get(obj.getClass()); - return converter != null ? converter.apply(obj) : obj; - } - - private void ckArrayPutToString(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - int[] offsets = new int[numRows]; - byte[][] byteRes = new byte[numRows][]; - int offset = 0; - if (isNullable) { - // Here can not loop from startRowForNullable, - // because byteRes will be used later - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - byteRes[i] = emptyBytes; - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - byteRes[i] = ((String) convertClickHouseArray(column[i])).getBytes(StandardCharsets.UTF_8); - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } else { - for (int i = 0; i < numRows; i++) { - byteRes[i] = ((String) convertClickHouseArray(column[i])).getBytes(StandardCharsets.UTF_8); - offset += byteRes[i].length; - offsets[i] = offset; - } - } - byte[] bytes = new byte[offsets[numRows - 1]]; - long bytesAddr = JNINativeMethod.resizeStringColumn(charsAddr, offsets[numRows - 1]); - int dst = 0; - for (int i = 0; i < numRows; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, offsetsAddr, numRows * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr, offsets[numRows - 1]); - } - - private void arrayPutToString(Object[] column, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - int[] offsets = new int[numRows]; - byte[][] byteRes = new byte[numRows][]; - int offset = 0; - if (isNullable) { - // Here can not loop from startRowForNullable, - // because byteRes will be used later - for (int i = 0; i < numRows; i++) { - if (column[i] == null) { - byteRes[i] = emptyBytes; - 
UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - try { - byteRes[i] = Arrays.toString((Object[]) ((java.sql.Array) column[i]).getArray()) - .getBytes(StandardCharsets.UTF_8); - } catch (SQLException e) { - LOG.info("arrayPutToString have error when convert " + e.getMessage()); - } - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } else { - for (int i = 0; i < numRows; i++) { - try { - byteRes[i] = Arrays.toString((Object[]) ((java.sql.Array) column[i]).getArray()) - .getBytes(StandardCharsets.UTF_8); - } catch (SQLException e) { - LOG.info("arrayPutToString have error when convert " + e.getMessage()); - } - offset += byteRes[i].length; - offsets[i] = offset; - } - } - byte[] bytes = new byte[offsets[numRows - 1]]; - long bytesAddr = JNINativeMethod.resizeStringColumn(charsAddr, offsets[numRows - 1]); - int dst = 0; - for (int i = 0; i < numRows; i++) { - for (int j = 0; j < byteRes[i].length; j++) { - bytes[dst++] = byteRes[i][j]; - } - } - UdfUtils.copyMemory(offsets, UdfUtils.INT_ARRAY_OFFSET, null, offsetsAddr, numRows * 4L); - UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET, null, bytesAddr, offsets[numRows - 1]); - } - - public void copyBatchArrayResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - // for doris array - if (column[firstNotNullIndex] instanceof String) { - stringPutToString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } else if (column[firstNotNullIndex] instanceof java.sql.Array) { - // for PG array - arrayPutToString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } else { - // For the ClickHouse array type - ckArrayPutToString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } - } - - public void copyBatchJsonResult(Object columnObj, boolean isNullable, int numRows, long nullMapAddr, - long offsetsAddr, long charsAddr) { - Object[] column = (Object[]) columnObj; - int firstNotNullIndex = 0; - if (isNullable) { - firstNotNullIndex = getFirstNotNullObject(column, numRows, nullMapAddr); - } - if (firstNotNullIndex == numRows) { - return; - } - if (column[firstNotNullIndex] instanceof String) { - stringPutToString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } else { - objectPutToString(column, isNullable, numRows, nullMapAddr, offsetsAddr, charsAddr); - } - } - - private int getFirstNotNullObject(Object[] column, int numRows, long nullMapAddr) { - int i = 0; - for (; i < numRows; ++i) { - if (null == column[i]) { - UdfUtils.UNSAFE.putByte(nullMapAddr + i, (byte) 1); - } else { - break; - } - } - return i; - } -} - diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/resources/package.xml b/fe/be-java-extensions/jdbc-scanner/src/main/resources/package.xml deleted file mode 100644 index 4bbb2610603363..00000000000000 --- a/fe/be-java-extensions/jdbc-scanner/src/main/resources/package.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - jar-with-dependencies - - jar - - false - - - / - true - true - runtime - - - **/Log4j2Plugins.dat - - - - - diff --git a/fe/be-java-extensions/max-compute-scanner/pom.xml b/fe/be-java-extensions/max-compute-scanner/pom.xml deleted file mode 100644 index 8e057da807459e..00000000000000 --- a/fe/be-java-extensions/max-compute-scanner/pom.xml +++ 
/dev/null @@ -1,104 +0,0 @@ - - - - - be-java-extensions - org.apache.doris - ${revision} - - 4.0.0 - - max-compute-scanner - - - 8 - 8 - - - - - org.apache.doris - java-common - ${project.version} - - - com.aliyun.odps - odps-sdk-core - - - org.codehaus.jackson - jackson-core-asl - - - org.codehaus.jackson - jackson-mapper-asl - - - - - org.apache.arrow - arrow-vector - ${arrow.version} - - - com.fasterxml.jackson.core - jackson-databind - - - - - org.apache.arrow - arrow-memory-unsafe - ${arrow.version} - - - - - max-compute-scanner - - - org.apache.maven.plugins - maven-assembly-plugin - - - src/main/resources/package.xml - - - - - - - - - - make-assembly - package - - single - - - - - - - - diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java deleted file mode 100644 index 65810163840e34..00000000000000 --- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java +++ /dev/null @@ -1,237 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.maxcompute; - -import org.apache.doris.common.jni.vec.ColumnValue; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.log4j.Logger; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.ByteOrder; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.List; - -/** - * MaxCompute Column value in vector column - */ -public class MaxComputeColumnValue implements ColumnValue { - private static final Logger LOG = Logger.getLogger(MaxComputeColumnValue.class); - private int idx; - private FieldVector column; - - public MaxComputeColumnValue() { - idx = 0; - } - - public void reset(FieldVector column) { - this.column = column; - this.idx = 0; - } - - @Override - public boolean canGetStringAsBytes() { - return true; - } - - @Override - public boolean isNull() { - return column.isNull(idx); - } - - private void skippedIfNull() { - // null has been process by appendValue with isNull() - try { - if (column.isNull(idx)) { - idx++; - } - } catch (IndexOutOfBoundsException e) { - // skip left rows - idx++; - } - } - - @Override - public boolean getBoolean() { - skippedIfNull(); - BitVector bitCol = (BitVector) column; - return bitCol.get(idx++) != 0; - } - - @Override - public byte getByte() { - skippedIfNull(); - TinyIntVector tinyIntCol = (TinyIntVector) column; - return tinyIntCol.get(idx++); - } - - @Override - public short getShort() { - skippedIfNull(); - SmallIntVector smallIntCol = (SmallIntVector) column; - return smallIntCol.get(idx++); - } - - @Override - public int getInt() { - skippedIfNull(); - IntVector intCol = (IntVector) column; - return intCol.get(idx++); - } - - @Override - public float getFloat() { - skippedIfNull(); - Float4Vector floatCol = (Float4Vector) column; - return floatCol.get(idx++); - } - - @Override - public long getLong() { - skippedIfNull(); - BigIntVector longCol = (BigIntVector) column; - return longCol.get(idx++); - } - - @Override - public double getDouble() { - skippedIfNull(); - Float8Vector doubleCol = (Float8Vector) column; - return doubleCol.get(idx++); - } - - @Override - public BigInteger getBigInteger() { - skippedIfNull(); - BigIntVector longCol = (BigIntVector) column; - return BigInteger.valueOf(longCol.get(idx++)); - } - - @Override - public BigDecimal getDecimal() { - skippedIfNull(); - DecimalVector decimalCol = (DecimalVector) column; - return getBigDecimalFromArrowBuf(column.getDataBuffer(), idx++, - decimalCol.getScale(), DecimalVector.TYPE_WIDTH); - } - - /** - * copy from arrow vector DecimalUtility.getBigDecimalFromArrowBuf - * @param byteBuf byteBuf - * @param index index - * @param scale scale - * @param byteWidth DecimalVector TYPE_WIDTH - * @return java BigDecimal - */ - public static BigDecimal getBigDecimalFromArrowBuf(ArrowBuf byteBuf, int index, int scale, int byteWidth) { - byte[] value = new 
byte[byteWidth]; - byte temp; - final long startIndex = (long) index * byteWidth; - - byteBuf.getBytes(startIndex, value, 0, byteWidth); - if (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) { - // Decimal stored as native endian, need to swap bytes to make BigDecimal if native endian is LE - int stop = byteWidth / 2; - for (int i = 0, j; i < stop; i++) { - temp = value[i]; - j = (byteWidth - 1) - i; - value[i] = value[j]; - value[j] = temp; - } - } - BigInteger unscaledValue = new BigInteger(value); - return new BigDecimal(unscaledValue, scale); - } - - @Override - public String getString() { - skippedIfNull(); - VarCharVector varcharCol = (VarCharVector) column; - String v = varcharCol.getObject(idx++).toString(); - return v == null ? new String(new byte[0]) : v; - } - - @Override - public byte[] getStringAsBytes() { - skippedIfNull(); - VarCharVector varcharCol = (VarCharVector) column; - byte[] v = varcharCol.getObject(idx++).getBytes(); - return v == null ? new byte[0] : v; - } - - @Override - public LocalDate getDate() { - skippedIfNull(); - DateDayVector dateCol = (DateDayVector) column; - Integer intVal = dateCol.getObject(idx++); - return LocalDate.ofEpochDay(intVal == null ? 0 : intVal); - } - - @Override - public LocalDateTime getDateTime() { - skippedIfNull(); - LocalDateTime result; - if (column instanceof DateMilliVector) { - DateMilliVector datetimeCol = (DateMilliVector) column; - result = datetimeCol.getObject(idx++); - } else { - TimeStampNanoVector datetimeCol = (TimeStampNanoVector) column; - result = datetimeCol.getObject(idx++); - } - return result == null ? LocalDateTime.MIN : result; - } - - @Override - public byte[] getBytes() { - skippedIfNull(); - VarBinaryVector binaryCol = (VarBinaryVector) column; - byte[] v = binaryCol.getObject(idx++); - return v == null ? new byte[0] : v; - } - - @Override - public void unpackArray(List values) { - - } - - @Override - public void unpackMap(List keys, List values) { - - } - - @Override - public void unpackStruct(List structFieldIndex, List values) { - - } -} diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java deleted file mode 100644 index 8f9b903afdc716..00000000000000 --- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java +++ /dev/null @@ -1,262 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.maxcompute; - -import org.apache.doris.common.jni.JniScanner; -import org.apache.doris.common.jni.vec.ColumnType; -import org.apache.doris.common.jni.vec.ScanPredicate; - -import com.aliyun.odps.Column; -import com.aliyun.odps.OdpsType; -import com.aliyun.odps.data.ArrowRecordReader; -import com.aliyun.odps.tunnel.TableTunnel; -import com.aliyun.odps.type.TypeInfo; -import com.aliyun.odps.type.TypeInfoFactory; -import com.google.common.base.Strings; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.log4j.Logger; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.concurrent.ConcurrentHashMap; - -/** - * MaxComputeJ JniScanner. BE will read data from the scanner object. - */ -public class MaxComputeJniScanner extends JniScanner { - private static final Logger LOG = Logger.getLogger(MaxComputeJniScanner.class); - private static final String REGION = "region"; - private static final String PROJECT = "project"; - private static final String TABLE = "table"; - private static final String ACCESS_KEY = "access_key"; - private static final String SECRET_KEY = "secret_key"; - private static final String START_OFFSET = "start_offset"; - private static final String SPLIT_SIZE = "split_size"; - private static final String PUBLIC_ACCESS = "public_access"; - private static final Map tableScans = new ConcurrentHashMap<>(); - private final String region; - private final String project; - private final String table; - private final MaxComputeTableScan curTableScan; - private MaxComputeColumnValue columnValue; - private long remainBatchRows = 0; - private long totalRows = 0; - private RootAllocator arrowAllocator; - private ArrowRecordReader curReader; - private List readColumns; - private Map readColumnsToId; - private long startOffset = -1L; - private long splitSize = -1L; - - public MaxComputeJniScanner(int batchSize, Map params) { - region = Objects.requireNonNull(params.get(REGION), "required property '" + REGION + "'."); - project = Objects.requireNonNull(params.get(PROJECT), "required property '" + PROJECT + "'."); - table = Objects.requireNonNull(params.get(TABLE), "required property '" + TABLE + "'."); - tableScans.putIfAbsent(tableUniqKey(), newTableScan(params)); - curTableScan = tableScans.get(tableUniqKey()); - - String[] requiredFields = params.get("required_fields").split(","); - String[] types = params.get("columns_types").split("#"); - ColumnType[] columnTypes = new ColumnType[types.length]; - for (int i = 0; i < types.length; i++) { - columnTypes[i] = ColumnType.parseType(requiredFields[i], types[i]); - } - ScanPredicate[] predicates = new ScanPredicate[0]; - if (params.containsKey("push_down_predicates")) { - long predicatesAddress = Long.parseLong(params.get("push_down_predicates")); - if (predicatesAddress != 0) { - predicates = ScanPredicate.parseScanPredicates(predicatesAddress, columnTypes); - LOG.info("MaxComputeJniScanner gets pushed-down predicates: " + ScanPredicate.dump(predicates)); - } - } - initTableInfo(columnTypes, requiredFields, predicates, batchSize); - } - - private MaxComputeTableScan newTableScan(Map params) { - if (!Strings.isNullOrEmpty(params.get(START_OFFSET)) - && !Strings.isNullOrEmpty(params.get(SPLIT_SIZE))) { - startOffset = Long.parseLong(params.get(START_OFFSET)); - 
splitSize = Long.parseLong(params.get(SPLIT_SIZE)); - } - String accessKey = Objects.requireNonNull(params.get(ACCESS_KEY), "required property '" + ACCESS_KEY + "'."); - String secretKey = Objects.requireNonNull(params.get(SECRET_KEY), "required property '" + SECRET_KEY + "'."); - boolean enablePublicAccess = Boolean.parseBoolean(params.getOrDefault(PUBLIC_ACCESS, "false")); - return new MaxComputeTableScan(region, project, table, accessKey, secretKey, enablePublicAccess); - } - - public String tableUniqKey() { - return region + "#" + project + "." + table; - } - - @Override - protected void initTableInfo(ColumnType[] requiredTypes, String[] requiredFields, ScanPredicate[] predicates, - int batchSize) { - super.initTableInfo(requiredTypes, requiredFields, predicates, batchSize); - readColumns = new ArrayList<>(); - readColumnsToId = new HashMap<>(); - for (int i = 0; i < fields.length; i++) { - if (!Strings.isNullOrEmpty(fields[i])) { - readColumns.add(createOdpsColumn(i, types[i])); - readColumnsToId.put(fields[i], i); - } - } - // reorder columns - List columnList = curTableScan.getSchema().getColumns(); - Map columnRank = new HashMap<>(); - for (int i = 0; i < columnList.size(); i++) { - columnRank.put(columnList.get(i).getName(), i); - } - // Downloading columns data from Max compute only supports the order of table metadata. - // We might get an error message if no sort here: Column reorder is not supported in legacy arrow mode. - readColumns.sort((Comparator.comparing(o -> columnRank.get(o.getName())))); - } - - @Override - public void open() throws IOException { - if (readColumns.isEmpty()) { - return; - } - try { - TableTunnel.DownloadSession session = curTableScan.getSession(); - long start = startOffset == -1L ? 0 : startOffset; - long recordCount = session.getRecordCount(); - totalRows = splitSize > 0 ? 
Math.min(splitSize, recordCount) : recordCount; - - arrowAllocator = new RootAllocator(Long.MAX_VALUE); - curReader = session.openArrowRecordReader(start, totalRows, readColumns, arrowAllocator); - } catch (Exception e) { - close(); - throw new IOException(e); - } - remainBatchRows = totalRows; - } - - private Column createOdpsColumn(int colIdx, ColumnType dorisType) { - TypeInfo odpsType; - switch (dorisType.getType()) { - case BOOLEAN: - odpsType = TypeInfoFactory.BOOLEAN; - break; - case TINYINT: - odpsType = TypeInfoFactory.TINYINT; - break; - case SMALLINT: - odpsType = TypeInfoFactory.SMALLINT; - break; - case INT: - odpsType = TypeInfoFactory.INT; - break; - case BIGINT: - odpsType = TypeInfoFactory.BIGINT; - break; - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: - case DECIMALV2: - odpsType = TypeInfoFactory.getDecimalTypeInfo(dorisType.getPrecision(), dorisType.getScale()); - break; - case FLOAT: - odpsType = TypeInfoFactory.FLOAT; - break; - case DOUBLE: - odpsType = TypeInfoFactory.DOUBLE; - break; - case DATETIMEV2: - odpsType = TypeInfoFactory.DATETIME; - break; - case DATEV2: - odpsType = TypeInfoFactory.DATE; - break; - case CHAR: - odpsType = TypeInfoFactory.getCharTypeInfo(dorisType.getLength()); - break; - case VARCHAR: - odpsType = TypeInfoFactory.getVarcharTypeInfo(dorisType.getLength()); - break; - case STRING: - odpsType = TypeInfoFactory.getPrimitiveTypeInfo(OdpsType.STRING); - break; - default: - throw new RuntimeException("Unsupported transform for column type: " + dorisType.getType()); - } - return new Column(fields[colIdx], odpsType); - } - - @Override - public void close() throws IOException { - String tableName = tableUniqKey(); - MaxComputeTableScan scan = tableScans.get(tableName); - if (scan != null && scan.endOfScan()) { - tableScans.remove(tableName); - } - remainBatchRows = 0; - totalRows = 0; - startOffset = -1; - splitSize = -1; - if (curReader != null) { - arrowAllocator.close(); - curReader.close(); - curReader = null; - } - } - - @Override - protected int getNext() throws IOException { - if (curReader == null) { - return 0; - } - columnValue = new MaxComputeColumnValue(); - int expectedRows = (int) Math.min(batchSize, remainBatchRows); - int realRows = readVectors(expectedRows); - if (remainBatchRows <= 0) { - return 0; - } - remainBatchRows -= realRows; - curTableScan.increaseReadRows(realRows); - return realRows; - } - - private int readVectors(int expectedRows) throws IOException { - VectorSchemaRoot batch; - int curReadRows = 0; - while (curReadRows < expectedRows && (batch = curReader.read()) != null) { - try { - List fieldVectors = batch.getFieldVectors(); - int batchRows = 0; - for (FieldVector column : fieldVectors) { - columnValue.reset(column); - batchRows = column.getValueCount(); - for (int j = 0; j < batchRows; j++) { - appendData(readColumnsToId.get(column.getName()), columnValue); - } - } - curReadRows += batchRows; - } finally { - batch.close(); - } - } - return curReadRows; - } -} diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java deleted file mode 100644 index da67196a3a2f57..00000000000000 --- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.maxcompute; - -import com.aliyun.odps.Odps; -import com.aliyun.odps.TableSchema; -import com.aliyun.odps.account.AliyunAccount; -import com.aliyun.odps.tunnel.TableTunnel; -import com.aliyun.odps.tunnel.TunnelException; - -import java.io.IOException; - -/** - * MaxComputeJ JniScanner. BE will read data from the scanner object. - */ -public class MaxComputeTableScan { - private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun-inc.com/api"; - private static final String tunnelUrlTemplate = "http://dt.{}.maxcompute.aliyun-inc.com"; - private final Odps odps; - private final TableTunnel tunnel; - private final String project; - private final String table; - private volatile TableTunnel.DownloadSession tableSession; - private volatile long readRows = 0; - - public MaxComputeTableScan(String region, String project, String table, - String accessKey, String secretKey, boolean enablePublicAccess) { - this.project = project; - this.table = table; - odps = new Odps(new AliyunAccount(accessKey, secretKey)); - String odpsUrl = odpsUrlTemplate.replace("{}", region); - String tunnelUrl = tunnelUrlTemplate.replace("{}", region); - if (enablePublicAccess) { - odpsUrl = odpsUrl.replace("-inc", ""); - tunnelUrl = tunnelUrl.replace("-inc", ""); - } - odps.setEndpoint(odpsUrl); - odps.setDefaultProject(this.project); - tunnel = new TableTunnel(odps); - tunnel.setEndpoint(tunnelUrl); - } - - public TableSchema getSchema() { - return odps.tables().get(table).getSchema(); - } - - public synchronized TableTunnel.DownloadSession getSession() throws IOException { - if (tableSession == null) { - try { - tableSession = tunnel.createDownloadSession(project, table); - } catch (TunnelException e) { - throw new IOException(e); - } - } - return tableSession; - } - - public synchronized void increaseReadRows(long rows) { - // multi-thread writing must be synchronized - readRows += rows; - } - - public boolean endOfScan() { - return readRows >= tableSession.getRecordCount(); - } -} diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/resources/package.xml b/fe/be-java-extensions/max-compute-scanner/src/main/resources/package.xml deleted file mode 100644 index 4bbb2610603363..00000000000000 --- a/fe/be-java-extensions/max-compute-scanner/src/main/resources/package.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - jar-with-dependencies - - jar - - false - - - / - true - true - runtime - - - **/Log4j2Plugins.dat - - - - - diff --git a/fe/be-java-extensions/paimon-scanner/pom.xml b/fe/be-java-extensions/paimon-scanner/pom.xml deleted file mode 100644 index 76da4288d0727f..00000000000000 --- a/fe/be-java-extensions/paimon-scanner/pom.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - be-java-extensions - org.apache.doris - ${revision} - - 4.0.0 - - paimon-scanner - - - 8 - 8 - - - - - 
org.apache.doris - java-common - ${project.version} - - - fe-common - org.apache.doris - - - - - - org.apache.paimon - paimon-bundle - ${paimon.version} - - - org.apache.paimon - paimon-hive-connector-2.3 - ${paimon.version} - - - org.apache.paimon - paimon-s3 - ${paimon.version} - - - org.apache.paimon - paimon-oss-impl - ${paimon.version} - - - org.apache.thrift - libthrift - 0.9.3 - - - com.facebook.presto.hive - hive-apache - ${presto.hive.version} - - - org.slf4j - slf4j-log4j12 - - - - - org.apache.hadoop - hadoop-client - - - org.apache.hadoop - hadoop-common - - - org.apache.hadoop - hadoop-hdfs - - - commons-io - commons-io - - - - - - paimon-scanner - - - org.apache.maven.plugins - maven-assembly-plugin - - - src/main/resources/package.xml - - - - - - - - - - make-assembly - package - - single - - - - - - - diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java deleted file mode 100644 index a8783fe5294f81..00000000000000 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java +++ /dev/null @@ -1,145 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.paimon; - -import org.apache.doris.common.jni.vec.ColumnType; -import org.apache.doris.common.jni.vec.ColumnValue; - -import org.apache.paimon.data.InternalRow; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.time.Instant; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.ZoneOffset; -import java.util.List; - -public class PaimonColumnValue implements ColumnValue { - private int idx; - private InternalRow record; - ColumnType dorisType; - - public PaimonColumnValue() { - } - - public void setIdx(int idx, ColumnType dorisType) { - this.idx = idx; - this.dorisType = dorisType; - } - - public void setOffsetRow(InternalRow record) { - this.record = record; - } - - @Override - public boolean canGetStringAsBytes() { - return true; - } - - @Override - public boolean getBoolean() { - return record.getBoolean(idx); - } - - @Override - public byte getByte() { - return record.getByte(idx); - } - - @Override - public short getShort() { - return record.getShort(idx); - } - - @Override - public int getInt() { - return record.getInt(idx); - } - - @Override - public float getFloat() { - return record.getFloat(idx); - } - - @Override - public long getLong() { - return record.getLong(idx); - } - - @Override - public double getDouble() { - return record.getDouble(idx); - } - - @Override - public BigInteger getBigInteger() { - return BigInteger.valueOf(record.getInt(idx)); - } - - @Override - public BigDecimal getDecimal() { - return record.getDecimal(idx, dorisType.getPrecision(), dorisType.getScale()).toBigDecimal(); - } - - @Override - public String getString() { - return record.getString(idx).toString(); - } - - @Override - public byte[] getStringAsBytes() { - return record.getString(idx).toBytes(); - } - - @Override - public LocalDate getDate() { - return LocalDate.ofEpochDay(record.getLong(idx)); - } - - @Override - public LocalDateTime getDateTime() { - return Instant.ofEpochMilli(record.getTimestamp(idx, 3) - .getMillisecond()).atZone(ZoneOffset.ofHours(0)).toLocalDateTime(); - } - - @Override - public boolean isNull() { - return record.isNullAt(idx); - } - - @Override - public byte[] getBytes() { - return record.getBinary(idx); - } - - @Override - public void unpackArray(List values) { - - } - - @Override - public void unpackMap(List keys, List values) { - - } - - @Override - public void unpackStruct(List structFieldIndex, List values) { - - } -} diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java deleted file mode 100644 index 3fe4efac9ebba5..00000000000000 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java +++ /dev/null @@ -1,173 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.paimon; - -import org.apache.doris.common.jni.JniScanner; -import org.apache.doris.common.jni.vec.ColumnType; -import org.apache.doris.common.jni.vec.ScanPredicate; -import org.apache.doris.common.jni.vec.TableSchema; - -import org.apache.paimon.catalog.Catalog; -import org.apache.paimon.catalog.CatalogContext; -import org.apache.paimon.catalog.CatalogFactory; -import org.apache.paimon.catalog.Identifier; -import org.apache.paimon.data.InternalRow; -import org.apache.paimon.options.Options; -import org.apache.paimon.predicate.Predicate; -import org.apache.paimon.reader.RecordReader; -import org.apache.paimon.table.Table; -import org.apache.paimon.table.source.ReadBuilder; -import org.apache.paimon.table.source.Split; -import org.apache.paimon.types.DataType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - - -public class PaimonJniScanner extends JniScanner { - private static final Logger LOG = LoggerFactory.getLogger(PaimonJniScanner.class); - private static final String PAIMON_OPTION_PREFIX = "paimon_option_prefix."; - private final Map paimonOptionParams; - private final String dbName; - private final String tblName; - private final String paimonSplit; - private final String paimonPredicate; - private Table table; - private RecordReader reader; - private final PaimonColumnValue columnValue = new PaimonColumnValue(); - private List paimonAllFieldNames; - - public PaimonJniScanner(int batchSize, Map params) { - LOG.debug("params:{}", params); - paimonSplit = params.get("paimon_split"); - paimonPredicate = params.get("paimon_predicate"); - dbName = params.get("db_name"); - tblName = params.get("table_name"); - super.batchSize = batchSize; - super.fields = params.get("paimon_column_names").split(","); - super.predicates = new ScanPredicate[0]; - paimonOptionParams = params.entrySet().stream() - .filter(kv -> kv.getKey().startsWith(PAIMON_OPTION_PREFIX)) - .collect(Collectors - .toMap(kv1 -> kv1.getKey().substring(PAIMON_OPTION_PREFIX.length()), kv1 -> kv1.getValue())); - - } - - @Override - public void open() throws IOException { - initTable(); - initReader(); - parseRequiredTypes(); - } - - private void initReader() throws IOException { - ReadBuilder readBuilder = table.newReadBuilder(); - readBuilder.withProjection(getProjected()); - readBuilder.withFilter(getPredicates()); - reader = readBuilder.newRead().createReader(getSplit()); - } - - private int[] getProjected() { - return Arrays.stream(fields).mapToInt(paimonAllFieldNames::indexOf).toArray(); - } - - private List getPredicates() { - List predicates = PaimonScannerUtils.decodeStringToObject(paimonPredicate); - LOG.info("predicates:{}", predicates); - return predicates; - } - - private Split getSplit() { - Split split = PaimonScannerUtils.decodeStringToObject(paimonSplit); - LOG.info("split:{}", split); - return split; - } - - private void parseRequiredTypes() { - ColumnType[] columnTypes = new ColumnType[fields.length]; - for (int i = 0; 
i < fields.length; i++) { - int index = paimonAllFieldNames.indexOf(fields[i]); - if (index == -1) { - throw new RuntimeException(String.format("Cannot find field %s in schema %s", - fields[i], paimonAllFieldNames)); - } - DataType dataType = table.rowType().getTypeAt(index); - columnTypes[i] = ColumnType.parseType(fields[i], dataType.toString()); - } - super.types = columnTypes; - } - - @Override - public void close() throws IOException { - reader.close(); - } - - @Override - protected int getNext() throws IOException { - int rows = 0; - try { - RecordReader.RecordIterator batch; - while ((batch = reader.readBatch()) != null) { - InternalRow record; - while ((record = batch.next()) != null) { - columnValue.setOffsetRow(record); - for (int i = 0; i < fields.length; i++) { - columnValue.setIdx(i, types[i]); - appendData(i, columnValue); - } - rows++; - } - batch.releaseBatch(); - } - } catch (IOException e) { - LOG.warn("failed to getNext columnValue ", e); - throw new RuntimeException(e); - } - return rows; - } - - @Override - protected TableSchema parseTableSchema() throws UnsupportedOperationException { - // do nothing - return null; - } - - private void initTable() { - try { - Catalog catalog = createCatalog(); - table = catalog.getTable(Identifier.create(dbName, tblName)); - paimonAllFieldNames = PaimonScannerUtils.fieldNames(table.rowType()); - LOG.info("paimonAllFieldNames:{}", paimonAllFieldNames); - } catch (Catalog.TableNotExistException e) { - LOG.warn("failed to create paimon external catalog ", e); - throw new RuntimeException(e); - } - } - - private Catalog createCatalog() { - Options options = new Options(); - paimonOptionParams.entrySet().stream().forEach(kv -> options.set(kv.getKey(), kv.getValue())); - CatalogContext context = CatalogContext.create(options); - return CatalogFactory.createCatalog(context); - } -} diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonScannerUtils.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonScannerUtils.java deleted file mode 100644 index 102b6f01d502f2..00000000000000 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonScannerUtils.java +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.paimon; - -import org.apache.paimon.types.DataField; -import org.apache.paimon.types.RowType; -import org.apache.paimon.utils.InstantiationUtil; - -import java.util.Base64; -import java.util.List; -import java.util.stream.Collectors; - -public class PaimonScannerUtils { - private static final Base64.Decoder BASE64_DECODER = Base64.getUrlDecoder(); - - public static T decodeStringToObject(String encodedStr) { - final byte[] bytes = BASE64_DECODER.decode(encodedStr.getBytes(java.nio.charset.StandardCharsets.UTF_8)); - try { - return InstantiationUtil.deserializeObject(bytes, PaimonScannerUtils.class.getClassLoader()); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - public static List fieldNames(RowType rowType) { - return rowType.getFields().stream() - .map(DataField::name) - .collect(Collectors.toList()); - } -} diff --git a/fe/be-java-extensions/paimon-scanner/src/main/resources/package.xml b/fe/be-java-extensions/paimon-scanner/src/main/resources/package.xml deleted file mode 100644 index 4bbb2610603363..00000000000000 --- a/fe/be-java-extensions/paimon-scanner/src/main/resources/package.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - jar-with-dependencies - - jar - - false - - - / - true - true - runtime - - - **/Log4j2Plugins.dat - - - - - diff --git a/fe/be-java-extensions/pom.xml b/fe/be-java-extensions/pom.xml deleted file mode 100644 index af8584d17760dc..00000000000000 --- a/fe/be-java-extensions/pom.xml +++ /dev/null @@ -1,48 +0,0 @@ - - - - 4.0.0 - - hudi-scanner - java-common - java-udf - jdbc-scanner - paimon-scanner - max-compute-scanner - avro-scanner - preload-extensions - - - - org.apache.doris - ${revision} - fe - ../pom.xml - - - be-java-extensions - pom - - ${basedir}/../../ - 1 - - - diff --git a/fe/be-java-extensions/preload-extensions/pom.xml b/fe/be-java-extensions/preload-extensions/pom.xml deleted file mode 100644 index 830ee1ea8f4c72..00000000000000 --- a/fe/be-java-extensions/preload-extensions/pom.xml +++ /dev/null @@ -1,253 +0,0 @@ - - - - - be-java-extensions - org.apache.doris - ${revision} - - 4.0.0 - - preload-extensions - - - 8 - 8 - 2.12 - 3.2.0 - 3.0.16 - - - - - org.apache.arrow - arrow-memory-unsafe - ${arrow.version} - compile - - - org.apache.parquet - parquet-avro - 1.10.1 - compile - - - org.scala-lang - scala-library - ${scala.version} - compile - - - - org.apache.hadoop - hadoop-common - - - org.apache.hudi - hudi-spark-client - ${hudi.version} - - - org.apache.hudi - hudi-spark-common_${scala.binary.version} - ${hudi.version} - - - org.apache.avro - avro - - - org.apache.avro - avro-tools - - - - - org.apache.hudi - hudi-spark3-common - ${hudi.version} - - - org.apache.hudi - hudi-spark3.2.x_${scala.binary.version} - ${hudi.version} - - - json4s-ast_2.11 - org.json4s - - - json4s-core_2.11 - org.json4s - - - json4s-jackson_2.11 - org.json4s - - - json4s-scalap_2.11 - org.json4s - - - - - org.apache.spark - spark-core_${scala.binary.version} - - - javax.servlet - * - - - jackson-module-scala_2.12 - com.fasterxml.jackson.module - - - hadoop-client-api - org.apache.hadoop - - - hadoop-client-runtime - org.apache.hadoop - - - ${spark.version} - compile - - - org.apache.spark - spark-sql_${scala.binary.version} - ${spark.version} - compile - - - org.apache.spark - spark-launcher_${scala.binary.version} - ${spark.version} - compile - - - org.apache.spark - spark-catalyst_${scala.binary.version} - ${spark.version} - compile - - - org.codehaus.janino - janino - - - org.codehaus.janino - commons-compiler - - 
- - - - org.codehaus.janino - janino - ${janino.version} - - - org.codehaus.janino - commons-compiler - - - - - org.codehaus.janino - commons-compiler - ${janino.version} - - - - com.fasterxml.jackson.module - jackson-module-scala_${scala.binary.version} - ${jackson.version} - - - com.google.guava - guava - - - - - - com.oracle.database.jdbc - ojdbc8 - - - com.alibaba - druid - - - com.clickhouse - clickhouse-jdbc - all - compile - - - com.oracle.ojdbc - orai18n - 19.3.0.0 - - - org.apache.doris - hive-catalog-shade - - - - org.apache.hadoop - hadoop-cos - 3.3.5 - - - - - preload-extensions - - - org.apache.maven.plugins - maven-assembly-plugin - - - src/main/resources/package.xml - - - - - - - - - - make-assembly - package - - single - - - - - - - diff --git a/fe/be-java-extensions/preload-extensions/src/main/java/org/apache/doris/preload/README.md b/fe/be-java-extensions/preload-extensions/src/main/java/org/apache/doris/preload/README.md deleted file mode 100644 index 6857726c5bb62c..00000000000000 --- a/fe/be-java-extensions/preload-extensions/src/main/java/org/apache/doris/preload/README.md +++ /dev/null @@ -1,217 +0,0 @@ -- [Preload Dependencies For BE Extensions](#Preload-Dependencies-For-BE-Extensions) - - [Avro Scanner](#Avro-Scanner) - - [Hudi Scanner](#Hudi-Scanner) - - [MaxCompute Scanner](#MaxCompute-Scanner) - - [Paimon Scanner](#Paimon-Scanner) - - [JDBC Scanner](#JDBC-Scanner) - -# Preload Dependencies For BE Extensions - -## Avro Scanner - -Avro Scanner Compile Dependencies: - -``` - - org.apache.avro - avro - - - org.apache.avro - avro-tools - - - -``` - -## Hudi Scanner - -Hudi Scanner Compile Dependencies: - -``` - - org.apache.parquet - parquet-avro - 1.10.1 - compile - - - org.scala-lang - scala-library - ${scala.version} - compile - - - org.apache.hadoop - hadoop-common - - - org.apache.hudi - hudi-spark-client - ${hudi.version} - - - org.apache.hudi - hudi-spark-common_${scala.binary.version} - ${hudi.version} - - - org.apache.hudi - hudi-spark3-common - ${hudi.version} - - - org.apache.hudi - hudi-spark3.2.x_${scala.binary.version} - ${hudi.version} - - - json4s-ast_2.11 - org.json4s - - - json4s-core_2.11 - org.json4s - - - json4s-jackson_2.11 - org.json4s - - - json4s-scalap_2.11 - org.json4s - - - - - org.apache.spark - spark-core_${scala.binary.version} - - - javax.servlet - * - - - jackson-module-scala_2.12 - com.fasterxml.jackson.module - - - hadoop-client-api - org.apache.hadoop - - - hadoop-client-runtime - org.apache.hadoop - - - ${spark.version} - compile - - - org.apache.spark - spark-sql_${scala.binary.version} - ${spark.version} - compile - - - org.apache.spark - spark-launcher_${scala.binary.version} - ${spark.version} - compile - - - org.apache.spark - spark-catalyst_${scala.binary.version} - ${spark.version} - compile - - - org.codehaus.janino - janino - - - org.codehaus.janino - commons-compiler - - - - - - org.codehaus.janino - janino - ${janino.version} - - - org.codehaus.janino - commons-compiler - - - - - org.codehaus.janino - commons-compiler - ${janino.version} - - - - com.fasterxml.jackson.module - jackson-module-scala_${scala.binary.version} - ${jackson.version} - - - com.google.guava - guava - - - -``` - - -## MaxCompute Scanner - -MaxCompute Scanner Compile Dependencies: - -``` - - org.apache.arrow - arrow-memory-unsafe - ${arrow.version} - compile - -``` - -## Paimon Scanner - -``` - -``` - -## JDBC Scanner - -JDBC Scanner Compile Dependencies: - -``` - - com.oracle.database.jdbc - ojdbc8 - - - com.alibaba - druid - - - com.clickhouse - 
clickhouse-jdbc - all - compile - - - com.oracle.ojdbc - orai18n - 19.3.0.0 - - - org.apache.doris - hive-catalog-shade - -``` \ No newline at end of file diff --git a/fe/be-java-extensions/preload-extensions/src/main/resources/package.xml b/fe/be-java-extensions/preload-extensions/src/main/resources/package.xml deleted file mode 100644 index 4bbb2610603363..00000000000000 --- a/fe/be-java-extensions/preload-extensions/src/main/resources/package.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - jar-with-dependencies - - jar - - false - - - / - true - true - runtime - - - **/Log4j2Plugins.dat - - - - - diff --git a/fe/pom.xml b/fe/pom.xml index eaafa3c8cafd71..d308a4d30ee8aa 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -186,7 +186,7 @@ under the License. spark-dpp fe-core hive-udf - be-java-extensions +