From 3fda18d7d4435952822adcea8636f531e9a2a79c Mon Sep 17 00:00:00 2001 From: yuexie Date: Wed, 18 Dec 2024 16:45:51 +0800 Subject: [PATCH 1/3] repo-sync-2024-12-18T16:45:42+0800 --- build/Dockerfiles/dataproxy.Dockerfile | 11 +- config/application.yaml | 40 -- dataproxy-api/pom.xml | 10 +- dataproxy-common/pom.xml | 27 +- .../model/dataset/format/CSVFormatConfig.java | 66 -- .../dataset/format/PartitionBehavior.java | 73 --- .../dataset/format/TableFormatConfig.java | 56 -- .../datasource/DatasourceConnConfig.java | 5 - .../datasource/conn/JdbcBaseConnConfig.java | 88 --- .../conn/LocalFileSystemConnConfig.java | 42 -- .../conn/ObjectFileSystemConnConfig.java | 76 --- .../model/datasource/conn/OdpsConnConfig.java | 11 +- .../location/FileSystemLocationConfig.java | 42 -- .../location/JdbcLocationConfig.java | 42 -- .../location/MinioLocationConfig.java | 35 -- .../location/OSSLocationConfig.java | 38 -- .../serializer/SensitiveDataSerializer.java | 2 +- .../dataproxy/common/utils/ArrowUtil.java | 51 ++ .../dataproxy/common/utils/AssertUtil.java | 40 ++ .../dataproxy/common/utils/DPStringUtils.java | 64 -- .../dataproxy/common/utils/EnvVarUtils.java | 72 +++ .../dataproxy/common/utils/IdUtils.java | 22 +- dataproxy-core/pom.xml | 51 ++ .../dataproxy/core/config/ConfigLoader.java | 50 ++ .../DefaultFlightServerConfigLoader.java | 65 ++ .../core/config/FlightServerConfig.java | 28 +- .../core/config/FlightServerConfigKey.java | 24 +- .../core/config/FlightServerContext.java | 70 +++ .../converter/AbstractMultiTypeConverter.java | 39 ++ .../converter/AbstractValueConverter.java | 36 ++ .../core/converter/BigIntVectorConverter.java | 45 ++ .../core/converter/BitVectorConverter.java | 43 ++ .../converter/DataProxyParamConverter.java | 30 +- .../converter/DateDayVectorConverter.java | 43 ++ .../converter/DateMilliVectorConverter.java | 44 ++ .../core/converter/Float4VectorConverter.java | 46 ++ .../core/converter/Float8VectorConverter.java | 43 ++ .../core/converter/IntVectorConverter.java | 44 ++ .../converter/SmallIntVectorConverter.java | 44 ++ .../converter/TimeMilliVectorConvertor.java | 43 ++ .../TimeStampNanoVectorConverter.java | 40 ++ .../converter/TinyIntVectorConverter.java | 44 ++ .../converter/ValueConversionStrategy.java | 27 + .../converter/VarCharVectorConverter.java | 41 ++ .../dataproxy/core/param/ParamWrapper.java | 55 ++ .../dataproxy/core/reader/AbstractReader.java | 44 ++ .../dataproxy/core/reader/AbstractSender.java | 173 ++++++ .../dataproxy/core/reader/ReadJobContext.java | 38 +- .../dataproxy/core/reader/Reader.java | 26 +- .../dataproxy/core/reader/Sender.java | 30 + .../repository/CaffeineDataRepository.java | 78 +++ .../repository/ParamWrapperRepository.java | 34 +- .../core}/service/TicketService.java | 29 +- .../core/service/impl/CacheTicketService.java | 87 +++ .../spi/producer/DataProxyFlightProducer.java | 44 +- .../core/visitor/BooleanValueVisitor.java | 75 +++ .../core/visitor/ByteArrayValueVisitor.java | 80 +++ .../core/visitor/ByteValueVisitor.java | 55 ++ .../core/visitor/DoubleValueVisitor.java | 67 +++ .../core/visitor/FloatValueVisitor.java | 67 +++ .../core/visitor/IntegerValueVisitor.java | 96 +++ .../core/visitor/LongValueVisitor.java | 117 ++++ .../core/visitor/ShortValueVisitor.java | 68 +++ .../dataproxy/core/visitor/ValueVisitor.java | 89 +++ .../dataproxy/core/writer/Writer.java | 31 + ...retflow.dataproxy.core.config.ConfigLoader | 1 + .../integration/tests/DPFlightClient.java | 2 +- .../config/OdpsKusciaConnectorConfig.java | 80 +++ 
dataproxy-manager/pom.xml | 60 -- .../dataproxy/manager/Connector.java | 67 --- .../dataproxy/manager/DataWriter.java | 47 -- .../filesystem/BinaryFileDataReader.java | 54 -- .../filesystem/BinaryFileDataWriter.java | 90 --- .../filesystem/BinaryFileSplitReader.java | 137 ----- .../connector/filesystem/CSVDataReader.java | 66 -- .../connector/filesystem/CSVDataWriter.java | 131 ---- .../connector/filesystem/CSVSplitReader.java | 464 --------------- .../filesystem/FileSystemConnector.java | 241 -------- .../manager/connector/odps/OdpsConnector.java | 121 ---- .../connector/odps/OdpsDataReader.java | 52 -- .../connector/odps/OdpsResourceReader.java | 48 -- .../connector/odps/OdpsResourceWriter.java | 123 ---- .../connector/odps/OdpsSplitArrowReader.java | 342 ----------- .../manager/connector/odps/OdpsUtil.java | 49 -- .../connector/rdbms/JdbcAssistant.java | 324 ---------- .../connector/rdbms/JdbcConnector.java | 393 ------------ .../connector/rdbms/JdbcDataReader.java | 208 ------- .../connector/rdbms/JdbcDataWriter.java | 129 ---- .../connector/rdbms/JdbcSplitReader.java | 256 -------- .../connector/rdbms/MysqlJdbcAssistant.java | 116 ---- .../rdbms/adaptor/ArrowVectorIterator.java | 191 ------ .../connector/rdbms/adaptor/Constants.java | 32 - .../rdbms/adaptor/JdbcFieldInfo.java | 154 ----- .../rdbms/adaptor/JdbcParameterBinder.java | 163 ----- .../connector/rdbms/adaptor/JdbcToArrow.java | 104 ---- .../rdbms/adaptor/JdbcToArrowConfig.java | 336 ----------- .../adaptor/JdbcToArrowConfigBuilder.java | 284 --------- .../rdbms/adaptor/JdbcToArrowUtils.java | 448 -------------- .../adaptor/binder/BaseColumnBinder.java | 45 -- .../rdbms/adaptor/binder/BigIntBinder.java | 43 -- .../rdbms/adaptor/binder/BitBinder.java | 47 -- .../rdbms/adaptor/binder/ColumnBinder.java | 71 --- .../binder/ColumnBinderArrowTypeVisitor.java | 228 ------- .../rdbms/adaptor/binder/DateDayBinder.java | 59 -- .../rdbms/adaptor/binder/DateMilliBinder.java | 58 -- .../adaptor/binder/Decimal128Binder.java | 46 -- .../adaptor/binder/Decimal256Binder.java | 46 -- .../adaptor/binder/FixedSizeBinaryBinder.java | 45 -- .../rdbms/adaptor/binder/Float4Binder.java | 43 -- .../rdbms/adaptor/binder/Float8Binder.java | 43 -- .../rdbms/adaptor/binder/IntBinder.java | 43 -- .../rdbms/adaptor/binder/ListBinder.java | 76 --- .../rdbms/adaptor/binder/MapBinder.java | 90 --- .../adaptor/binder/NullableColumnBinder.java | 53 -- .../rdbms/adaptor/binder/SmallIntBinder.java | 43 -- .../rdbms/adaptor/binder/Time32Binder.java | 65 -- .../rdbms/adaptor/binder/Time64Binder.java | 64 -- .../rdbms/adaptor/binder/TimeStampBinder.java | 101 ---- .../rdbms/adaptor/binder/TinyIntBinder.java | 43 -- .../rdbms/adaptor/binder/VarBinaryBinder.java | 62 -- .../rdbms/adaptor/binder/VarCharBinder.java | 63 -- .../rdbms/adaptor/binder/package-info.java | 22 - .../rdbms/adaptor/consumer/ArrayConsumer.java | 143 ----- .../rdbms/adaptor/consumer/BaseConsumer.java | 54 -- .../adaptor/consumer/BigIntConsumer.java | 87 --- .../adaptor/consumer/BinaryConsumer.java | 138 ----- .../rdbms/adaptor/consumer/BitConsumer.java | 87 --- .../rdbms/adaptor/consumer/BlobConsumer.java | 71 --- .../rdbms/adaptor/consumer/ClobConsumer.java | 168 ------ .../consumer/CompositeJdbcConsumer.java | 76 --- .../rdbms/adaptor/consumer/DateConsumer.java | 114 ---- .../adaptor/consumer/DecimalConsumer.java | 129 ---- .../adaptor/consumer/DoubleConsumer.java | 87 --- .../rdbms/adaptor/consumer/FloatConsumer.java | 87 --- .../rdbms/adaptor/consumer/IntConsumer.java | 87 --- 
.../rdbms/adaptor/consumer/JdbcConsumer.java | 47 -- .../rdbms/adaptor/consumer/MapConsumer.java | 103 ---- .../rdbms/adaptor/consumer/NullConsumer.java | 38 -- .../adaptor/consumer/SmallIntConsumer.java | 87 --- .../rdbms/adaptor/consumer/TimeConsumer.java | 112 ---- .../adaptor/consumer/TimestampConsumer.java | 89 --- .../adaptor/consumer/TimestampTZConsumer.java | 97 --- .../adaptor/consumer/TinyIntConsumer.java | 87 --- .../adaptor/consumer/VarCharConsumer.java | 86 --- .../dataproxy-plugin-odps/pom.xml | 28 + .../config/DefaultOdpsFlightConfigLoader.java | 46 ++ .../EnvironmentOdpsFlightConfigLoader.java | 68 +++ .../plugin/odps/config/OdpsCommandConfig.java | 43 ++ .../odps/config/OdpsConfigConstant.java | 38 ++ .../plugin/odps/config/OdpsConnectConfig.java | 34 ++ .../plugin/odps/config/OdpsTableConfig.java | 43 +- .../odps/config/OdpsTableQueryConfig.java | 55 ++ .../plugin/odps/config/OdpsWriteConfig.java | 52 ++ .../odps/config/ScqlCommandJobConfig.java | 42 ++ .../plugin/odps/config/TaskConfig.java | 66 ++ .../plugin/odps/constant/OdpsTypeEnum.java | 32 +- .../odps/converter/OdpsParamConverter.java | 80 +++ .../odps/io/DynamicSequenceInputStream.java | 150 +++++ .../odps/producer/OdpsFlightProducer.java | 247 ++++++++ .../plugin/odps/reader/OdpsDoGetContext.java | 211 +++++++ .../odps/reader/OdpsDoGetTaskContext.java | 101 ++++ .../plugin/odps/reader/OdpsReader.java | 105 ++++ .../plugin/odps/reader/OdpsRecordSender.java | 185 ++++++ .../odps/reader/OdpsResourceReader.java | 117 ++-- .../odps/reader/OdpsTunnelRecordReader.java | 78 +++ .../dataproxy/plugin/odps/utils/OdpsUtil.java | 104 ++++ .../plugin/odps/writer/OdpsRecordWriter.java | 131 ++-- .../odps/writer/OdpsResourceWriter.java | 152 +++++ ...retflow.dataproxy.core.config.ConfigLoader | 2 + ....core.spi.producer.DataProxyFlightProducer | 1 + .../pom.xml | 28 +- dataproxy-server/pom.xml | 61 +- .../server/DataProxyApplication.java | 41 -- .../server/DataProxyFlightServer.java | 87 +++ .../server/DataProxyServerApplication.java | 46 ++ .../dataproxy/server/DataproxyLauncher.java | 87 --- .../server/FlightServerTraceMiddleware.java | 18 +- .../dataproxy/server/ProtoObjConvertor.java | 427 ++++++------- .../dataproxy/server/config/ArrowConfig.java | 122 ---- .../flight/CompositeFlightProducer.java | 251 ++++++++ .../server/flight/DataproxyProducerImpl.java | 366 ------------ .../server/flight/ProducerRegistry.java | 56 ++ .../src/main/resources/application.yaml | 40 -- .../src/main/resources/logback.xml | 29 + .../dataproxy/service/DataProxyService.java | 89 --- .../impl/DataProxyServiceDirectImpl.java | 187 ------ .../service/impl/TicketServiceImpl.java | 92 --- dataproxy_sdk/bazel/repositories.bzl | 21 +- dataproxy_sdk/cc/BUILD.bazel | 24 + dataproxy_sdk/cc/api.h | 3 +- dataproxy_sdk/cc/data_proxy_file.cc | 37 +- dataproxy_sdk/cc/data_proxy_pb.cc | 34 +- dataproxy_sdk/cc/data_proxy_pb.h | 8 + dataproxy_sdk/cc/data_proxy_stream.cc | 248 ++++++++ dataproxy_sdk/cc/data_proxy_stream.h | 71 +++ dataproxy_sdk/cc/data_proxy_stream_test.cc | 77 +++ dataproxy_sdk/cc/exception.h | 40 +- dataproxy_sdk/cc/file_help.cc | 3 +- dataproxy_sdk/cc/file_help_test.cc | 34 +- dataproxy_sdk/proto/data_proxy_pb.proto | 6 + dataproxy_sdk/python/dataproxy/BUILD.bazel | 30 +- dataproxy_sdk/python/dataproxy/__init__.py | 15 +- dataproxy_sdk/python/dataproxy/_lib.cc | 135 +++++ .../{dp_file_adapter.py => file_adapter.py} | 40 +- .../python/dataproxy/{dp_pb2.py => proto.py} | 1 + dataproxy_sdk/python/dataproxy/sdk.py | 7 +- 
dataproxy_sdk/python/dataproxy/stream.py | 65 ++ dataproxy_sdk/python/dataproxy/version.py | 2 +- dataproxy_sdk/python/requirements.txt | 4 +- dataproxy_sdk/python/setup.py | 13 +- dataproxy_sdk/python/test/BUILD.bazel | 62 ++ .../libdataproxy.cc => test/_dm_mock.cc} | 41 +- dataproxy_sdk/python/test/dm_mock.py | 26 + dataproxy_sdk/python/test/file_test.py | 69 +++ dataproxy_sdk/python/test/stream_test.py | 64 ++ dataproxy_sdk/test/data_mesh_mock.cc | 13 +- pom.xml | 562 +++++++----------- proto/kuscia/flightdm.proto | 7 +- proto/kuscia/flightinner.proto | 5 + 219 files changed, 6503 insertions(+), 11624 deletions(-) delete mode 100644 config/application.yaml delete mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/CSVFormatConfig.java delete mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/PartitionBehavior.java delete mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/TableFormatConfig.java delete mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/JdbcBaseConnConfig.java delete mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/LocalFileSystemConnConfig.java delete mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/ObjectFileSystemConnConfig.java delete mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/FileSystemLocationConfig.java delete mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/JdbcLocationConfig.java delete mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/MinioLocationConfig.java delete mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/OSSLocationConfig.java create mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/ArrowUtil.java create mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/AssertUtil.java delete mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/DPStringUtils.java create mode 100644 dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/EnvVarUtils.java create mode 100644 dataproxy-core/pom.xml create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/ConfigLoader.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/DefaultFlightServerConfigLoader.java rename dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/DataReader.java => dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerConfig.java (59%) rename dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/IndexType.java => dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerConfigKey.java (67%) create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerContext.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/AbstractMultiTypeConverter.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/AbstractValueConverter.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/BigIntVectorConverter.java create mode 100644 
dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/BitVectorConverter.java rename dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/MysqlLocationConfig.java => dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DataProxyParamConverter.java (57%) create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DateDayVectorConverter.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DateMilliVectorConverter.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/Float4VectorConverter.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/Float8VectorConverter.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/IntVectorConverter.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/SmallIntVectorConverter.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TimeMilliVectorConvertor.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TimeStampNanoVectorConverter.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TinyIntVectorConverter.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/ValueConversionStrategy.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/VarCharVectorConverter.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/param/ParamWrapper.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/AbstractReader.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/AbstractSender.java rename dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/schema/StructuredDataSchema.java => dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/ReadJobContext.java (55%) rename dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/SplitReader.java => dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/Reader.java (62%) create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/Sender.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/repository/CaffeineDataRepository.java rename dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/MinioConnConfig.java => dataproxy-core/src/main/java/org/secretflow/dataproxy/core/repository/ParamWrapperRepository.java (56%) rename {dataproxy-service/src/main/java/org/secretflow/dataproxy => dataproxy-core/src/main/java/org/secretflow/dataproxy/core}/service/TicketService.java (57%) create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/service/impl/CacheTicketService.java rename dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/schema/DataField.java => dataproxy-core/src/main/java/org/secretflow/dataproxy/core/spi/producer/DataProxyFlightProducer.java (51%) create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/BooleanValueVisitor.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ByteArrayValueVisitor.java create mode 100644 
dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ByteValueVisitor.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/DoubleValueVisitor.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/FloatValueVisitor.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/IntegerValueVisitor.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/LongValueVisitor.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ShortValueVisitor.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ValueVisitor.java create mode 100644 dataproxy-core/src/main/java/org/secretflow/dataproxy/core/writer/Writer.java create mode 100644 dataproxy-core/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.config.ConfigLoader create mode 100644 dataproxy-integration-tests/src/main/java/org/secretflow/dataproxy/integration/tests/config/OdpsKusciaConnectorConfig.java delete mode 100644 dataproxy-manager/pom.xml delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/Connector.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/DataWriter.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileDataReader.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileDataWriter.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileSplitReader.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVDataReader.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVDataWriter.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVSplitReader.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/FileSystemConnector.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsConnector.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsDataReader.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceReader.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceWriter.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsSplitArrowReader.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsUtil.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcAssistant.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcConnector.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcDataReader.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcDataWriter.java delete mode 100644 
dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcSplitReader.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/MysqlJdbcAssistant.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/ArrowVectorIterator.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/Constants.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcFieldInfo.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcParameterBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrow.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowConfig.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowConfigBuilder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowUtils.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BaseColumnBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BigIntBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BitBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ColumnBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ColumnBinderArrowTypeVisitor.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/DateDayBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/DateMilliBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Decimal128Binder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Decimal256Binder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/FixedSizeBinaryBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Float4Binder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Float8Binder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/IntBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ListBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/MapBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/NullableColumnBinder.java delete mode 100644 
dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/SmallIntBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Time32Binder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Time64Binder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/TimeStampBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/TinyIntBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/VarBinaryBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/VarCharBinder.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/package-info.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/ArrayConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BaseConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BigIntConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BinaryConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BitConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BlobConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/ClobConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/CompositeJdbcConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DateConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DecimalConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DoubleConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/FloatConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/IntConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/JdbcConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/MapConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/NullConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/SmallIntConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimeConsumer.java delete mode 100644 
dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimestampConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimestampTZConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TinyIntConsumer.java delete mode 100644 dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/VarCharConsumer.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/pom.xml create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/DefaultOdpsFlightConfigLoader.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/EnvironmentOdpsFlightConfigLoader.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsCommandConfig.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsConfigConstant.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsConnectConfig.java rename dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/TableIndex.java => dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsTableConfig.java (50%) create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsTableQueryConfig.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsWriteConfig.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/ScqlCommandJobConfig.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/TaskConfig.java rename dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/MysqlConnConfig.java => dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/constant/OdpsTypeEnum.java (61%) create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/converter/OdpsParamConverter.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/io/DynamicSequenceInputStream.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/producer/OdpsFlightProducer.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsDoGetContext.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsDoGetTaskContext.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsReader.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsRecordSender.java rename dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceSplitReader.java => 
dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsResourceReader.java (58%) create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsTunnelRecordReader.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/utils/OdpsUtil.java rename dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsDataWriter.java => dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/writer/OdpsRecordWriter.java (83%) create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/writer/OdpsResourceWriter.java create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.config.ConfigLoader create mode 100644 dataproxy-plugins/dataproxy-plugin-odps/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.spi.producer.DataProxyFlightProducer rename {dataproxy-service => dataproxy-plugins}/pom.xml (51%) delete mode 100644 dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyApplication.java create mode 100644 dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyFlightServer.java create mode 100644 dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyServerApplication.java delete mode 100644 dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataproxyLauncher.java delete mode 100644 dataproxy-server/src/main/java/org/secretflow/dataproxy/server/config/ArrowConfig.java create mode 100644 dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/CompositeFlightProducer.java delete mode 100644 dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/DataproxyProducerImpl.java create mode 100644 dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/ProducerRegistry.java delete mode 100644 dataproxy-server/src/main/resources/application.yaml create mode 100644 dataproxy-server/src/main/resources/logback.xml delete mode 100644 dataproxy-service/src/main/java/org/secretflow/dataproxy/service/DataProxyService.java delete mode 100644 dataproxy-service/src/main/java/org/secretflow/dataproxy/service/impl/DataProxyServiceDirectImpl.java delete mode 100644 dataproxy-service/src/main/java/org/secretflow/dataproxy/service/impl/TicketServiceImpl.java create mode 100644 dataproxy_sdk/cc/data_proxy_stream.cc create mode 100644 dataproxy_sdk/cc/data_proxy_stream.h create mode 100644 dataproxy_sdk/cc/data_proxy_stream_test.cc create mode 100644 dataproxy_sdk/python/dataproxy/_lib.cc rename dataproxy_sdk/python/dataproxy/{dp_file_adapter.py => file_adapter.py} (63%) rename dataproxy_sdk/python/dataproxy/{dp_pb2.py => proto.py} (91%) create mode 100644 dataproxy_sdk/python/dataproxy/stream.py create mode 100644 dataproxy_sdk/python/test/BUILD.bazel rename dataproxy_sdk/python/{dataproxy/libdataproxy.cc => test/_dm_mock.cc} (50%) create mode 100644 dataproxy_sdk/python/test/dm_mock.py create mode 100644 dataproxy_sdk/python/test/file_test.py create mode 100644 dataproxy_sdk/python/test/stream_test.py diff --git a/build/Dockerfiles/dataproxy.Dockerfile b/build/Dockerfiles/dataproxy.Dockerfile index e165256..9dfc885 100644 --- a/build/Dockerfiles/dataproxy.Dockerfile +++ b/build/Dockerfiles/dataproxy.Dockerfile @@ -20,9 +20,10 @@ WORKDIR /app # fix: RunP proot + java bug RUN ln 
-s ${JAVA_HOME}/lib/libjli.so /lib64 -COPY target/*.jar dataproxy.jar -COPY config/application.yaml application.yaml -COPY scripts/start_dp.sh start_dp.sh -ENV JAVA_OPTS="" SPRING_PROFILES_ACTIVE="default" +COPY dataproxy-server/target/dataproxy-server-0.0.1-SNAPSHOT.jar dataproxy.jar +COPY libs/*.jar libs/ + +ENV JAVA_OPTS="" +ENV LOG_LEVEL=INFO EXPOSE 8023 -ENTRYPOINT ${JAVA_HOME}/bin/java ${JAVA_OPTS} -Dsun.net.http.allowRestrictedHeaders=true --add-opens=java.base/java.nio=ALL-UNNAMED -jar -Dspring.profiles.active=${SPRING_PROFILES_ACTIVE} ./dataproxy.jar \ No newline at end of file +ENTRYPOINT ${JAVA_HOME}/bin/java ${JAVA_OPTS} -Dsun.net.http.allowRestrictedHeaders=true --add-opens=java.base/java.nio=ALL-UNNAMED -jar ./dataproxy.jar \ No newline at end of file diff --git a/config/application.yaml b/config/application.yaml deleted file mode 100644 index 614120e..0000000 --- a/config/application.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2024 Ant Group Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -spring: - # profiles: - # active: local - autoconfigure: - exclude: org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration - application: - name: dataproxy - servlet: - multipart: - max-file-size: -1 - max-request-size: -1 - file-size-threshold: -1 - -logging: - level: - root: info - file: - path: "./logs" - -dataproxy: - flight: - host: 127.0.0.1 # endpoint IP returned by getFlightInfo - port: 8023 - ticket: - timeout: 300 # expiration time, in seconds - onlyOnce: true # single-use flag; true: one-time use, false: multiple calls allowed, destroyed on timeout \ No newline at end of file diff --git a/dataproxy-api/pom.xml b/dataproxy-api/pom.xml index 9d29f4f..137e232 100644 --- a/dataproxy-api/pom.xml +++ b/dataproxy-api/pom.xml @@ -7,6 +7,7 @@ org.secretflow dataproxy 0.0.1-SNAPSHOT + ../pom.xml dataproxy-api @@ -21,16 +22,19 @@ protobuf-java-util - org.apache.arrow - flight-grpc + io.grpc + grpc-protobuf + compile io.grpc - grpc-netty-shaded + grpc-stub + compile javax.annotation javax.annotation-api + compile diff --git a/dataproxy-common/pom.xml b/dataproxy-common/pom.xml index 105c26e..78ca93c 100644 --- a/dataproxy-common/pom.xml +++ b/dataproxy-common/pom.xml @@ -7,6 +7,7 @@ org.secretflow dataproxy 0.0.1-SNAPSHOT + ../pom.xml dataproxy-common @@ -20,23 +21,24 @@ org.projectlombok lombok + compile - + + + io.netty netty-all @@ -45,10 +47,10 @@ io.netty netty-tcnative-boringssl-static - + com.google.protobuf @@ -80,12 +82,17 @@ org.apache.arrow flight-core - + + + \ No newline at end of file diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/CSVFormatConfig.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/CSVFormatConfig.java deleted file mode 100644 index 7adcf39..0000000 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/CSVFormatConfig.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd.
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.common.model.dataset.format; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; - -import java.util.Map; - -/** - * CSV format config - * - * @author muhong - * @date 2023-08-30 19:32 - */ -@Getter -@Builder -@AllArgsConstructor -@NoArgsConstructor -public class CSVFormatConfig implements FormatConfig { - - /** - * Field name map, key: raw name, value:output name - */ - Map fieldMap; - - /** - * With header line - */ - @Builder.Default - private Boolean withHeaderLine = true; - - /** - * Separator - */ - @Builder.Default - private String separator = ","; - - /** - * QuoteChar - */ - @Builder.Default - private String quoteChar = "\""; - - /** - * EscapeChar - */ - @Builder.Default - private String escapeChar = "\\"; -} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/PartitionBehavior.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/PartitionBehavior.java deleted file mode 100644 index 14f500d..0000000 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/PartitionBehavior.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.secretflow.dataproxy.common.model.dataset.format; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.commons.lang3.StringUtils; - -import java.util.List; - -/** - * Partition behavior - * - * @author muhong - * @date 2023-10-23 15:44 - */ -@Data -@NoArgsConstructor -@AllArgsConstructor -@Builder -public class PartitionBehavior { - - /** - * Field name - */ - private String fieldName; - - /** - * Field type - */ - private ArrowType.ArrowTypeID type; - - /** - * Lower bound - */ - private String lowerBound; - - /** - * Upper bound - */ - private String upperBound; - - /** - * Partition step - */ - private String step; - - /** - * Predicates, eg["id>=0 AND id<100", "id>=100 AND id<200", "id>=200 AND id<300"] - */ - private List predicates; - - public boolean isValid() { - return StringUtils.isNotEmpty(fieldName) && type != null && StringUtils.isNotEmpty(lowerBound) && StringUtils.isNotEmpty(upperBound); - } -} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/TableFormatConfig.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/TableFormatConfig.java deleted file mode 100644 index 30fce07..0000000 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/TableFormatConfig.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.secretflow.dataproxy.common.model.dataset.format; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -import java.util.List; -import java.util.Map; - -/** - * Table format config - * @author muhong - * @date 2023-08-30 19:36 - */ -@Data -@NoArgsConstructor -@AllArgsConstructor -@Builder(toBuilder = true) -public class TableFormatConfig implements FormatConfig { - /** - * Primary key - */ - private String primaryKey; - - /** - * Index list - */ - private List indexList; - - /** - * Partition behavior - */ - private PartitionBehavior partitionBehavior; - - /** - * Field name map - */ - private Map fieldMap; -} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/DatasourceConnConfig.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/DatasourceConnConfig.java index f71ef14..8c461a6 100644 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/DatasourceConnConfig.java +++ b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/DatasourceConnConfig.java @@ -21,8 +21,6 @@ import lombok.Data; import lombok.NoArgsConstructor; import org.secretflow.dataproxy.common.model.datasource.conn.ConnConfig; -import org.secretflow.dataproxy.common.utils.IdUtils; -import org.secretflow.dataproxy.common.utils.JsonUtils; /** * Datasource connection config @@ -46,7 +44,4 @@ public class DatasourceConnConfig { */ private ConnConfig connConfig; - public String generateUniqueId() { - return IdUtils.combineIds(JsonUtils.toJSONString(this)); - } } diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/JdbcBaseConnConfig.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/JdbcBaseConnConfig.java deleted file mode 100644 index 6f6823d..0000000 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/JdbcBaseConnConfig.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.secretflow.dataproxy.common.model.datasource.conn; - -import com.fasterxml.jackson.databind.annotation.JsonSerialize; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.Setter; -import lombok.NoArgsConstructor; -import lombok.experimental.SuperBuilder; -import org.secretflow.dataproxy.common.serializer.SensitiveDataSerializer; - -import java.util.Map; - -/** - * JDBC datasource connection config - * - * @author muhong - * @date 2023-09-07 14:06 - */ -@Getter -@Setter -@AllArgsConstructor -@NoArgsConstructor -@SuperBuilder -public class JdbcBaseConnConfig implements ConnConfig { - - /** - * Host - */ - private String host; - - /** - * Dataset - */ - private String database; - - /** - * Username - */ - @JsonSerialize(using = SensitiveDataSerializer.class) - private String userName; - - /** - * Password - */ - @JsonSerialize(using = SensitiveDataSerializer.class) - private String password; - - /** - * Options - */ - private Map option; - - @Builder.Default - private Integer maximumPoolSize = 10; - - @Builder.Default - private Integer minimumIdle = 2; - - @Builder.Default - private Boolean cachePrepStmts = true; - - @Builder.Default - private Boolean useServerPrepStmts = true; - - @Builder.Default - private Integer prepStmtCacheSize = 200; - - @Builder.Default - private Integer prepStmtCacheSqlLimit = 2048; - -} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/LocalFileSystemConnConfig.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/LocalFileSystemConnConfig.java deleted file mode 100644 index 631cd19..0000000 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/LocalFileSystemConnConfig.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.common.model.datasource.conn; - -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import lombok.experimental.SuperBuilder; - -/** - * Local filesystem datasource connection config - * - * @author muhong - * @date 2023-09-13 11:46 - */ -@Getter -@Setter -@AllArgsConstructor -@NoArgsConstructor -@SuperBuilder -public class LocalFileSystemConnConfig implements ConnConfig { - - /** - * Path - */ - private String path; -} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/ObjectFileSystemConnConfig.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/ObjectFileSystemConnConfig.java deleted file mode 100644 index 5df02a5..0000000 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/ObjectFileSystemConnConfig.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.common.model.datasource.conn; - -import com.fasterxml.jackson.databind.annotation.JsonSerialize; -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import lombok.experimental.SuperBuilder; -import org.secretflow.dataproxy.common.serializer.SensitiveDataSerializer; - -/** - * Oss datasource connection config - * - * @author muhong - * @date 2023-09-11 11:34 - */ -@Getter -@Setter -@AllArgsConstructor -@NoArgsConstructor -@SuperBuilder -public class ObjectFileSystemConnConfig implements ConnConfig { - - /** - * Endpoint address - */ - private String endpoint; - - /** - * Access key - */ - @JsonSerialize(using = SensitiveDataSerializer.class) - private String accessKey; - - /** - * Secret of the access key - */ - @JsonSerialize(using = SensitiveDataSerializer.class) - private String accessSecret; - - /** - * Communication protocol, http or https - */ - private String endpointProtocol; - - /** - * Region domain, without host or protocol - */ - private String regionHost; - - /** - * bucket - */ - private String bucket; - - /** - * Object key prefix, in the form prefix/ - */ - private String objectKeyPrefix; -} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/OdpsConnConfig.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/OdpsConnConfig.java index 8f875b0..41753be 100644 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/OdpsConnConfig.java +++ b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/OdpsConnConfig.java @@ -17,8 +17,12 @@ package org.secretflow.dataproxy.common.model.datasource.conn; import com.fasterxml.jackson.databind.annotation.JsonSerialize; -import jakarta.validation.constraints.NotBlank; -import lombok.*; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import lombok.ToString; import org.secretflow.dataproxy.common.serializer.SensitiveDataSerializer; /** @@ -38,21 +42,18 @@ public class OdpsConnConfig implements ConnConfig { /** * access key id */ - @NotBlank @JsonSerialize(using = SensitiveDataSerializer.class) private String accessKeyId; /** * access key secret */ - @NotBlank @JsonSerialize(using = SensitiveDataSerializer.class) private String accessKeySecret; /** * endpoint */ - @NotBlank private String endpoint; /** diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/FileSystemLocationConfig.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/FileSystemLocationConfig.java deleted file mode 100644 index 23c98cd..0000000 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/FileSystemLocationConfig.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd.
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.common.model.datasource.location; - -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import lombok.experimental.SuperBuilder; - -/** - * File system dataset location config - * - * @author muhong - * @date 2023-09-11 10:58 - */ -@Getter -@Setter -@SuperBuilder -@AllArgsConstructor -@NoArgsConstructor -public class FileSystemLocationConfig implements LocationConfig { - - /** - * Relative path - */ - private String relativePath; -} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/JdbcLocationConfig.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/JdbcLocationConfig.java deleted file mode 100644 index e3a066a..0000000 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/JdbcLocationConfig.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.common.model.datasource.location; - -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import lombok.experimental.SuperBuilder; - -/** - * JDBC dataset location config - * - * @author muhong - * @date 2023-09-07 20:52 - */ -@Getter -@Setter -@SuperBuilder -@AllArgsConstructor -@NoArgsConstructor -public class JdbcLocationConfig implements LocationConfig { - - /** - * table name - */ - private String table; -} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/MinioLocationConfig.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/MinioLocationConfig.java deleted file mode 100644 index 6eee4ab..0000000 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/MinioLocationConfig.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.common.model.datasource.location; - -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.Setter; -import lombok.experimental.SuperBuilder; - -/** - * Minio dataset location config - * - * @author muhong - * @date 2023-08-30 19:15 - */ -@Getter -@Setter -@SuperBuilder -@AllArgsConstructor -public class MinioLocationConfig extends FileSystemLocationConfig { -} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/OSSLocationConfig.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/OSSLocationConfig.java deleted file mode 100644 index f05787e..0000000 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/OSSLocationConfig.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.common.model.datasource.location; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Oss dataset location config - * @author yumu - * @date 2023/9/1 17:32 - */ -@Data -@AllArgsConstructor -@NoArgsConstructor -@Builder -public class OSSLocationConfig implements LocationConfig { - /** - * File key - */ - private String fileKey; -} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/serializer/SensitiveDataSerializer.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/serializer/SensitiveDataSerializer.java index cca960c..2fe5394 100644 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/serializer/SensitiveDataSerializer.java +++ b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/serializer/SensitiveDataSerializer.java @@ -23,7 +23,7 @@ import java.io.IOException; /** - * json 序列化字段脱敏器 + * JSON serialization field desensitizer * * @author yuexie * @date 2024-07-08 diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/ArrowUtil.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/ArrowUtil.java new file mode 100644 index 0000000..7533c9b --- /dev/null +++ b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/ArrowUtil.java @@ -0,0 +1,51 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.common.utils; + +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; +import org.secretflow.dataproxy.common.exceptions.DataproxyException; + +/** + * @author yuexie + * @date 2024/11/8 15:51 + **/ +public class ArrowUtil { + + public static ArrowType parseKusciaColumnType(String type) { + // Maps a Kuscia column type name (string/integer/float/datetime/timestamp families) to the corresponding Arrow type + return switch (type) { + case "int8" -> Types.MinorType.TINYINT.getType(); + case "int16" -> Types.MinorType.SMALLINT.getType(); + case "int32" -> Types.MinorType.INT.getType(); + case "int64", "int" -> Types.MinorType.BIGINT.getType(); + case "uint8" -> Types.MinorType.UINT1.getType(); + case "uint16" -> Types.MinorType.UINT2.getType(); + case "uint32" -> Types.MinorType.UINT4.getType(); + case "uint64" -> Types.MinorType.UINT8.getType(); + case "float32" -> Types.MinorType.FLOAT4.getType(); + case "float64", "float" -> Types.MinorType.FLOAT8.getType(); + case "date32" -> Types.MinorType.DATEDAY.getType(); + case "date64" -> Types.MinorType.DATEMILLI.getType(); + case "bool" -> Types.MinorType.BIT.getType(); + case "string", "str" -> Types.MinorType.VARCHAR.getType(); + case "binary" -> Types.MinorType.VARBINARY.getType(); + default -> throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Unsupported field type: " + type); + }; + } +} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/AssertUtil.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/AssertUtil.java new file mode 100644 index 0000000..df45e57 --- /dev/null +++ b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/AssertUtil.java @@ -0,0 +1,40 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.secretflow.dataproxy.common.utils; + +import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; +import org.secretflow.dataproxy.common.exceptions.DataproxyException; + +/** + * @author yuexie + * @date 2024/11/1 19:46 + **/ +public class AssertUtil { + + public static void notNull(Object obj, String message) { + if (obj == null) { + throw new IllegalArgumentException(message); + } + } + + public static void isTrue(boolean expression, String message) { + if (!expression) { + throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, message); + } + } + +} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/DPStringUtils.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/DPStringUtils.java deleted file mode 100644 index c5767a1..0000000 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/DPStringUtils.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.common.utils; - -import org.apache.commons.lang3.StringUtils; - -import java.util.Arrays; -import java.util.List; -import java.util.Objects; -import java.util.stream.Collectors; - -/** - * @author muhong - * @date 2023-10-19 11:13 - */ -public class DPStringUtils { - - /** - * 去前后包装标识 - * - * @param origin 原始字符串 - * @param identifier 包装标识 - * @return - */ - public static String removeDecorateIdentifier(String origin, String identifier) { - String removeStart = StringUtils.removeStart(origin, identifier); - return StringUtils.removeEnd(removeStart, identifier); - } - - /** - * 忽略空值拼接 - * - * @param delimiter 间隔符 - * @param array 待拼接数组 - * @return - */ - public static String joinWithoutEmpty(String delimiter, String... array) { - if (array == null || array.length == 0) { - return ""; - } - - List notEmptyList = Arrays.stream(array).filter(Objects::nonNull).collect(Collectors.toList()); - if (notEmptyList.isEmpty()) { - return ""; - } - - return StringUtils.join(notEmptyList, delimiter); - } - -} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/EnvVarUtils.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/EnvVarUtils.java new file mode 100644 index 0000000..e964ca7 --- /dev/null +++ b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/EnvVarUtils.java @@ -0,0 +1,72 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.common.utils; + +import lombok.extern.slf4j.Slf4j; + +import java.util.Optional; + +/** + * @author yuexie + * @date 2024/12/6 16:16 + **/ +@Slf4j +public class EnvVarUtils { + + + public static Optional<Integer> getInt(String key) { + + String env = System.getenv(key); + if (env == null || env.isEmpty()) { + return Optional.empty(); + } + + if (!env.matches("\\d+")) { + log.warn("Env var `{}` is not a valid integer: {}", key, env); + return Optional.empty(); + } + return Optional.of(Integer.parseInt(env)); + } + + + public static Optional<Long> getLong(String key) { + String env = System.getenv(key); + if (env == null || env.isEmpty()) { + return Optional.empty(); + } + + if (!env.matches("\\d+")) { + log.warn("Env var `{}` is not a valid long: {}", key, env); + return Optional.empty(); + } + return Optional.of(Long.parseLong(env)); + } + + public static int getEffectiveValue(int v, int minValue, int maxValue) { + if (v <= minValue) { + return minValue; + } + return Math.min(v, maxValue); + } + + public static long getEffectiveValue(long v, long minValue, long maxValue) { + if (v <= minValue) { + return minValue; + } + return Math.min(v, maxValue); + } +} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/IdUtils.java b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/IdUtils.java index 6cfeb71..c26718a 100644 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/IdUtils.java +++ b/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/IdUtils.java @@ -16,9 +16,6 @@ package org.secretflow.dataproxy.common.utils; -import okio.ByteString; -import org.apache.commons.lang3.StringUtils; - import java.text.SimpleDateFormat; import java.util.Base64; import java.util.Date; @@ -39,16 +36,16 @@ public class IdUtils { private static final String idLetters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; /** - * 随机串长度 + * Random string length */ private static final int idLen = 8; /** - * 生成ID + * Generate ID with prefix and splitter * * @param prefix, 前缀 * @param splitter, 分隔符 - * @return + * @return ID */ public static String createId(String prefix, String splitter) { String dateText; @@ -80,19 +77,6 @@ public static String randomUUID() { return UUID.randomUUID().toString().replace("-", ""); } - /** - * 多个id生成联合id - * - * @param ids - * @return - */ - public static String combineIds(String...
ids) { - return ByteString - .encodeUtf8(StringUtils.join(ids, "|")) - .sha256() - .hex(); - } - /** - * 拼接两个 id 作为 traceId */ diff --git a/dataproxy-core/pom.xml b/dataproxy-core/pom.xml new file mode 100644 index 0000000..3799eaf --- /dev/null +++ b/dataproxy-core/pom.xml @@ -0,0 +1,51 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.secretflow</groupId> + <artifactId>dataproxy</artifactId> + <version>0.0.1-SNAPSHOT</version> + </parent> + + <artifactId>dataproxy-core</artifactId> + <packaging>jar</packaging> + + <name>dataproxy-core</name> + <url>http://maven.apache.org</url> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + </properties> + + <dependencies> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>flight-core</artifactId> + </dependency> + + <dependency> + <groupId>org.secretflow</groupId> + <artifactId>dataproxy-common</artifactId> + </dependency> + + <dependency> + <groupId>com.github.ben-manes.caffeine</groupId> + <artifactId>caffeine</artifactId> + </dependency> + <dependency> + <groupId>org.projectlombok</groupId> + <artifactId>lombok</artifactId> + <scope>compile</scope> + </dependency> + + <dependency> + <groupId>jakarta.validation</groupId> + <artifactId>jakarta.validation-api</artifactId> + </dependency> + </dependencies> +</project> diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/ConfigLoader.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/ConfigLoader.java new file mode 100644 index 0000000..eb211f1 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/ConfigLoader.java @@ -0,0 +1,50 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.config; + +import java.util.Properties; + +/** + * @author yuexie + * @date 2024/11/28 16:40 + **/ +public interface ConfigLoader { + + /** + * load properties
+ * Read the configuration from this loader's source and load it into the properties passed in.
+ * Loaders are applied in the order given by {@link #getPriority()}.
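+ *
+ * <p>A minimal implementation sketch (illustrative only; the class name below is not part of this patch):
+ * <pre>{@code
+ * public class EnvConfigLoader implements ConfigLoader {
+ *     @Override
+ *     public void loadProperties(Properties properties) {
+ *         EnvVarUtils.getInt("SERVICE_PORT")
+ *                 .ifPresent(port -> properties.put(FlightServerConfigKey.PORT, port));
+ *     }
+ * }
+ * }</pre>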
+ * + * @param properties properties + */ + void loadProperties(Properties properties); + + /** + * get priority:
+ * 0: default priority
+ * Loaders run in ascending order of this value, so a loader with a larger
+ * value runs later and the properties it sets override those set earlier.
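+ * For example (an illustrative sketch, not part of this patch), a loader that
+ * should win over the defaults can return a larger value:
+ * <pre>{@code
+ * @Override
+ * public int getPriority() {
+ *     return 10; // runs after priority-0 loaders, so its entries take effect
+ * }
+ * }</pre>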
+ * The higher the value, the higher the priority + * + * @return priority + */ + default int getPriority() { + return 0; + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/DefaultFlightServerConfigLoader.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/DefaultFlightServerConfigLoader.java new file mode 100644 index 0000000..bc32252 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/DefaultFlightServerConfigLoader.java @@ -0,0 +1,65 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.config; + +import java.net.Inet4Address; +import java.net.InetAddress; +import java.net.NetworkInterface; +import java.net.SocketException; +import java.util.Enumeration; +import java.util.Properties; + +/** + * @author yuexie + * @date 2024/11/4 00:02 + **/ +public class DefaultFlightServerConfigLoader implements ConfigLoader { + + /** + * load properties
+ * Read the configuration from this loader's source and load it into the properties passed in.
+ * Loaders are applied in the order given by {@link #getPriority()}.
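+ * <p>This default loader publishes a non-loopback IPv4 address of the host as
+ * {@code SERVICE_HOST} and the fixed default port 8023 as {@code SERVICE_PORT}.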
+ * + * @param properties properties + */ + @Override + public void loadProperties(Properties properties) { + try { + String localMachineHost = ""; + final Enumeration<NetworkInterface> interfaces = NetworkInterface.getNetworkInterfaces(); + while (interfaces.hasMoreElements()) { + NetworkInterface networkInterface = interfaces.nextElement(); + + if (networkInterface.isLoopback() || !networkInterface.isUp()) { + continue; + } + + final Enumeration<InetAddress> addresses = networkInterface.getInetAddresses(); + while (addresses.hasMoreElements()) { + InetAddress inetAddress = addresses.nextElement(); + if (!inetAddress.isLoopbackAddress() && inetAddress instanceof Inet4Address) { + localMachineHost = inetAddress.getHostAddress(); + } + } + } + properties.put(FlightServerConfigKey.HOST, localMachineHost); + } catch (SocketException e) { + throw new RuntimeException(e); + } + properties.put(FlightServerConfigKey.PORT, 8023); + } +} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/DataReader.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerConfig.java similarity index 59% rename from dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/DataReader.java rename to dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerConfig.java index c509449..6e0a28f 100644 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/DataReader.java +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerConfig.java @@ -1,5 +1,5 @@ /* - * Copyright 2023 Ant Group Co., Ltd. + * Copyright 2024 Ant Group Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,23 +14,19 @@ * limitations under the License. */ -package org.secretflow.dataproxy.manager; +package org.secretflow.dataproxy.core.config; -import java.util.List; +import lombok.extern.slf4j.Slf4j; +import org.apache.arrow.flight.Location; /** - * Dataset reader - * - * @author muhong - * @date 2023-08-21 17:48 - */ -public interface DataReader { + * @author yuexie + * @date 2024/10/30 16:14 + **/ +@Slf4j +public record FlightServerConfig(String host, int port) { - /** - * Build split dataset reader - * - * @param splitNumber Split number - * @return Split reader list - */ - List<SplitReader> createSplitReader(int splitNumber); + public Location getLocation() { + return Location.forGrpcInsecure(host, port); + } } diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/IndexType.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerConfigKey.java similarity index 67% rename from dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/IndexType.java rename to dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerConfigKey.java index 70f3247..0b3d458 100644 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/IndexType.java +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerConfigKey.java @@ -1,5 +1,5 @@ /* - * Copyright 2023 Ant Group Co., Ltd. + * Copyright 2024 Ant Group Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,22 +14,16 @@ * limitations under the License.
*/ -package org.secretflow.dataproxy.common.model.dataset.format; +package org.secretflow.dataproxy.core.config; /** - * Table index type - * @author yumu - * @date 2023/9/4 10:22 - */ -public enum IndexType { + * @author yuexie + * @date 2024/10/30 16:11 + **/ +public interface FlightServerConfigKey { + + String HOST = "SERVICE_HOST"; - /** - * unique index - */ - UNIQUE, + String PORT = "SERVICE_PORT"; - /** - * common index - */ - INDEX } diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerContext.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerContext.java new file mode 100644 index 0000000..c620ba9 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/config/FlightServerContext.java @@ -0,0 +1,70 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.config; + +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +import java.util.Comparator; +import java.util.Optional; +import java.util.Properties; +import java.util.ServiceLoader; + +/** + * @author yuexie + * @date 2024/10/31 19:57 + **/ +@Getter +@Slf4j +public class FlightServerContext { + + private final FlightServerConfig flightServerConfig; + + private static final Properties CONFIG_PROPERTIES = new Properties(); + + private static class SingletonHolder { + public static final FlightServerContext INSTANCE = new FlightServerContext(); + } + + private FlightServerContext() { + init(); + flightServerConfig = new FlightServerConfig(get(FlightServerConfigKey.HOST, String.class), get(FlightServerConfigKey.PORT, Integer.class)); + } + + private void init() { + ServiceLoader<ConfigLoader> serviceLoader = ServiceLoader.load(ConfigLoader.class); + + serviceLoader.stream() + .map(ServiceLoader.Provider::get) + .sorted(Comparator.comparingInt(ConfigLoader::getPriority)) + .forEach(configLoader -> configLoader.loadProperties(CONFIG_PROPERTIES)); + + CONFIG_PROPERTIES.forEach((k, v) -> log.info("load config: {}={}", k, v)); + } + + public static FlightServerContext getInstance() { + return SingletonHolder.INSTANCE; + } + + public static <T> T getOrDefault(String key, Class<T> tClass, T defaultValue) { + return Optional.ofNullable(CONFIG_PROPERTIES.get(key)).map(tClass::cast).orElse(defaultValue); + } + + public static <T> T get(String key, Class<T> tClass) { + return Optional.ofNullable(CONFIG_PROPERTIES.get(key)).map(tClass::cast).orElse(null); + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/AbstractMultiTypeConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/AbstractMultiTypeConverter.java new file mode 100644 index 0000000..d1cc5fe --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/AbstractMultiTypeConverter.java @@ -0,0 +1,39 @@ +/* + * Copyright 2024 Ant Group Co., Ltd.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/11/1 20:41 + **/ +public abstract class AbstractMultiTypeConverter<S> extends AbstractValueConverter<S> { + + protected final ValueConversionStrategy next; + + protected AbstractMultiTypeConverter(ValueVisitor<S> visitor, ValueConversionStrategy next) { + super(visitor); + this.next = next; + } + + protected void throwUnsupportedException(ValueVector value) { + throw new IllegalArgumentException("Unsupported type: " + value.getClass().getName()); + } + +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/AbstractValueConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/AbstractValueConverter.java new file mode 100644 index 0000000..8f5d414 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/AbstractValueConverter.java @@ -0,0 +1,36 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/11/1 17:34 + **/ +public abstract class AbstractValueConverter<S> implements ValueConversionStrategy {
 + + private final ValueVisitor<S> visitor; + + protected AbstractValueConverter(ValueVisitor<S> visitor) { + this.visitor = visitor; + } + + protected S visit(Object value) { + return visitor.visit(value); + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/BigIntVectorConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/BigIntVectorConverter.java new file mode 100644 index 0000000..5fec58a --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/BigIntVectorConverter.java @@ -0,0 +1,45 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/11/1 16:00 + **/ +public class BigIntVectorConverter extends AbstractMultiTypeConverter<Long> { + + + public BigIntVectorConverter(ValueVisitor<Long> visitor, ValueConversionStrategy next) { + super(visitor, next); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + + if (vector instanceof BigIntVector bigIntVector) { + bigIntVector.set(index, this.visit(value)); + } else if (next != null) { + next.convertAndSet(vector, index, value); + } else { + throwUnsupportedException(vector); + } + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/BitVectorConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/BitVectorConverter.java new file mode 100644 index 0000000..15bae44 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/BitVectorConverter.java @@ -0,0 +1,43 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/11/1 16:14 + **/ +public class BitVectorConverter extends AbstractValueConverter<Boolean> { + + + public BitVectorConverter(ValueVisitor<Boolean> visitor) { + super(visitor); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + + if (vector instanceof BitVector bitVector) { + bitVector.setSafe(index, this.visit(value) ?
1 : 0); + } else { + throw new IllegalArgumentException("BitVectorConverter unsupported vector type: " + vector.getClass().getName()); + } + } +} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/MysqlLocationConfig.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DataProxyParamConverter.java similarity index 57% rename from dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/MysqlLocationConfig.java rename to dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DataProxyParamConverter.java index 2bc4794..5046469 100644 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/location/MysqlLocationConfig.java +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DataProxyParamConverter.java @@ -1,5 +1,5 @@ /* - * Copyright 2023 Ant Group Co., Ltd. + * Copyright 2024 Ant Group Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,24 +14,20 @@ * limitations under the License. */ -package org.secretflow.dataproxy.common.model.datasource.location; +package org.secretflow.dataproxy.core.converter; - -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.Setter; -import lombok.experimental.SuperBuilder; +import org.secretflow.v1alpha1.kusciaapi.Flightinner; /** - * Mysql dataset location config - * - * @author muhong - * @date 2023-08-30 19:16 - */ -@Getter -@Setter -@SuperBuilder -@AllArgsConstructor -public class MysqlLocationConfig extends JdbcLocationConfig { + * @author yuexie + * @date 2024/10/31 19:38 + **/ +public interface DataProxyParamConverter<T, U, V> { + + T convert(Flightinner.CommandDataMeshSqlQuery request); + + U convert(Flightinner.CommandDataMeshQuery request); + + V convert(Flightinner.CommandDataMeshUpdate request); } diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DateDayVectorConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DateDayVectorConverter.java new file mode 100644 index 0000000..74dc6f7 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DateDayVectorConverter.java @@ -0,0 +1,43 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/12/4 11:19 + **/ +public class DateDayVectorConverter extends AbstractMultiTypeConverter<Integer> { + + public DateDayVectorConverter(ValueVisitor<Integer> visitor, ValueConversionStrategy next) { + super(visitor, next); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + if (vector instanceof DateDayVector dateDayVector) { + dateDayVector.setSafe(index, this.visit(value)); + } else if (next != null) { + next.convertAndSet(vector, index, value); + } else { + throw new IllegalArgumentException("DateDayVectorConverter unsupported vector type: " + vector.getClass().getName()); + } + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DateMilliVectorConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DateMilliVectorConverter.java new file mode 100644 index 0000000..5dcfa34 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/DateMilliVectorConverter.java @@ -0,0 +1,44 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.DateMilliVector; +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/12/4 16:27 + **/ +public class DateMilliVectorConverter extends AbstractMultiTypeConverter<Long> { + + public DateMilliVectorConverter(ValueVisitor<Long> visitor, ValueConversionStrategy next) { + super(visitor, next); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + + if (vector instanceof DateMilliVector dateMilliVector) { + dateMilliVector.setSafe(index, this.visit(value)); + } else if (next != null) { + next.convertAndSet(vector, index, value); + } else { + throwUnsupportedException(vector); + } + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/Float4VectorConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/Float4VectorConverter.java new file mode 100644 index 0000000..86d3dfb --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/Float4VectorConverter.java @@ -0,0 +1,46 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/11/1 16:10 + **/ +public class Float4VectorConverter extends AbstractMultiTypeConverter<Float> { + + + public Float4VectorConverter(ValueVisitor<Float> visitor, ValueConversionStrategy next) { + super(visitor, next); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + + if (vector instanceof Float4Vector float4Vector) { + float4Vector.set(index, this.visit(value)); + } else if (next != null) { + next.convertAndSet(vector, index, value); + } else { + throwUnsupportedException(vector); + } + + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/Float8VectorConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/Float8VectorConverter.java new file mode 100644 index 0000000..ac0d0d9 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/Float8VectorConverter.java @@ -0,0 +1,43 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/11/1 16:13 + **/ +public class Float8VectorConverter extends AbstractMultiTypeConverter<Double> { + + public Float8VectorConverter(ValueVisitor<Double> visitor, ValueConversionStrategy next) { + super(visitor, next); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + if (vector instanceof Float8Vector float8Vector) { + float8Vector.set(index, this.visit(value)); + } else if (next != null) { + next.convertAndSet(vector, index, value); + } else { + throwUnsupportedException(vector); + } + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/IntVectorConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/IntVectorConverter.java new file mode 100644 index 0000000..2473998 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/IntVectorConverter.java @@ -0,0 +1,44 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/11/1 15:28 + **/ +public class IntVectorConverter extends AbstractMultiTypeConverter<Integer> { + + + public IntVectorConverter(ValueVisitor<Integer> visitor, ValueConversionStrategy next) { + super(visitor, next); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + if (vector instanceof IntVector intVector) { + intVector.set(index, this.visit(value)); + } else if (next != null) { + next.convertAndSet(vector, index, value); + } else { + throwUnsupportedException(vector); + } + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/SmallIntVectorConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/SmallIntVectorConverter.java new file mode 100644 index 0000000..5717ad4 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/SmallIntVectorConverter.java @@ -0,0 +1,44 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/11/1 15:59 + **/ +public class SmallIntVectorConverter extends AbstractMultiTypeConverter<Short> { + + + public SmallIntVectorConverter(ValueVisitor<Short> visitor, ValueConversionStrategy next) { + super(visitor, next); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + if (vector instanceof SmallIntVector smallIntVector) { + smallIntVector.set(index, this.visit(value)); + } else if (next != null) { + next.convertAndSet(vector, index, value); + } else { + throwUnsupportedException(vector); + } + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TimeMilliVectorConvertor.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TimeMilliVectorConvertor.java new file mode 100644 index 0000000..3b64136 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TimeMilliVectorConvertor.java @@ -0,0 +1,43 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/12/4 19:13 + **/ +public class TimeMilliVectorConvertor extends AbstractMultiTypeConverter<Integer> { + + public TimeMilliVectorConvertor(ValueVisitor<Integer> visitor, ValueConversionStrategy next) { + super(visitor, next); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + if (vector instanceof TimeMilliVector timeMilliVector) { + timeMilliVector.setSafe(index, this.visit(value)); + } else if (next != null) { + next.convertAndSet(vector, index, value); + } else { + throwUnsupportedException(vector); + } + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TimeStampNanoVectorConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TimeStampNanoVectorConverter.java new file mode 100644 index 0000000..49e559f --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TimeStampNanoVectorConverter.java @@ -0,0 +1,40 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.TimeStampMilliVector; +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/12/4 16:32 + **/ +public class TimeStampNanoVectorConverter extends AbstractValueConverter<Long> { + public TimeStampNanoVectorConverter(ValueVisitor<Long> visitor) { + super(visitor); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + if (vector instanceof TimeStampMilliVector timeStampMilliVector) { + timeStampMilliVector.set(index, this.visit(value)); + } else { + throw new IllegalArgumentException("TimeStampNanoVectorConverter unsupported vector type: " + vector.getClass().getName()); + } + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TinyIntVectorConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TinyIntVectorConverter.java new file mode 100644 index 0000000..9584fb2 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/TinyIntVectorConverter.java @@ -0,0 +1,44 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.ValueVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/11/1 16:01 + **/ +public class TinyIntVectorConverter extends AbstractMultiTypeConverter<Byte> { + + + public TinyIntVectorConverter(ValueVisitor<Byte> visitor, ValueConversionStrategy next) { + super(visitor, next); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + if (vector instanceof TinyIntVector tinyIntVector) { + tinyIntVector.set(index, this.visit(value)); + } else if (next != null) { + next.convertAndSet(vector, index, value); + } else { + throwUnsupportedException(vector); + } + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/ValueConversionStrategy.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/ValueConversionStrategy.java new file mode 100644 index 0000000..062abf8 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/ValueConversionStrategy.java @@ -0,0 +1,27 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.ValueVector; + +/** + * @author yuexie + * @date 2024/11/1 15:25 + **/ +public interface ValueConversionStrategy { + void convertAndSet(ValueVector vector, int index, Object value); +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/VarCharVectorConverter.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/VarCharVectorConverter.java new file mode 100644 index 0000000..35e913e --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/converter/VarCharVectorConverter.java @@ -0,0 +1,41 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.secretflow.dataproxy.core.converter; + +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarCharVector; +import org.secretflow.dataproxy.core.visitor.ValueVisitor; + +/** + * @author yuexie + * @date 2024/11/1 16:04 + **/ +public class VarCharVectorConverter extends AbstractValueConverter<byte[]> { + + public VarCharVectorConverter(ValueVisitor<byte[]> visitor) { + super(visitor); + } + + @Override + public void convertAndSet(ValueVector vector, int index, Object value) { + if (vector instanceof VarCharVector varCharVector) { + varCharVector.setSafe(index, this.visit(value)); + } else { + throw new IllegalArgumentException("VarCharVectorConverter unsupported vector type: " + vector.getClass().getName()); + } + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/param/ParamWrapper.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/param/ParamWrapper.java new file mode 100644 index 0000000..7ac1211 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/param/ParamWrapper.java @@ -0,0 +1,55 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.param; + +/** + * @author yuexie + * @date 2024/10/31 15:59 + **/ +public record ParamWrapper(String producerKey, Object param) { + + /** + * of param + * + * @param producerKey producer key + * @param param param + * @return param wrapper + */ + public static ParamWrapper of(String producerKey, Object param) { + if (param == null) { + throw new IllegalArgumentException("param is null"); + } + return new ParamWrapper(producerKey, param); + } + + /** + * unwrap param + * + * @param distClass dist class + * @param <D> dist class type + * @return dist class + */ + public <D> D unwrap(Class<D> distClass) { + if (param == null) { + throw new IllegalArgumentException("param is null"); + } + + if (!distClass.isAssignableFrom(param.getClass())) { + throw new IllegalArgumentException("param is not assignable from " + distClass.getName()); + } + return distClass.cast(param); + } +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/AbstractReader.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/AbstractReader.java new file mode 100644 index 0000000..9b0c974 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/AbstractReader.java @@ -0,0 +1,44 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.secretflow.dataproxy.core.reader; + +/** + * @author yuexie + * @date 2024/11/1 10:56 + **/ +public abstract class AbstractReader<T, R> implements Reader { + + private final T param; + private final Sender<R> sender; + + public AbstractReader(T param, Sender<R> sender) { + this.param = param; + this.sender = sender; + } + + protected abstract void read(T param); + + @Override + public void read() { + this.read(param); + } + + public void put(R record) throws Exception { + sender.put(record); + } + +} diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/AbstractSender.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/AbstractSender.java new file mode 100644 index 0000000..afb1038 --- /dev/null +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/AbstractSender.java @@ -0,0 +1,173 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.core.reader; + +import lombok.extern.slf4j.Slf4j; +import org.apache.arrow.vector.FixedWidthVector; +import org.apache.arrow.vector.VariableWidthVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.util.ValueVectorUtility; + +import java.io.Closeable; +import java.util.concurrent.LinkedBlockingQueue; + +/** + * @author yuexie + * @date 2024/11/1 11:00 + **/ +@Slf4j +public abstract class AbstractSender<T> implements Sender<T>, AutoCloseable { + + /** + * Queue, used to store records to be sent + */ + private final LinkedBlockingQueue<T> recordQueue; + + private final VectorSchemaRoot root; + + /** + * Estimated number of records to be sent + */ + private final int estimatedRecordCount; + + /** + * Constructor + * + * @param estimatedRecordCount Estimated number of records to be sent + * @param recordQueue Queue, used to store records to be sent + * @param root Arrow vector schema root + */ + public AbstractSender(int estimatedRecordCount, LinkedBlockingQueue<T> recordQueue, VectorSchemaRoot root) { + this.recordQueue = recordQueue; + this.root = root; + this.estimatedRecordCount = estimatedRecordCount; + this.preAllocate(); + } + + @Override + public void put(T record) throws InterruptedException { + recordQueue.put(record); + log.trace("recordQueue size: {}, recordQueue offer record: {}", recordQueue.size(), record); + } + + @Override + public void send() { + preAllocate(); + log.info("start send"); + try { + int takeRecordCount = 0; + + for (;;) { + T record = recordQueue.take(); + log.debug("recordQueue take record: {}", record); + if (isOver(record)) { + log.debug("recordQueue take record take Count: {}", takeRecordCount); + break; + } + ValueVectorUtility.ensureCapacity(root, takeRecordCount + 1); + this.toArrowVector(record, root, takeRecordCount); + takeRecordCount++; + + if (takeRecordCount % 300_000 == 0) { + break; + } + } + root.setRowCount(takeRecordCount); + log.info("send record take Count: {}", takeRecordCount); + } catch (InterruptedException e) {
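+ // take() was interrupted: log it, then restore the interrupt flag below so the owning thread can observe the shutdown request.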
log.error("send record interrupted", e); + Thread.currentThread().interrupt(); + } + } + + protected abstract void toArrowVector(T record, VectorSchemaRoot root, int takeRecordCount); + + protected abstract boolean isOver(T record); + + protected VectorSchemaRoot getRoot() { + return root; + } + + + /** + * Closes this resource, relinquishing any underlying resources. + * This method is invoked automatically on objects managed by the + * {@code try}-with-resources statement. + * + * @throws Exception if this resource cannot be closed + * @apiNote While this interface method is declared to throw {@code + * Exception}, implementers are strongly encouraged to + * declare concrete implementations of the {@code close} method to + * throw more specific exceptions, or to throw no exception at all + * if the close operation cannot fail. + * + *
<p>
Cases where the close operation may fail require careful + * attention by implementers. It is strongly advised to relinquish + * the underlying resources and to internally mark the + * resource as closed, prior to throwing the exception. The {@code + * close} method is unlikely to be invoked more than once and so + * this ensures that the resources are released in a timely manner. + * Furthermore it reduces problems that could arise when the resource + * wraps, or is wrapped, by another resource. + * + *
<p>
Implementers of this interface are also strongly advised + * to not have the {@code close} method throw {@link + * InterruptedException}. + *
<p>
+ * This exception interacts with a thread's interrupted status, + * and runtime misbehavior is likely to occur if an {@code + * InterruptedException} is {@linkplain Throwable#addSuppressed + * suppressed}. + *
<p>
+ * More generally, if it would cause problems for an + * exception to be suppressed, the {@code AutoCloseable.close} + * method should not throw it. + * + *
<p>
Note that unlike the {@link Closeable#close close} + * method of {@link Closeable}, this {@code close} method + * is not required to be idempotent. In other words, + * calling this {@code close} method more than once may have some + * visible side effect, unlike {@code Closeable.close} which is + * required to have no effect if called more than once. + *
<p>
However, implementers of this interface are strongly encouraged + * to make their {@code close} methods idempotent. + */ + @Override + public void close() throws Exception { + if (recordQueue != null) { + recordQueue.clear(); + } + } + + /** + * Pre-allocate Arrow vector memory + */ + private void preAllocate() { + + ValueVectorUtility.preAllocate(root, estimatedRecordCount); + + root.getFieldVectors().forEach(fieldVector -> { + if (fieldVector instanceof FixedWidthVector baseFixedWidthVector) { + baseFixedWidthVector.allocateNew(estimatedRecordCount); + } else if (fieldVector instanceof VariableWidthVector baseVariableWidthVector) { + baseVariableWidthVector.allocateNew(estimatedRecordCount * 32); + } + }); + root.clear(); + } +} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/schema/StructuredDataSchema.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/ReadJobContext.java similarity index 55% rename from dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/schema/StructuredDataSchema.java rename to dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/ReadJobContext.java index 7c54108..c267834 100644 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/schema/StructuredDataSchema.java +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/ReadJobContext.java @@ -1,5 +1,5 @@ /* - * Copyright 2023 Ant Group Co., Ltd. + * Copyright 2024 Ant Group Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,29 +14,31 @@ * limitations under the License. */ -package org.secretflow.dataproxy.common.model.dataset.schema; +package org.secretflow.dataproxy.core.reader; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; import java.util.List; /** - * 结构化数据schema - * - * @author muhong - * @date 2023-08-30 19:38 - */ -@Data -@AllArgsConstructor -@NoArgsConstructor -@Builder -public class StructuredDataSchema implements FastDFSchema { + * @author yuexie + * @date 2024/10/31 21:12 + **/ +public interface ReadJobContext { /** - * 字段列表 + * Split the read job + * @param num expected number of splits + * @return the split parts */ - private List fieldList; + List split(int num); + + boolean hasNext(); + + boolean initAndStartSender(VectorSchemaRoot root); + + void waiteFinished(); + + Schema getResultSchema(); } diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/SplitReader.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/Reader.java similarity index 62% rename from dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/SplitReader.java rename to dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/Reader.java index e349eda..afa19f8 100644 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/SplitReader.java +++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/Reader.java @@ -1,5 +1,5 @@ /* - * Copyright 2023 Ant Group Co., Ltd. + * Copyright 2024 Ant Group Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,22 +14,14 @@ * limitations under the License.
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/SplitReader.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/Reader.java
similarity index 62%
rename from dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/SplitReader.java
rename to dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/Reader.java
index e349eda..afa19f8 100644
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/SplitReader.java
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/Reader.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023 Ant Group Co., Ltd.
+ * Copyright 2024 Ant Group Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,22 +14,14 @@
 * limitations under the License.
 */
-package org.secretflow.dataproxy.manager;
-
-import org.apache.arrow.vector.ipc.ArrowReader;
+package org.secretflow.dataproxy.core.reader;
 
 /**
- * Split dataset reader
- *
- * @author muhong
- * @date 2023-08-31 17:04
- */
-public interface SplitReader {
+ * @author yuexie
+ * @date 2024/11/1 10:50
+ **/
+public interface Reader {
+
+    void read();
-
-    /**
-     * Get arrow data reader
-     *
-     * @return Arrow reader
-     */
-    ArrowReader startRead();
-}
\ No newline at end of file
+}
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/Sender.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/Sender.java
new file mode 100644
index 0000000..499ee95
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/reader/Sender.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.reader;
+
+/**
+ * @author yuexie
+ * @date 2024/11/1 10:53
+ **/
+public interface Sender<T> {
+
+    void put(T record) throws Exception;
+
+    void putOver() throws InterruptedException;
+
+    void send();
+}
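`Reader` and `Sender` split a read job into a producing side and a consuming side; the patch wires them together in `AbstractSender` (see the file list). As a rough illustration of the contract only, not the actual implementation, a queue-backed sender might look like the toy below; `QueueSender` and its poison-pill marker are invented names.

```java
import org.secretflow.dataproxy.core.reader.Sender;

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

public class QueueSender<T> implements Sender<T> {

    private static final Object POISON = new Object(); // end-of-stream marker (assumption)
    private final BlockingQueue<Object> queue = new ArrayBlockingQueue<>(1024);

    @Override
    public void put(T record) throws Exception {
        queue.put(record); // blocks when full, giving natural back-pressure
    }

    @Override
    public void putOver() throws InterruptedException {
        queue.put(POISON); // signal that no more records will arrive
    }

    @Override
    @SuppressWarnings("unchecked")
    public void send() {
        try {
            Object next;
            while ((next = queue.take()) != POISON) {
                consume((T) next);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    private void consume(T record) {
        // hand the record to the downstream writer / Flight stream listener
    }
}
```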
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/repository/CaffeineDataRepository.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/repository/CaffeineDataRepository.java
new file mode 100644
index 0000000..6f2b2c8
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/repository/CaffeineDataRepository.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.repository;
+
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import org.secretflow.dataproxy.core.param.ParamWrapper;
+
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * @author yuexie
+ * @date 2024/10/31 16:38
+ **/
+public final class CaffeineDataRepository implements ParamWrapperRepository {
+
+    private static final int TIMEOUT_SECONDS = 300;
+
+    private final Cache<String, ParamWrapper> cache;
+
+    private CaffeineDataRepository() {
+        cache = Caffeine.newBuilder()
+                .initialCapacity(4)
+                .maximumSize(32)
+                .expireAfterWrite(TIMEOUT_SECONDS, TimeUnit.SECONDS)
+                .build();
+    }
+
+    private static final class CaffeineDataRepositoryHolder {
+        private static final CaffeineDataRepository instance = new CaffeineDataRepository();
+    }
+
+    public static CaffeineDataRepository getInstance() {
+        return CaffeineDataRepositoryHolder.instance;
+    }
+
+
+    @Override
+    public void put(String key, ParamWrapper value) {
+        cache.put(key, value);
+    }
+
+    @Override
+    public Optional<ParamWrapper> getIfPresent(String key) {
+        return Optional.ofNullable(cache.getIfPresent(key));
+    }
+
+    @Override
+    public void remove(String key) {
+        cache.invalidate(key);
+    }
+
+    @Override
+    public void invalidate(String key) {
+        cache.invalidate(key);
+    }
+
+    @Override
+    public void clear() {
+        cache.invalidateAll();
+    }
+
+}
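What the Caffeine builder above configures, shown in isolation: a small, bounded cache whose entries vanish a fixed time after being written, so an unredeemed ticket simply disappears. The demo below uses a short TTL to make the expiry observable; all names and the 2-second value are illustrative, not from the patch.

```java
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;

import java.util.concurrent.TimeUnit;

public class CaffeineExpiryDemo {
    public static void main(String[] args) throws InterruptedException {
        Cache<String, String> cache = Caffeine.newBuilder()
                .initialCapacity(4)                     // pre-size for a few in-flight tickets
                .maximumSize(32)                        // hard cap; excess entries are evicted
                .expireAfterWrite(2, TimeUnit.SECONDS)  // the repository uses 300s
                .build();

        cache.put("ticket", "params");
        System.out.println(cache.getIfPresent("ticket")); // "params"
        Thread.sleep(2500);
        System.out.println(cache.getIfPresent("ticket")); // null: expired
    }
}
```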
diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/MinioConnConfig.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/repository/ParamWrapperRepository.java
similarity index 56%
rename from dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/MinioConnConfig.java
rename to dataproxy-core/src/main/java/org/secretflow/dataproxy/core/repository/ParamWrapperRepository.java
index 3fc62cb..b7b0695 100644
--- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/MinioConnConfig.java
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/repository/ParamWrapperRepository.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023 Ant Group Co., Ltd.
+ * Copyright 2024 Ant Group Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,23 +14,25 @@
 * limitations under the License.
 */
-package org.secretflow.dataproxy.common.model.datasource.conn;
+package org.secretflow.dataproxy.core.repository;
 
-import lombok.AllArgsConstructor;
-import lombok.Getter;
-import lombok.Setter;
-import lombok.experimental.SuperBuilder;
+import org.secretflow.dataproxy.core.param.ParamWrapper;
+
+import java.util.Optional;
 
 /**
- * Minio datasource connection config
- *
- * @author yumu
- * @date 2023/8/30 16:48
- */
-@Getter
-@Setter
-@SuperBuilder
-@AllArgsConstructor
-public class MinioConnConfig extends ObjectFileSystemConnConfig {
+ * @author yuexie
+ * @date 2024/10/31 17:14
+ **/
+public interface ParamWrapperRepository {
+
+    void put(String key, ParamWrapper value);
+
+    Optional<ParamWrapper> getIfPresent(String key);
+
+    void remove(String key);
+
+    void invalidate(String key);
+    void clear();
 }
diff --git a/dataproxy-service/src/main/java/org/secretflow/dataproxy/service/TicketService.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/service/TicketService.java
similarity index 57%
rename from dataproxy-service/src/main/java/org/secretflow/dataproxy/service/TicketService.java
rename to dataproxy-core/src/main/java/org/secretflow/dataproxy/core/service/TicketService.java
index 30a5aa7..c4695f4 100644
--- a/dataproxy-service/src/main/java/org/secretflow/dataproxy/service/TicketService.java
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/service/TicketService.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023 Ant Group Co., Ltd.
+ * Copyright 2024 Ant Group Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,29 +14,30 @@
 * limitations under the License.
 */
-package org.secretflow.dataproxy.service;
+package org.secretflow.dataproxy.core.service;
 
-import org.secretflow.dataproxy.common.model.command.Command;
+import org.secretflow.dataproxy.core.param.ParamWrapper;
 
 /**
- * @author muhong
- * @date 2023-08-31 11:02
- */
+ * @author yuexie
+ * @date 2024/10/31 15:55
+ **/
 public interface TicketService {
 
     /**
-     * 根据指令生成ticket
+     * Generate a ticket that identifies the given parameter wrapper
      *
-     * @param command 数据指令
-     * @return ticket
+     * @param paramWrapper request parameters to register
+     * @return ticket byte array
      */
-    byte[] generateTicket(Command command);
+    byte[] generateTicket(ParamWrapper paramWrapper);
 
     /**
-     * 根据ticket获取数据指令
+     * Look up the parameter wrapper previously registered for a ticket
     *
     * @param ticket ticket
-     * @return 数据指令
+     * @return the associated ParamWrapper
     */
-    Command getCommandByTicket(byte[] ticket);
-}
\ No newline at end of file
+    ParamWrapper getParamWrapper(byte[] ticket);
+
+}
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/service/impl/CacheTicketService.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/service/impl/CacheTicketService.java
new file mode 100644
index 0000000..3e5dcc7
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/service/impl/CacheTicketService.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.service.impl;
+
+import org.secretflow.dataproxy.common.utils.IdUtils;
+import org.secretflow.dataproxy.core.param.ParamWrapper;
+import org.secretflow.dataproxy.core.repository.CaffeineDataRepository;
+import org.secretflow.dataproxy.core.repository.ParamWrapperRepository;
+import org.secretflow.dataproxy.core.service.TicketService;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Optional;
+
+/**
+ * @author yuexie
+ * @date 2024/10/31 16:27
+ **/
+public final class CacheTicketService implements TicketService {
+
+    private static final boolean isSingleUse = true;
+
+    private final ParamWrapperRepository paramWrapperRepository;
+
+    private CacheTicketService() {
+        paramWrapperRepository = CaffeineDataRepository.getInstance();
+    }
+
+    private static class SingletonHolder {
+        private static final CacheTicketService INSTANCE = new CacheTicketService();
+    }
+
+    /**
+     * Get instance
+     *
+     * @return CacheTicketService
+     */
+    public static CacheTicketService getInstance() {
+        return SingletonHolder.INSTANCE;
+    }
+
+    /**
+     * Generate a ticket that identifies the given parameter wrapper
+     *
+     * @param paramWrapper request parameters to register
+     * @return ticket byte array
+     */
+    @Override
+    public byte[] generateTicket(ParamWrapper paramWrapper) {
+        String ticketId = IdUtils.randomUUID();
+        paramWrapperRepository.put(ticketId, paramWrapper);
+        return ticketId.getBytes(StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Look up the parameter wrapper previously registered for a ticket
+     *
+     * @param ticket ticket
+     * @return the associated ParamWrapper, or null if absent or expired
+     */
+    @Override
+    public ParamWrapper getParamWrapper(byte[] ticket) {
+
+        final String key = new String(ticket, StandardCharsets.UTF_8);
+        Optional<ParamWrapper> wrapperOptional = paramWrapperRepository.getIfPresent(key);
+
+//        if (isSingleUse) {
+//            paramWrapperRepository.invalidate(key);
+//        }
+
+//        return wrapperOptional.orElseThrow(() -> DataproxyException.of(DataproxyErrorCode.TICKET_UNAVAILABLE, key));
+        return wrapperOptional.orElse(null);
+    }
+}
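Together the repository and the ticket service implement a short-lived handoff: `generateTicket` registers the request parameters under a random UUID and returns the UUID bytes, which the client later presents back to resolve the same `ParamWrapper`. A usage sketch, where the `wrapper` argument and the thrown exception are placeholders:

```java
import org.secretflow.dataproxy.core.param.ParamWrapper;
import org.secretflow.dataproxy.core.service.TicketService;
import org.secretflow.dataproxy.core.service.impl.CacheTicketService;

public class TicketRoundTrip {
    // `wrapper` is assumed to be built by the command-handling layer.
    static void demo(ParamWrapper wrapper) {
        TicketService tickets = CacheTicketService.getInstance();

        byte[] ticket = tickets.generateTicket(wrapper); // UTF-8 bytes of a UUID
        // ...the ticket travels to the client, e.g. inside a FlightInfo endpoint...

        ParamWrapper resolved = tickets.getParamWrapper(ticket);
        if (resolved == null) {
            // expired (Caffeine TTL) or never issued
            throw new IllegalStateException("ticket unavailable");
        }
    }
}
```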
diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/schema/DataField.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/spi/producer/DataProxyFlightProducer.java
similarity index 51%
rename from dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/schema/DataField.java
rename to dataproxy-core/src/main/java/org/secretflow/dataproxy/core/spi/producer/DataProxyFlightProducer.java
index 0eb4bcb..1793aaa 100644
--- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/schema/DataField.java
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/spi/producer/DataProxyFlightProducer.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2023 Ant Group Co., Ltd.
+ * Copyright 2024 Ant Group Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,40 +14,28 @@
 * limitations under the License.
 */
-package org.secretflow.dataproxy.common.model.dataset.schema;
+package org.secretflow.dataproxy.core.spi.producer;
 
-import lombok.*;
+
+import org.apache.arrow.flight.FlightProducer;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.Collections;
 
 /**
- * 数据列信息
- *
- * @author muhong
- * @date 2023-08-30 19:38
- */
-@Getter
-@Setter
-@Builder
-@AllArgsConstructor
-@NoArgsConstructor
-public class DataField {
+ * @author yuexie
+ * @date 2024/10/30 17:29
+ **/
+public interface DataProxyFlightProducer extends FlightProducer {
 
-    /**
-     * 字段名称
-     */
-    private String name;
 
-    /**
-     * 字段描述
-     */
-    private String description;
+    Schema DEFACT_SCHEMA = new Schema(Collections.emptyList());
 
     /**
-     * 字段类型
+     * Obtain the name under which this producer is registered and looked up.
+     *
+     * @return producer name
      */
-    private DataFieldTypeEnum type;
+    String getProducerName();
 
-    /**
-     * 是否可为空
-     */
-    private Boolean nullable;
 }
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/BooleanValueVisitor.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/BooleanValueVisitor.java
new file mode 100644
index 0000000..1eae67f
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/BooleanValueVisitor.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.visitor;
+
+import javax.annotation.Nonnull;
+
+/**
+ * @author yuexie
+ * @date 2024/11/1 20:08
+ **/
+public class BooleanValueVisitor implements ValueVisitor<Boolean> {
+
+    @Override
+    public Boolean visit(boolean value) {
+        return value;
+    }
+
+    @Override
+    public Boolean visit(@Nonnull Short value) {
+        return value > 0;
+    }
+
+    @Override
+    public Boolean visit(Integer value) {
+        return value > 0;
+    }
+
+    @Override
+    public Boolean visit(Long value) {
+        return value > 0;
+    }
+
+    @Override
+    public Boolean visit(Float value) {
+        return value > 0;
+    }
+
+    @Override
+    public Boolean visit(Double value) {
+        return value > 0;
+    }
+
+    @Override
+    public Boolean visit(String value) {
+        return switch (value.toLowerCase()) {
+            case "true", "t", "yes", "y", "1" -> true;
+
+            case "false", "f", "no", "n", "0" -> false;
+
+            default -> throw new IllegalStateException("BooleanValueVisitor unexpected String value: " + value);
+        };
+    }
+
+    @Override
+    public Boolean visit(Object value) {
+        if (value instanceof Boolean booleanValue) {
+            return booleanValue;
+        }
+        return this.visit(value.toString());
+    }
+}
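Implementations of `DataProxyFlightProducer` are keyed by `getProducerName()`. Discovery is presumably via `java.util.ServiceLoader`, in line with the `META-INF/services` entry this patch adds for `ConfigLoader` further down; the registry below is an illustrative sketch, not code from the patch.

```java
import org.secretflow.dataproxy.core.spi.producer.DataProxyFlightProducer;

import java.util.HashMap;
import java.util.Map;
import java.util.ServiceLoader;

public class ProducerRegistry {
    public static Map<String, DataProxyFlightProducer> loadProducers() {
        Map<String, DataProxyFlightProducer> byName = new HashMap<>();
        // Reads META-INF/services/org.secretflow.dataproxy.core.spi.producer.DataProxyFlightProducer
        // from the classpath (registration file name is an assumption).
        for (DataProxyFlightProducer producer : ServiceLoader.load(DataProxyFlightProducer.class)) {
            byName.put(producer.getProducerName(), producer);
        }
        return byName;
    }
}
```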
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ByteArrayValueVisitor.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ByteArrayValueVisitor.java
new file mode 100644
index 0000000..680d4b9
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ByteArrayValueVisitor.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.visitor;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Date;
+
+/**
+ * @author yuexie
+ * @date 2024/11/1 20:28
+ **/
+@Slf4j
+public class ByteArrayValueVisitor implements ValueVisitor<byte[]> {
+
+    @Override
+    public byte[] visit(Short value) {
+        return this.visit((Object) value);
+    }
+
+    @Override
+    public byte[] visit(Integer value) {
+        return this.visit((Object) value);
+    }
+
+    @Override
+    public byte[] visit(Long value) {
+        return this.visit((Object) value);
+    }
+
+    @Override
+    public byte[] visit(Float value) {
+        return this.visit((Object) value);
+    }
+
+    @Override
+    public byte[] visit(Double value) {
+        return this.visit((Object) value);
+    }
+
+    @Override
+    public byte[] visit(Date value) {
+        return this.visit((Object) value);
+    }
+
+    @Override
+    public byte[] visit(String value) {
+        return value.getBytes(StandardCharsets.UTF_8);
+    }
+
+    @Override
+    public byte[] visit(byte[] value) {
+        return value;
+    }
+
+    @Override
+    public byte[] visit(Object value) {
+
+        if (value instanceof byte[] bytes) {
+            return this.visit(bytes);
+        }
+
+        return this.visit(String.valueOf(value));
+    }
+}
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ByteValueVisitor.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ByteValueVisitor.java
new file mode 100644
index 0000000..3f8e909
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ByteValueVisitor.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.visitor;
+
+/**
+ * @author yuexie
+ * @date 2024/11/1 20:25
+ **/
+public class ByteValueVisitor implements ValueVisitor<Byte> {
+
+    @Override
+    public Byte visit(boolean value) {
+        return value ? (byte) 1 : (byte) 0;
+    }
+
+    @Override
+    public Byte visit(Short value) {
+        return value.byteValue();
+    }
+
+    @Override
+    public Byte visit(Integer value) {
+        return value.byteValue();
+    }
+
+    @Override
+    public Byte visit(Long value) {
+        return value.byteValue();
+    }
+
+    @Override
+    public Byte visit(Float value) {
+        return value.byteValue();
+    }
+
+    @Override
+    public Byte visit(Double value) {
+        return value.byteValue();
+    }
+
+}
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/DoubleValueVisitor.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/DoubleValueVisitor.java
new file mode 100644
index 0000000..9b59ee5
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/DoubleValueVisitor.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.visitor;
+
+/**
+ * @author yuexie
+ * @date 2024/11/1 20:22
+ **/
+public class DoubleValueVisitor implements ValueVisitor<Double> {
+
+    @Override
+    public Double visit(boolean value) {
+        return value ? 1.0 : 0.0;
+    }
+
+    @Override
+    public Double visit(Short value) {
+        return value.doubleValue();
+    }
+
+    @Override
+    public Double visit(Integer value) {
+        return value.doubleValue();
+    }
+
+    @Override
+    public Double visit(Long value) {
+        return value.doubleValue();
+    }
+
+    @Override
+    public Double visit(Float value) {
+        return value.doubleValue();
+    }
+
+    @Override
+    public Double visit(Double value) {
+        return value;
+    }
+
+    @Override
+    public Double visit(String value) {
+        return Double.valueOf(value);
+    }
+
+    @Override
+    public Double visit(Object value) {
+        if (value instanceof Double doubleValue) {
+            return doubleValue;
+        }
+        return Double.valueOf(value.toString());
+    }
+}
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/FloatValueVisitor.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/FloatValueVisitor.java
new file mode 100644
index 0000000..03a58f2
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/FloatValueVisitor.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.visitor;
+
+/**
+ * @author yuexie
+ * @date 2024/11/1 20:19
+ **/
+public class FloatValueVisitor implements ValueVisitor<Float> {
+
+    @Override
+    public Float visit(boolean value) {
+        return value ? 1f : 0f;
+    }
+
+    @Override
+    public Float visit(Short value) {
+        return value.floatValue();
+    }
+
+    @Override
+    public Float visit(Integer value) {
+        return value.floatValue();
+    }
+
+    @Override
+    public Float visit(Long value) {
+        return value.floatValue();
+    }
+
+    @Override
+    public Float visit(Float value) {
+        return value;
+    }
+
+    @Override
+    public Float visit(Double value) {
+        return value.floatValue();
+    }
+
+    @Override
+    public Float visit(String value) {
+        return Float.valueOf(value);
+    }
+
+    @Override
+    public Float visit(Object value) {
+
+        if (value instanceof Float floatValue) {
+            return this.visit(floatValue);
+        }
+        return Float.valueOf(String.valueOf(value));
+    }
+}
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/IntegerValueVisitor.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/IntegerValueVisitor.java
new file mode 100644
index 0000000..84a36ff
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/IntegerValueVisitor.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.visitor;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZonedDateTime;
+import java.util.Date;
+
+/**
+ * @author yuexie
+ * @date 2024/11/1 16:42
+ **/
+@Slf4j
+public class IntegerValueVisitor implements ValueVisitor<Integer> {
+
+    @Override
+    public Integer visit(String value) {
+        return Integer.valueOf(value);
+    }
+
+
+    @Override
+    public Integer visit(Object value) {
+
+        if (value instanceof Integer integer) {
+            return this.visit(integer);
+        } else if (value instanceof Date dateValue) {
+            return this.visit(dateValue);
+        } else if (value instanceof LocalDateTime localDateTime) {
+            return this.visit(localDateTime);
+        } else if (value instanceof ZonedDateTime zonedDateTime) {
+            return this.visit(zonedDateTime);
+        } else if (value instanceof LocalDate localDate) {
+            return this.visit(localDate);
+        }
+
+        return Integer.valueOf(value.toString());
+    }
+
+    @Override
+    public Integer visit(Long value) {
+        return value.intValue();
+    }
+
+    @Override
+    public Integer visit(Double value) {
+        return value.intValue();
+    }
+
+    @Override
+    public Integer visit(boolean value) {
+        return value ? 1 : 0;
+    }
+
+    @Override
+    public Integer visit(Float value) {
+        return value.intValue();
+    }
+
+    @Override
+    public Integer visit(Short value) {
+        return value.intValue();
+    }
+
+    @Override
+    public Integer visit(Integer value) {
+        return value;
+    }
+
+    @Override
+    public Integer visit(Date value) {
+        return (int) value.getTime(); // note: truncates epoch millis to int; overflows for timestamps more than ~24 days after the epoch
+    }
+
+    @Override
+    public Integer visit(LocalDate value) {
+        return (int) value.toEpochDay();
+    }
+}
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/LongValueVisitor.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/LongValueVisitor.java
new file mode 100644
index 0000000..eb23be6
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/LongValueVisitor.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.visitor;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
+import java.util.Date;
+
+/**
+ * @author yuexie
+ * @date 2024/11/1 20:01
+ **/
+@Slf4j
+public class LongValueVisitor implements ValueVisitor<Long> {
+
+    @Override
+    public Long visit(Long value) {
+        return value;
+    }
+
+    @Override
+    public Long visit(String value) {
+        return Long.valueOf(value);
+    }
+
+    @Override
+    public Long visit(Object value) {
+
+        log.debug("type: {}, value: {}", value.getClass().getName(), value);
+
+        if (value instanceof Long longValue) {
+            return visit(longValue);
+        } else if (value instanceof Date dateValue) {
+            return this.visit(dateValue);
+        } else if (value instanceof LocalDateTime localDateTime) {
+            return this.visit(localDateTime);
+        } else if (value instanceof ZonedDateTime zonedDateTime) {
+            return this.visit(zonedDateTime);
+        } else if (value instanceof LocalDate localDate) {
+            return this.visit(localDate);
+        } else if (value instanceof Instant instant) {
+            return this.visit(instant);
+        }
+
+        return visit(value.toString());
+    }
+
+    @Override
+    public Long visit(Double value) {
+        return value.longValue();
+    }
+
+    @Override
+    public Long visit(Date value) {
+        return value.getTime();
+    }
+
+    @Override
+    public Long visit(boolean value) {
+        return value ? 1L : 0L;
+    }
+
+    @Override
+    public Long visit(Short value) {
+        return value.longValue();
+    }
+
+    @Override
+    public Long visit(Integer value) {
+        return value.longValue();
+    }
+
+    @Override
+    public Long visit(Float value) {
+        return value.longValue();
+    }
+
+    @Override
+    public Long visit(ZonedDateTime value) {
+        return value.toInstant().toEpochMilli();
+    }
+
+    @Override
+    public Long visit(LocalDateTime value) {
+        return value.atZone(ZoneId.systemDefault()).toInstant().toEpochMilli();
+    }
+
+    @Override
+    public Long visit(LocalDate value) {
+        return value.toEpochDay();
+    }
+
+    @Override
+    public Long visit(Instant value) {
+        log.debug("visit instant: {}", value.toEpochMilli());
+        return value.toEpochMilli();
+    }
+}
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ShortValueVisitor.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ShortValueVisitor.java
new file mode 100644
index 0000000..72144fc
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ShortValueVisitor.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.visitor;
+
+/**
+ * @author yuexie
+ * @date 2024/11/1 20:24
+ **/
+public class ShortValueVisitor implements ValueVisitor<Short> {
+
+    @Override
+    public Short visit(boolean value) {
+        return value ? (short) 1 : (short) 0;
+    }
+
+    @Override
+    public Short visit(Short value) {
+        return value;
+    }
+
+    @Override
+    public Short visit(Integer value) {
+        return value.shortValue();
+    }
+
+    @Override
+    public Short visit(Long value) {
+        return value.shortValue();
+    }
+
+    @Override
+    public Short visit(Float value) {
+        return value.shortValue();
+    }
+
+    @Override
+    public Short visit(Double value) {
+        return value.shortValue();
+    }
+
+    @Override
+    public Short visit(String value) {
+        return Short.valueOf(value);
+    }
+
+    @Override
+    public Short visit(Object value) {
+
+        if (value instanceof Short shortValue) {
+            return this.visit(shortValue);
+        }
+
+        return this.visit(value.toString());
+    }
+}
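Each of these visitors normalizes whatever runtime type the upstream connector produced into one target Java type, with `visit(Object)` doing the instanceof-based dispatch. For example (illustrative only):

```java
import org.secretflow.dataproxy.core.visitor.LongValueVisitor;

import java.time.Instant;

public class VisitorDemo {
    public static void main(String[] args) {
        LongValueVisitor toLong = new LongValueVisitor();

        Object rawValue = "42";               // e.g. a cell read back as a string
        Long asLong = toLong.visit(rawValue); // dispatches on the runtime type -> 42L

        // temporal inputs collapse to epoch millis / days via their own overloads
        System.out.println(asLong + ", " + toLong.visit(Instant.now()));
    }
}
```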
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ValueVisitor.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ValueVisitor.java
new file mode 100644
index 0000000..2db703e
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/visitor/ValueVisitor.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.visitor;
+
+import jakarta.validation.constraints.NotNull;
+
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZonedDateTime;
+import java.util.Date;
+
+/**
+ * @author yuexie
+ * @date 2024/11/1 16:48
+ **/
+public interface ValueVisitor<T> {
+
+    default T visit(@NotNull Integer value) {
+        throw new UnsupportedOperationException("Integer not supported");
+    }
+
+    default T visit(@NotNull Short value) {
+        throw new UnsupportedOperationException("Short not supported");
+    }
+
+    default T visit(@NotNull Long value) {
+        throw new UnsupportedOperationException("Long not supported");
+    }
+
+    default T visit(@NotNull Double value) {
+        throw new UnsupportedOperationException("Double not supported");
+    }
+
+    default T visit(@NotNull Float value) {
+        throw new UnsupportedOperationException("Float not supported");
+    }
+
+    default T visit(boolean value) {
+        throw new UnsupportedOperationException("Boolean not supported");
+    }
+
+    default T visit(@NotNull Date value) {
+        throw new UnsupportedOperationException("Date not supported");
+    }
+
+    default T visit(@NotNull String value) {
+        throw new UnsupportedOperationException("String not supported");
+    }
+
+    default T visit(@NotNull byte[] value) {
+        throw new UnsupportedOperationException("byte[] not supported");
+    }
+
+    default T visit(@NotNull Object value) {
+        throw new UnsupportedOperationException("Object not supported");
+    }
+
+    default T visit(@NotNull ZonedDateTime value) {
+        throw new UnsupportedOperationException("ZonedDateTime not supported");
+    }
+
+    default T visit(@NotNull LocalDateTime value) {
+        throw new UnsupportedOperationException("LocalDateTime not supported");
+    }
+
+    default T visit(@NotNull LocalDate value) {
+        throw new UnsupportedOperationException("LocalDate not supported");
+    }
+
+    default T visit(@NotNull Instant value) {
+        throw new UnsupportedOperationException("Instant not supported");
+    }
+
+}
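The interface works as a default-throw visitor: an implementation overrides only the conversions it supports and inherits a failing default for everything else. A hypothetical `StringValueVisitor` (not part of the patch) shows the pattern:

```java
import org.secretflow.dataproxy.core.visitor.ValueVisitor;

import java.nio.charset.StandardCharsets;

public class StringValueVisitor implements ValueVisitor<String> {

    @Override
    public String visit(String value) {
        return value;
    }

    @Override
    public String visit(byte[] value) {
        return new String(value, StandardCharsets.UTF_8);
    }

    @Override
    public String visit(Object value) {
        return String.valueOf(value); // last-resort fallback, mirroring the visitors above
    }

    // every other overload (Integer, Date, Instant, ...) still throws by default
}
```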
diff --git a/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/writer/Writer.java b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/writer/Writer.java
new file mode 100644
index 0000000..4941fcb
--- /dev/null
+++ b/dataproxy-core/src/main/java/org/secretflow/dataproxy/core/writer/Writer.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.core.writer;
+
+import org.apache.arrow.vector.VectorSchemaRoot;
+
+/**
+ * @author yuexie
+ * @date 2024/11/8 15:48
+ **/
+public interface Writer {
+
+    void write(VectorSchemaRoot root);
+
+    void flush();
+
+}
diff --git a/dataproxy-core/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.config.ConfigLoader b/dataproxy-core/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.config.ConfigLoader
new file mode 100644
index 0000000..e26036c
--- /dev/null
+++ b/dataproxy-core/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.config.ConfigLoader
@@ -0,0 +1 @@
+org.secretflow.dataproxy.core.config.DefaultFlightServerConfigLoader
\ No newline at end of file
diff --git a/dataproxy-integration-tests/src/main/java/org/secretflow/dataproxy/integration/tests/DPFlightClient.java b/dataproxy-integration-tests/src/main/java/org/secretflow/dataproxy/integration/tests/DPFlightClient.java
index 12fa2d2..942bc53 100644
--- a/dataproxy-integration-tests/src/main/java/org/secretflow/dataproxy/integration/tests/DPFlightClient.java
+++ b/dataproxy-integration-tests/src/main/java/org/secretflow/dataproxy/integration/tests/DPFlightClient.java
@@ -79,7 +79,7 @@ public void downloadStructDataAndPrint(FlightInfo flightInfo) {
                  ArrowStreamWriter writer = new ArrowStreamWriter(vectorSchemaRootReceived, null, Channels.newChannel(out))) {
                 writer.start();
                 writer.writeBatch();
-                System.out.println(vectorSchemaRootReceived.contentToTSVString());
+//                System.out.println(vectorSchemaRootReceived.contentToTSVString());
             }
         }
     } catch (Exception e) {
diff --git a/dataproxy-integration-tests/src/main/java/org/secretflow/dataproxy/integration/tests/config/OdpsKusciaConnectorConfig.java b/dataproxy-integration-tests/src/main/java/org/secretflow/dataproxy/integration/tests/config/OdpsKusciaConnectorConfig.java
new file mode 100644
index 0000000..486f479
--- /dev/null
+++ b/dataproxy-integration-tests/src/main/java/org/secretflow/dataproxy/integration/tests/config/OdpsKusciaConnectorConfig.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2023 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.secretflow.dataproxy.integration.tests.config; + +import org.secretflow.dataproxy.integration.tests.KusciaConnectorConfig; +import org.secretflow.v1alpha1.common.Common; +import org.secretflow.v1alpha1.kusciaapi.Domaindata; +import org.secretflow.v1alpha1.kusciaapi.Domaindatasource; + +/** + * @author muhong + * @date 2023-11-17 11:08 + */ +public class OdpsKusciaConnectorConfig implements KusciaConnectorConfig { + + private final static String ENDPOINT = ""; + private final static String AK = ""; + private final static String SK = ""; + private final static String PROJECT_NAME = ""; + + @Override + public Domaindatasource.DomainDataSource getDatasource() { + + return Domaindatasource.DomainDataSource + .newBuilder() + .setDatasourceId("default-datasource") + .setType("odps") + .setStatus("Available") + .setInfo(Domaindatasource.DataSourceInfo.newBuilder() + .setOdps(odpsDomainDataSourceInfo()) + .build()) + .build(); + } + + @Override + public Domaindata.DomainData getDataset() { + return odpsDomainDataInfo(); + } + + private static Domaindatasource.OdpsDataSourceInfo odpsDomainDataSourceInfo() { + return Domaindatasource.OdpsDataSourceInfo.newBuilder() + .setEndpoint(ENDPOINT) + .setAccessKeyId(AK) + .setAccessKeySecret(SK) + .setProject(PROJECT_NAME) + .build(); + } + + private static Domaindata.DomainData odpsDomainDataInfo() { + return Domaindata.DomainData.newBuilder() + .setType("table") + .setDatasourceId("default-datasource") +// .setRelativeUri("test_user1") + .setRelativeUri("test_user") +// .setRelativeUri("39splitouput5") +// .setRelativeUri("alice_table") + .setFileFormat(Common.FileFormat.CSV) + .addColumns(Common.DataColumn.newBuilder().setType("str").setName("name").setComment("").build()) +// .addColumns(Common.DataColumn.newBuilder().setType("str").setName("id1").setComment("").build()) + .addColumns(Common.DataColumn.newBuilder().setType("int").setName("age").setComment("").build()) + .addColumns(Common.DataColumn.newBuilder().setType("str").setName("job").setComment("").build()) +// .addColumns(Common.DataColumn.newBuilder().setType("str").setName("col2_string").setComment("").build()) + .addColumns(Common.DataColumn.newBuilder().setType("bool").setName("boolean_col").setComment("boolean_col").build()) + .build(); + } +} diff --git a/dataproxy-manager/pom.xml b/dataproxy-manager/pom.xml deleted file mode 100644 index 72260a8..0000000 --- a/dataproxy-manager/pom.xml +++ /dev/null @@ -1,60 +0,0 @@ - - - 4.0.0 - - org.secretflow - dataproxy - 0.0.1-SNAPSHOT - - - dataproxy-manager - - - - org.secretflow - dataproxy-common - - - - - org.apache.hadoop - hadoop-common - - - org.apache.hadoop - hadoop-aws - - - org.apache.hadoop - hadoop-aliyun - - - com.zaxxer - HikariCP - - - com.mysql - mysql-connector-j - - - - - com.googlecode.juniversalchardet - juniversalchardet - 1.0.3 - - - com.opencsv - opencsv - - - - com.aliyun.odps - odps-sdk-core - - - - - \ No newline at end of file diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/Connector.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/Connector.java deleted file mode 100644 index 070ed3d..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/Connector.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager; - -import org.secretflow.dataproxy.common.model.InferSchemaResult; -import org.secretflow.dataproxy.common.model.command.DatasetReadCommand; -import org.secretflow.dataproxy.common.model.command.DatasetWriteCommand; -import org.secretflow.dataproxy.common.model.dataset.DatasetFormatConfig; -import org.secretflow.dataproxy.common.model.datasource.location.LocationConfig; - -import org.apache.arrow.memory.BufferAllocator; - -/** - * Datasource connector - * - * @author muhong - * @date 2023-09-01 18:04 - */ -public interface Connector extends AutoCloseable { - - /** - * infer schema - * - * @param allocator Arrow data allocator - * @param locationConfig Dataset location - * @param formatConfig Dataset format - * @return Infer result - */ - InferSchemaResult inferSchema(BufferAllocator allocator, LocationConfig locationConfig, DatasetFormatConfig formatConfig); - - /** - * Build dataset reader - * - * @param allocator Arrow data allocator - * @param readCommand Read command - * @return Reader - */ - DataReader buildReader(BufferAllocator allocator, DatasetReadCommand readCommand); - - /** - * Build dataset writer - * - * @param writeCommand Write command - * @return Writer - */ - DataWriter buildWriter(DatasetWriteCommand writeCommand); - - /** - * Check connector status - * @return - */ - boolean isAvailable(); -} \ No newline at end of file diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/DataWriter.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/DataWriter.java deleted file mode 100644 index bfa3b02..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/DataWriter.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.secretflow.dataproxy.manager; - -import org.apache.arrow.vector.VectorSchemaRoot; - -import java.io.IOException; - -/** - * Dataset writer - * - * @author muhong - * @date 2023-08-21 17:54 - */ -public interface DataWriter extends AutoCloseable { - - /** - * Write a batch - * - * @param root Batch to write - */ - void write(VectorSchemaRoot root) throws IOException; - - /** - * Write the remaining data in the buffer - */ - void flush() throws IOException; - - /** - * Destroy the data - */ - void destroy() throws IOException; -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileDataReader.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileDataReader.java deleted file mode 100644 index 81e5415..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileDataReader.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.filesystem; - -import org.secretflow.dataproxy.manager.DataReader; -import org.secretflow.dataproxy.manager.SplitReader; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.hadoop.fs.FileSystem; - -import java.util.Arrays; -import java.util.List; - -/** - * Binary file data reader - * - * @author yumu - * @date 2023/9/12 19:17 - */ -public class BinaryFileDataReader implements DataReader { - - private final BufferAllocator allocator; - - private final FileSystem fileSystem; - - private final String uri; - - public BinaryFileDataReader(BufferAllocator allocator, - FileSystem fileSystem, - String uri) { - this.allocator = allocator; - this.fileSystem = fileSystem; - this.uri = uri; - } - - @Override - public List createSplitReader(int splitNumber) { - return Arrays.asList(new BinaryFileSplitReader(allocator, fileSystem, uri)); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileDataWriter.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileDataWriter.java deleted file mode 100644 index 45b31da..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileDataWriter.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.filesystem; - -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.manager.DataWriter; - -import lombok.extern.slf4j.Slf4j; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import java.io.IOException; - -/** - * Binary file data writer - * - * @author muhong - * @date 2023-09-13 22:14 - */ -@Slf4j -public class BinaryFileDataWriter implements DataWriter { - - private final String FIELD_NAME = "binary_data"; - - private FSDataOutputStream outputStream; - - public BinaryFileDataWriter(FileSystem fileSystem, String uri) { - // 获取文件写入流 - try { - fileSystem.delete(new Path(uri), true); - this.outputStream = fileSystem.create(new Path(uri)); - } catch (Exception e) { - throw DataproxyException.of(DataproxyErrorCode.FILE_WRITE_STREAM_CREATE_FAILED, e.getMessage(), e); - } - } - - @Override - public void write(VectorSchemaRoot root) throws IOException { - log.info("[BinaryFileDataWriter-write] received schema:{}", root.getSchema().toJson()); - VarBinaryVector binaryVector = (VarBinaryVector) root.getVector(FIELD_NAME); - if (binaryVector == null) { - throw DataproxyException.of(DataproxyErrorCode.BINARY_DATA_FIELD_NOT_EXIST); - } - log.info("[BinaryFileDataWriter-write] root row count:{}, vector value count: {}", root.getRowCount(), binaryVector.getValueCount()); - for (int row = 0; row < root.getRowCount(); row++) { - byte[] item = binaryVector.get(row); - if (item == null) { - log.info("[BinaryFileDataWriter-write] row:{}, item is null, continue", row); - continue; - } - log.info("[BinaryFileDataWriter-write] row:{}, length:{}, value:\n{}\n", row, item.length, new String(item)); - this.outputStream.write(item); - } - } - - @Override - public void flush() throws IOException { - this.outputStream.flush(); - } - - @Override - public void destroy() throws IOException { - - } - - @Override - public void close() throws Exception { - if (this.outputStream != null) { - this.outputStream.close(); - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileSplitReader.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileSplitReader.java deleted file mode 100644 index c6309c8..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/BinaryFileSplitReader.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.secretflow.dataproxy.manager.connector.filesystem; - -import lombok.extern.slf4j.Slf4j; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.manager.SplitReader; - -import java.io.IOException; -import java.util.List; - -/** - * Binary file data split reader - * - * @author muhong - * @date 2023-09-13 21:47 - */ -@Slf4j -public class BinaryFileSplitReader extends ArrowReader implements SplitReader { - - private static final String FIELD_NAME = "binary_data"; - private static final int BATCH_SIZE = 3 * 1024 * 1024; - private final FSDataInputStream inputStream; - - public BinaryFileSplitReader(BufferAllocator allocator, - FileSystem fileSystem, - String uri) { - super(allocator); - - // Generate file input stream - try { - this.inputStream = fileSystem.open(new Path(uri)); - } catch (Exception e) { - throw DataproxyException.of(DataproxyErrorCode.FILE_READ_STREAM_CREATE_FAILED, e); - } - } - - @Override - public ArrowReader startRead() { - return this; - } - - @Override - public boolean loadNextBatch() throws IOException { - VectorSchemaRoot root = getVectorSchemaRoot(); - root.clear(); - VarBinaryVector binaryVector = (VarBinaryVector) root.getVector(FIELD_NAME); - binaryVector.allocateNew(1); - - // 申请足够空间 - while (binaryVector.getDataBuffer().capacity() < BATCH_SIZE) { - binaryVector.reallocDataBuffer(); - } - - int length = downloadRangeToBuffer(binaryVector.getDataBuffer()); - if (length == 0) { - return false; - } - - binaryVector.getOffsetBuffer().setInt(VarBinaryVector.OFFSET_WIDTH, length); - BitVectorHelper.setBit(binaryVector.getValidityBuffer(), 0); - binaryVector.setLastSet(0); - - root.setRowCount(1); - return true; - } - - @Override - public long bytesRead() { - try { - return this.inputStream.available(); - } catch (Exception e) { - throw DataproxyException.of(DataproxyErrorCode.GET_FILE_SIZE_FAILED, e); - } - } - - @Override - protected void closeReadSource() throws IOException { - try { - if (this.inputStream != null) { - this.inputStream.close(); - } - } catch (Exception ignored) { - } - } - - @Override - protected Schema readSchema() throws IOException { - return new Schema(List.of(Field.notNullable(FIELD_NAME, new ArrowType.Binary()))); - } - - private int downloadRangeToBuffer(ArrowBuf valueBuffer) { - if (inputStream == null) { - return 0; - } - - try { - if (inputStream.available() == 0) { - return 0; - } - - byte[] bytes = new byte[BATCH_SIZE]; - int length = inputStream.read(bytes); - valueBuffer.writeBytes(bytes, 0, length); - return length; - } catch (IOException e) { - throw DataproxyException.of(DataproxyErrorCode.FILE_BATCH_DOWNLOAD_FAILED, e); - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVDataReader.java 
b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVDataReader.java deleted file mode 100644 index f9ac4d5..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVDataReader.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.filesystem; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.hadoop.fs.FileSystem; -import org.secretflow.dataproxy.common.model.dataset.format.CSVFormatConfig; -import org.secretflow.dataproxy.manager.DataReader; -import org.secretflow.dataproxy.manager.SplitReader; - -import java.util.List; - -/** - * CSV file data reader - * - * @author muhong - * @date 2023-09-11 12:00 - */ -public class CSVDataReader implements DataReader { - - private final FileSystem fileSystem; - - private final String uri; - - private final BufferAllocator allocator; - - private final CSVFormatConfig formatConfig; - - private final List fieldList; - - private final Schema schema; - - public CSVDataReader(BufferAllocator allocator, - FileSystem fileSystem, - String uri, - Schema schema, - CSVFormatConfig formatConfig, - List fieldList) { - this.allocator = allocator; - this.fileSystem = fileSystem; - this.uri = uri; - this.schema = schema; - this.formatConfig = formatConfig; - this.fieldList = fieldList; - } - - @Override - public List createSplitReader(int splitNumber) { - return List.of(new CSVSplitReader(allocator, fileSystem, uri, schema, formatConfig, fieldList)); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVDataWriter.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVDataWriter.java deleted file mode 100644 index ffedc0f..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVDataWriter.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.secretflow.dataproxy.manager.connector.filesystem; - -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.dataset.format.CSVFormatConfig; -import org.secretflow.dataproxy.manager.DataWriter; - -import com.opencsv.CSVParserBuilder; -import com.opencsv.ICSVParser; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; - -/** - * CSV file data writer - * - * @author muhong - * @date 2023-09-11 12:01 - */ -public class CSVDataWriter implements DataWriter { - - // 数据写状态 - private FSDataOutputStream outputStream; - private ICSVParser rowParser; - private boolean headerWriteFinished = false; - - public CSVDataWriter(FileSystem fileSystem, - String uri, - CSVFormatConfig formatConfig) { - // 配置静态 csv 数据源文件格式解析器 - this.rowParser = new CSVParserBuilder() - .withSeparator(formatConfig.getSeparator().charAt(0)) - .withQuoteChar(formatConfig.getQuoteChar().charAt(0)) - .withEscapeChar(formatConfig.getEscapeChar().charAt(0)) - .build(); - - // 获取文件读取流 - try { - fileSystem.delete(new Path(uri), true); - this.outputStream = fileSystem.create(new Path(uri)); - } catch (Exception e) { - throw DataproxyException.of(DataproxyErrorCode.FILE_WRITE_STREAM_CREATE_FAILED, e.getLocalizedMessage(), e); - } - } - - @Override - public void write(VectorSchemaRoot root) throws IOException { - - // 表头写入 - if (!headerWriteFinished) { - String[] fieldNames = root.getSchema().getFields().stream().map(Field::getName).toArray(String[]::new); - String headerLine = this.rowParser.parseToLine(fieldNames, false); - this.outputStream.write(headerLine.getBytes(StandardCharsets.UTF_8)); - this.headerWriteFinished = true; - } - - // 数据逐行写入 - for (int row = 0; row < root.getRowCount(); row++) { - String[] values = new String[root.getFieldVectors().size()]; - for (int col = 0; col < root.getFieldVectors().size(); col++) { - values[col] = serialize(root.getVector(col).getObject(row)); - } - - String rowLine = "\n" + this.rowParser.parseToLine(values, false); - this.outputStream.write(rowLine.getBytes(StandardCharsets.UTF_8)); - } - } - - @Override - public void flush() throws IOException { - if (this.outputStream != null) { - this.outputStream.flush(); - } - } - - @Override - public void destroy() throws IOException { - - } - - @Override - public void close() throws Exception { - this.flush(); - - if (this.outputStream != null) { - this.outputStream.close(); - } - } - - /** - * 数据序列化为字符串 - * - * @param value 原始数据 - * @return - */ - private String serialize(Object value) { - // 文本数据无法区分为空内容还是null,序列化为空内容 - if (value == null) { - return ""; - } - - // 字节数组单独处理 - if (value instanceof byte[]) { - return new String((byte[]) value); - } - - // 其余类型数据直接调用toString方法 - return value.toString(); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVSplitReader.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVSplitReader.java deleted file mode 100644 index 1277943..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVSplitReader.java +++ /dev/null @@ -1,464 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. 
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVSplitReader.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVSplitReader.java
deleted file mode 100644
index 1277943..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/CSVSplitReader.java
+++ /dev/null
@@ -1,464 +0,0 @@
-/*
- * Copyright 2023 Ant Group Co., Ltd.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.filesystem;
-
-import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode;
-import org.secretflow.dataproxy.common.exceptions.DataproxyException;
-import org.secretflow.dataproxy.common.model.dataset.format.CSVFormatConfig;
-import org.secretflow.dataproxy.manager.SplitReader;
-
-import com.opencsv.CSVParserBuilder;
-import com.opencsv.ICSVParser;
-import lombok.extern.slf4j.Slf4j;
-import okio.Buffer;
-import okio.ByteString;
-import org.apache.arrow.memory.BufferAllocator;
-import org.apache.arrow.vector.*;
-import org.apache.arrow.vector.ipc.ArrowReader;
-import org.apache.arrow.vector.types.pojo.ArrowType;
-import org.apache.arrow.vector.types.pojo.Field;
-import org.apache.arrow.vector.types.pojo.Schema;
-import org.apache.commons.collections4.CollectionUtils;
-import org.apache.commons.collections4.MapUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.mozilla.universalchardet.Constants;
-import org.mozilla.universalchardet.UniversalDetector;
-
-import java.io.IOException;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.time.LocalDateTime;
-import java.time.ZoneOffset;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-
-/**
- * CSV file data split reader
- *
- * @author muhong
- * @date 2023-09-11 12:02
- */
-@Slf4j
-public class CSVSplitReader extends ArrowReader implements SplitReader {
-
-    private static final int FILE_READ_BATCH_SIZE = 3 * 1024 * 1024;
-
-    private static final int ARROW_DATA_ROW_SIZE = 10000;
-
-    private final FSDataInputStream inputStream;
-    private Charset charset = null;
-    private final Buffer buffer;
-    private final ICSVParser rowParser;
-
-    private boolean finished = false;
-
-    private final Schema schema;
-
-    /**
-     * Sequential mapping of original data fields to output fields
-     */
-    private final List<Integer> rawIndexList;
-
-    /**
-     * Original data header list
-     */
-    private List<String> headerList;
-
-    public CSVSplitReader(BufferAllocator allocator,
-                          FileSystem fileSystem,
-                          String uri,
-                          Schema schema,
-                          CSVFormatConfig formatConfig,
-                          List<String> fieldList) {
-        super(allocator);
-        this.buffer = new Buffer();
-        // Build CSV parser
-        this.rowParser = new CSVParserBuilder()
-            .withSeparator(formatConfig.getSeparator().charAt(0))
-            .withQuoteChar(formatConfig.getQuoteChar().charAt(0))
-            .withEscapeChar(formatConfig.getEscapeChar().charAt(0))
-            .build();
-
-        // Generate file input stream
-        try {
-            this.inputStream = fileSystem.open(new Path(uri));
-        } catch (Exception e) {
-            throw DataproxyException.of(DataproxyErrorCode.FILE_READ_STREAM_CREATE_FAILED, e.getMessage(), e);
-        }
-
-        // Parse header
-        parseHeader();
-
-        // Infer schema
-        Map<String, String> rawToArrowFieldNameMap = MapUtils.isNotEmpty(formatConfig.getFieldMap()) ?
-            formatConfig.getFieldMap() : new HashMap<>();
-        if (schema == null) {
-            schema = new Schema(this.headerList.stream()
-                .map(rawName -> Field.nullable(rawToArrowFieldNameMap.getOrDefault(rawName, rawName), new ArrowType.Utf8()))
-                .collect(Collectors.toList())
-            );
-        }
-
-        // Read by specific order
-        if (CollectionUtils.isNotEmpty(fieldList)) {
-            this.schema = new Schema(fieldList.stream().map(schema::findField).collect(Collectors.toList()));
-        } else {
-            this.schema = schema;
-        }
-
-        // Generate sequential mapping of original data fields to output fields
-        Map<String, String> arrowToRawFieldNameMap = rawToArrowFieldNameMap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
-        this.rawIndexList = this.schema.getFields().stream().map(field -> {
-            String rawFieldName = arrowToRawFieldNameMap.getOrDefault(field.getName(), field.getName());
-            return this.headerList.indexOf(rawFieldName);
-        }).collect(Collectors.toList());
-    }
-
-    /**
-     * Pre-allocate memory
-     *
-     * @param root       Target root
-     * @param targetSize Target size
-     */
-    private static void preAllocate(VectorSchemaRoot root, int targetSize) {
-        for (ValueVector vector : root.getFieldVectors()) {
-            // Only for fixed-length type data
-            if (vector instanceof BaseFixedWidthVector) {
-                ((BaseFixedWidthVector) vector).allocateNew(targetSize);
-            }
-        }
-    }
-
-    /**
-     * Deserialize data and write it into vector
-     *
-     * @param vector Vector
-     * @param index  Data col index
-     * @param value  Serialized data
-     */
-    private static void addValueInVector(FieldVector vector, int index, String value) {
-        if (value == null || StringUtils.isEmpty(value)) {
-            vector.setNull(index);
-            return;
-        }
-
-        try {
-            switch (vector.getMinorType()) {
-                case TINYINT:
-                    ((TinyIntVector) vector).setSafe(index, Integer.parseInt(value));
-                    break;
-                case SMALLINT:
-                    ((SmallIntVector) vector).setSafe(index, Integer.parseInt(value));
-                    break;
-                case INT:
-                    ((IntVector) vector).setSafe(index, Integer.parseInt(value));
-                    break;
-                case BIGINT:
-                    ((BigIntVector) vector).setSafe(index, Long.parseLong(value));
-                    break;
-                case UINT1:
-                    ((UInt1Vector) vector).setSafe(index, Integer.parseInt(value));
-                    break;
-                case UINT2:
-                    ((UInt2Vector) vector).setSafe(index, Integer.parseInt(value));
-                    break;
-                case UINT4:
-                    ((UInt4Vector) vector).setSafe(index, Integer.parseInt(value));
-                    break;
-                case UINT8:
-                    ((UInt8Vector) vector).setSafe(index, Long.parseLong(value));
-                    break;
-                case FLOAT4:
-                    ((Float4Vector) vector).setSafe(index, Float.parseFloat(value));
-                    break;
-                case FLOAT8:
-                    ((Float8Vector) vector).setSafe(index, Double.parseDouble(value));
-                    break;
-                case BIT:
-                    // Compatible with true/false, 0/1
-                    if ("true".equalsIgnoreCase(value)) {
-                        ((BitVector) vector).setSafe(index, 1);
-                    } else if ("false".equalsIgnoreCase(value)) {
-                        ((BitVector) vector).setSafe(index, 0);
-                    } else {
-                        ((BitVector) vector).setSafe(index, Integer.parseInt(value));
-                    }
-                    break;
-                case DATEDAY:
-                    ((DateDayVector) vector).setSafe(index, Integer.parseInt(value));
-                    break;
-                case DATEMILLI:
-                    // Convert the parsed local timestamp to epoch milliseconds (UTC)
-                    ((DateMilliVector) vector).setSafe(index, LocalDateTime.parse(value).toInstant(ZoneOffset.UTC).toEpochMilli());
-                    break;
-                case VARCHAR:
-                    ((VarCharVector) vector).setSafe(index, value.getBytes(StandardCharsets.UTF_8));
-                    break;
-                case VARBINARY:
-                    ((VarBinaryVector) vector).setSafe(index, value.getBytes(StandardCharsets.UTF_8));
-                    break;
-            }
-        } catch (NumberFormatException e) {
-            throw DataproxyException.of(DataproxyErrorCode.DATA_FORMAT_CONVERT_FAILED,
-                String.format("%s field data \"%s\" cannot be cast to %s",
-                    vector.getName(), value, vector.getMinorType()), e);
-        }
-    }
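[Editor's note] addValueInVector above is the whole deserialization story: each CSV cell arrives as a string and is parsed into the typed Arrow vector, with empty cells becoming nulls. A tiny self-contained illustration of those vector calls (hypothetical, using only arrow-memory and arrow-vector; not part of the patch):

    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;

    public class VectorFillDemo {
        public static void main(String[] args) {
            try (BufferAllocator allocator = new RootAllocator();
                 IntVector ages = new IntVector("age", allocator)) {
                ages.allocateNew(2);
                ages.setSafe(0, Integer.parseInt("42")); // a parsed CSV cell
                ages.setNull(1);                         // an empty CSV cell
                ages.setValueCount(2);
                System.out.println(ages);                // [42, null]
            }
        }
    }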
-
-    @Override
-    public ArrowReader startRead() {
-        return this;
-    }
-
-    @Override
-    public boolean loadNextBatch() throws IOException {
-        if (finished) {
-            return false;
-        }
-
-        VectorSchemaRoot root = getVectorSchemaRoot();
-        root.clear();
-        preAllocate(root, ARROW_DATA_ROW_SIZE);
-
-        int count = 0;
-        while (count < ARROW_DATA_ROW_SIZE) {
-            String dataLine = readNextLine();
-            if (StringUtils.isEmpty(dataLine)) {
-                this.finished = true;
-                break;
-            }
-
-            String[] serializedDataRow = parseLine(dataLine);
-            for (int col = 0; col < serializedDataRow.length; col++) {
-                FieldVector fieldVector = root.getVector(col);
-                addValueInVector(fieldVector, count, serializedDataRow[this.rawIndexList.get(col)]);
-            }
-            count++;
-            root.setRowCount(count);
-        }
-        return true;
-    }
-
-    @Override
-    public long bytesRead() {
-        return 0;
-    }
-
-    @Override
-    protected void closeReadSource() throws IOException {
-        try {
-            if (this.inputStream != null) {
-                this.inputStream.close();
-            }
-        } catch (Exception ignored) {
-        }
-    }
-
-    @Override
-    protected Schema readSchema() throws IOException {
-        return this.schema;
-    }
-
-    /**
-     * Parse header
-     */
-    private void parseHeader() {
-        // Parse header line
-        String headerLine = readNextLine();
-        if (StringUtils.isBlank(headerLine)) {
-            throw DataproxyException.of(DataproxyErrorCode.HEADER_LINE_NOT_EXIST);
-        }
-
-        String[] headerList = parseLine(headerLine);
-        if (headerList == null) {
-            throw DataproxyException.of(DataproxyErrorCode.HEADER_LINE_PARSE_FAILED);
-        }
-        this.headerList = Arrays.asList(headerList);
-    }
-
-    /**
-     * Parse data line
-     *
-     * @param line Original data
-     * @return Split data list
-     */
-    private String[] parseLine(String line) {
-        try {
-            return rowParser.parseLine(line);
-        } catch (IOException e) {
-            throw DataproxyException.of(DataproxyErrorCode.VALUE_LINE_PARSE_FAILED, e);
-        }
-    }
-
-    /**
-     * Read next line
-     *
-     * @return Next line
-     */
-    private String readNextLine() {
-        // First check whether the buffer already holds a complete line: if so, return it
-        // directly; if not, try to download more data and check again. When no new data
-        // can be downloaded, whatever remains in the buffer is returned.
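        // [Editor's note, illustrative only] In isolation, the okio calls used below
        // consume one buffered CRLF-terminated line like this (hypothetical values):
        //
        //     Buffer b = new Buffer().writeUtf8("a,b\r\nc,d\n");
        //     long nl = b.indexOf((byte) '\n');                           // -> 4
        //     String line = b.readString(nl - 1, StandardCharsets.UTF_8); // -> "a,b"
        //     b.skip(2);                                                  // drop "\r\n"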
- boolean continueDownload; - do { - continueDownload = !isLineInBuffer() && downloadRangeToBuffer(); - } while (continueDownload); - - try { - // Try to detect csv encoding during initialization - boolean isInit = false; - if (charset == null) { - isInit = true; - detectEncoding(buffer); - } - // Check if there is a BOM header, if so remove it - if (isInit) { - removeBom(buffer); - } - // Read data according to the recognized charset - return readLineOfCharset(buffer); - } catch (IOException e) { - throw DataproxyException.of(DataproxyErrorCode.READ_DATA_LINE_FAILED, e); - } - } - - // Detect and remove BOM header of CSV - private void removeBom(Buffer buffer) { - try { - if (buffer.size() != 0) { - ByteString firstLine = buffer.copy().readByteString(); - switch (firstLine.getByte(0) & 0xFF) { - case 0xEF: - if (firstLine.size() > 2 && - (firstLine.getByte(1) & 0xFF) == 0xBB - && (firstLine.getByte(2) & 0xFF) == 0xBF) { - buffer.skip(3); - } - break; - case 0xFE: - if (firstLine.size() > 3 && - (firstLine.getByte(1) & 0xFF) == 0xFF - && (firstLine.getByte(2) & 0xFF) == 0x00 - && ((firstLine.getByte(3) & 0xFF) == 0x00)) { - buffer.skip(4); - } else if (firstLine.size() > 1 - && ((firstLine.getByte(1) & 0xFF) == 0xFF)) { - buffer.skip(2); - } - break; - case 0x00: - if (firstLine.size() > 3) { - if ((firstLine.getByte(1) & 0xFF) == 0x00) { - if ((firstLine.getByte(2) & 0xFF) == 0xFE - && (firstLine.getByte(3) & 0xFF) == 0xFF) { - buffer.skip(4); - } else if ((firstLine.getByte(2) & 0xFF) == 0xFF - && (firstLine.getByte(3) & 0xFF) == 0xFE) { - buffer.skip(4); - } - } - } - break; - case 0xFF: - if (firstLine.size() > 3 && - (firstLine.getByte(1) & 0xFF) == 0xFE - && (firstLine.getByte(2) & 0xFF) == 0x00 - && ((firstLine.getByte(3) & 0xFF) == 0x00)) { - buffer.skip(4); - } else if (firstLine.size() > 1 - && ((firstLine.getByte(1) & 0xFF) == 0xFE)) { - buffer.skip(2); - } - break; - } - } - } catch (Exception e) { - throw DataproxyException.of(DataproxyErrorCode.BOM_REMOVE_FAILED, e); - } - } - - // Detect charset - private void detectEncoding(Buffer buffer) { - try { - UniversalDetector detector = new UniversalDetector(null); - ByteString firstLine = buffer.copy().readByteString(); - detector.handleData(firstLine.toByteArray(), 0, firstLine.size()); - detector.dataEnd(); - if (detector.getDetectedCharset() != null) { - if (!Charset.forName(detector.getDetectedCharset()).equals(StandardCharsets.UTF_8)) { - // The consensus that is not UTF-8 is GB18030 - charset = Charset.forName(Constants.CHARSET_GB18030); - } else { - charset = StandardCharsets.UTF_8; - } - } else { - // Use UTF-8 when the detect fails - charset = StandardCharsets.UTF_8; - } - } catch (Exception e) { - throw DataproxyException.of(DataproxyErrorCode.DETECT_ENCODING_FAILED, e); - } - } - - private String readLineOfCharset(Buffer buffer) throws IOException { - long locOfN = buffer.indexOf(ByteString.of((byte) '\n')); - if (locOfN != -1L) { - if (locOfN > 0 && buffer.getByte(locOfN - 1) == (byte) '\r') { - // \r\n - String result = buffer.readString(locOfN - 1, charset); - buffer.skip(2); - return result; - } else { - // \n - String result = buffer.readString(locOfN, charset); - buffer.skip(1); - return result; - } - } else if (buffer.size() != 0L) { - return buffer.readString(charset); - } else { - return null; - } - } - - private boolean downloadRangeToBuffer() { - if (inputStream == null) { - return false; - } - - try { - if (inputStream.available() == 0) { - return false; - } - - byte[] bytes = new byte[(int) 
FILE_READ_BATCH_SIZE]; - int length = inputStream.read(bytes); - buffer.write(bytes, 0, length); - return true; - } catch (IOException e) { - throw DataproxyException.of(DataproxyErrorCode.FILE_BATCH_DOWNLOAD_FAILED, e); - } - } - - private boolean isLineInBuffer() { - return buffer.indexOf((byte) '\n') != -1L; - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/FileSystemConnector.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/FileSystemConnector.java deleted file mode 100644 index 65228d7..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/filesystem/FileSystemConnector.java +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.filesystem; - -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.FlightContentFormatConfig; -import org.secretflow.dataproxy.common.model.FlightContentFormatTypeEnum; -import org.secretflow.dataproxy.common.model.InferSchemaResult; -import org.secretflow.dataproxy.common.model.command.DatasetReadCommand; -import org.secretflow.dataproxy.common.model.command.DatasetWriteCommand; -import org.secretflow.dataproxy.common.model.dataset.DatasetFormatConfig; -import org.secretflow.dataproxy.common.model.dataset.format.CSVFormatConfig; -import org.secretflow.dataproxy.common.model.datasource.DatasourceTypeEnum; -import org.secretflow.dataproxy.common.model.datasource.conn.ConnConfig; -import org.secretflow.dataproxy.common.model.datasource.conn.LocalFileSystemConnConfig; -import org.secretflow.dataproxy.common.model.datasource.conn.ObjectFileSystemConnConfig; -import org.secretflow.dataproxy.common.model.datasource.location.FileSystemLocationConfig; -import org.secretflow.dataproxy.common.model.datasource.location.LocationConfig; -import org.secretflow.dataproxy.common.utils.DPStringUtils; -import org.secretflow.dataproxy.common.utils.JsonUtils; -import org.secretflow.dataproxy.manager.Connector; -import org.secretflow.dataproxy.manager.DataReader; -import org.secretflow.dataproxy.manager.DataWriter; -import org.secretflow.dataproxy.manager.SplitReader; - -import lombok.extern.slf4j.Slf4j; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import java.net.URI; -import java.util.Arrays; -import java.util.List; - -/** - * File system connector - * - * @author muhong - * @date 2023-09-11 09:49 - */ -@Slf4j -public class FileSystemConnector implements Connector { - - /** - * Root uri - */ - private final String rootUri; - - /** - * Filesystem - */ - private final FileSystem fileSystem; - - /** - * 
FileSystemConnector constructor
-     *
-     * @param type       file system datasource type
-     * @param connConfig file system datasource connection info, which differs by type
-     */
-    public FileSystemConnector(DatasourceTypeEnum type, ConnConfig connConfig) {
-        // Build the file system parameters
-        Configuration configuration = new Configuration();
-        switch (type) {
-            case MINIO: {
-                ObjectFileSystemConnConfig minioConnConfig = (ObjectFileSystemConnConfig) connConfig;
-                rootUri = generateUri(type.getScheme(), minioConnConfig.getBucket(), minioConnConfig.getObjectKeyPrefix());
-                configuration.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem");
-                configuration.set("fs.s3a.endpoint", minioConnConfig.getEndpoint());
-                configuration.set("fs.s3a.access.key", minioConnConfig.getAccessKey());
-                configuration.set("fs.s3a.secret.key", minioConnConfig.getAccessSecret());
-                configuration.set("fs.s3a.buffer.dir", "./dp/buffer");
-                configuration.set("fs.s3a.connection.ssl.enabled", "false");
-                configuration.setInt("fs.s3a.connection.timeout", 7200000);
-                // Reduce the retry count to avoid blocking
-                configuration.setInt("fs.s3a.attempts.maximum", 1);
-                configuration.setInt("fs.s3a.retry.limit", 1);
-                break;
-            }
-            case OSS: {
-                ObjectFileSystemConnConfig ossConnConfig = (ObjectFileSystemConnConfig) connConfig;
-                rootUri = generateUri(type.getScheme(), ossConnConfig.getBucket(), ossConnConfig.getObjectKeyPrefix());
-                configuration.set("fs.oss.impl", "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem");
-                configuration.set("fs.oss.endpoint", ossConnConfig.getEndpoint());
-                configuration.set("fs.oss.accessKeyId", ossConnConfig.getAccessKey());
-                configuration.set("fs.oss.accessKeySecret", ossConnConfig.getAccessSecret());
-                configuration.set("fs.oss.buffer.dir", "./dp/buffer");
-                configuration.set("fs.oss.timeout.millisecond", String.valueOf(7200000));
-                configuration.setInt("fs.oss.attempts.maximum", 1);
-                break;
-            }
-            case OBS: {
-                ObjectFileSystemConnConfig obsConnConfig = (ObjectFileSystemConnConfig) connConfig;
-                rootUri = generateUri(type.getScheme(), obsConnConfig.getBucket(), obsConnConfig.getObjectKeyPrefix());
-                configuration.set("fs.obs.impl", "org.apache.hadoop.fs.obs.OBSFileSystem");
-                configuration.set("fs.obs.endpoint", obsConnConfig.getEndpoint());
-                configuration.set("fs.obs.accessKeyId", obsConnConfig.getAccessKey());
-                configuration.set("fs.obs.accessKeySecret", obsConnConfig.getAccessSecret());
-                configuration.set("fs.obs.buffer.dir", "./dp/buffer");
-                configuration.set("fs.obs.timeout.millisecond", String.valueOf(7200000));
-                configuration.setInt("fs.obs.attempts.maximum", 1);
-                break;
-            }
-            case LOCAL_HOST: {
-                LocalFileSystemConnConfig localFsConnConfig = (LocalFileSystemConnConfig) connConfig;
-                rootUri = generateUri(type.getScheme(), localFsConnConfig.getPath());
-                break;
-            }
-            default:
-                throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Unsupported file system datasource " + type);
-        }
-
-        // Build the file system connector
-        try {
-            this.fileSystem = FileSystem.newInstance(new URI(rootUri), configuration);
-        } catch (Exception e) {
-            log.error("[FileSystemConnector] failed to create the file system connector, type:{}, config:{}", type, JsonUtils.toJSONString(connConfig), e);
-            throw DataproxyException.of(DataproxyErrorCode.CREATE_DATASOURCE_CONNECTOR_ERROR, e);
-        }
-    }
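[Editor's note] The constructor above is essentially Hadoop FileSystem bootstrapping. A pared-down, hypothetical sketch of the same wiring for the MinIO/s3a branch (endpoint, bucket and credentials are placeholders, not values from this patch):

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class S3aFileSystemDemo {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem");
            conf.set("fs.s3a.endpoint", "http://127.0.0.1:9000");    // placeholder MinIO endpoint
            conf.set("fs.s3a.access.key", "placeholder-access-key");
            conf.set("fs.s3a.secret.key", "placeholder-secret-key");
            conf.set("fs.s3a.connection.ssl.enabled", "false");
            try (FileSystem fs = FileSystem.newInstance(new URI("s3a://demo-bucket"), conf)) {
                System.out.println(fs.exists(new Path("s3a://demo-bucket/data.csv")));
            }
        }
    }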
-
-    @Override
-    public InferSchemaResult inferSchema(BufferAllocator allocator, LocationConfig locationConfig, DatasetFormatConfig formatConfig) {
-        String uri = generateFileUri(((FileSystemLocationConfig) locationConfig).getRelativePath());
-
-        DataReader dataReader = null;
-        switch (formatConfig.getType()) {
-            case CSV:
-                dataReader = new CSVDataReader(allocator, this.fileSystem, uri, null,
-                    (CSVFormatConfig) formatConfig.getFormatConfig(), null);
-                break;
-            case BINARY_FILE:
-                dataReader = new BinaryFileDataReader(allocator, this.fileSystem, uri);
-                break;
-            default:
-                throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Unsupported file format " + formatConfig.getType());
-        }
-        List<SplitReader> splitReader = dataReader.createSplitReader(1);
-        try (ArrowReader arrowReader = splitReader.get(0).startRead()) {
-            return InferSchemaResult.builder()
-                .schema(arrowReader.getVectorSchemaRoot().getSchema())
-                .datasetFormatConfig(DatasetFormatConfig.builder()
-                    .type(formatConfig.getType())
-                    .formatConfig(formatConfig.getFormatConfig())
-                    .build())
-                .build();
-        } catch (Exception e) {
-            throw DataproxyException.of(DataproxyErrorCode.READER_RELEASE_FAILED, e);
-        }
-    }
-
-    @Override
-    public DataReader buildReader(BufferAllocator allocator, DatasetReadCommand readCommand) {
-        FileSystemLocationConfig fileSystemLocationConfig = (FileSystemLocationConfig) readCommand.getLocationConfig().getLocationConfig();
-        String uri = generateFileUri(fileSystemLocationConfig.getRelativePath());
-
-        FlightContentFormatConfig outputFormatConfig = readCommand.getOutputFormatConfig();
-        switch (readCommand.getFormatConfig().getType()) {
-            case CSV:
-                // When the stored format is CSV and the requested output is structured data (or unset),
-                // return structured output; everything else is read as binary.
-                if (outputFormatConfig == null || outputFormatConfig.getFormatType() == FlightContentFormatTypeEnum.STRUCTURED_DATA) {
-                    log.info("[FileSystemConnector - buildReader] structured data read, uri:{}", uri);
-                    return new CSVDataReader(allocator, this.fileSystem, uri, readCommand.getSchema(), (CSVFormatConfig) readCommand.getFormatConfig().getFormatConfig(), readCommand.getFieldList());
-                }
-                // fall through: non-structured output is served as a binary file
-            case BINARY_FILE:
-                log.info("[FileSystemConnector - buildReader] binary file read, uri:{}", uri);
-                return new BinaryFileDataReader(allocator, this.fileSystem, uri);
-            default:
-                throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Unsupported file format " + readCommand.getFormatConfig().getType());
-        }
-    }
-
-    @Override
-    public DataWriter buildWriter(DatasetWriteCommand writeCommand) {
-        FileSystemLocationConfig fileSystemLocationConfig = (FileSystemLocationConfig) writeCommand.getLocationConfig().getLocationConfig();
-        String uri = generateFileUri(fileSystemLocationConfig.getRelativePath());
-
-        FlightContentFormatConfig inputFormatConfig = writeCommand.getInputFormatConfig();
-        switch (writeCommand.getFormatConfig().getType()) {
-            case CSV:
-                if (inputFormatConfig == null || inputFormatConfig.getFormatType() == FlightContentFormatTypeEnum.STRUCTURED_DATA) {
-                    log.info("[FileSystemConnector - buildWriter] STRUCTURED_DATA, uri:{}", uri);
-                    return new CSVDataWriter(this.fileSystem, uri, (CSVFormatConfig) writeCommand.getFormatConfig().getFormatConfig());
-                }
-                // fall through: non-structured input is written as a binary file
-            case BINARY_FILE:
-                log.info("[FileSystemConnector - buildWriter] BINARY_FILE, uri:{}", uri);
-                return new BinaryFileDataWriter(this.fileSystem, uri);
-            default:
-                throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Unsupported file format " + writeCommand.getFormatConfig().getType());
-        }
-    }
-
-    @Override
-    public boolean isAvailable() {
-        try {
-            this.fileSystem.getFileStatus(new Path(rootUri));
-            return true;
-        } catch (Exception e) {
-            log.info("[FileSystemConnector] check status error, uri:{}", this.rootUri, e);
-            return false;
-        }
-    }
-
-    @Override
-    public void close() throws Exception {
-        this.fileSystem.close();
-    }
-
-    /**
-     * Generate a file URI
-     *
-     * @param scheme protocol scheme
-     * @param path   path segments
-     * @return joined URI
-     */
-    private String generateUri(String scheme, String...
path) { - return scheme + - DPStringUtils.joinWithoutEmpty("/", - Arrays.stream(path).map(item -> DPStringUtils.removeDecorateIdentifier(item, "/")).toArray(String[]::new) - ); - } - - private String generateFileUri(String relativePath){ - return DPStringUtils.removeDecorateIdentifier(this.rootUri, "/") + "/" + DPStringUtils.removeDecorateIdentifier(relativePath, "/"); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsConnector.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsConnector.java deleted file mode 100644 index 39753d6..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsConnector.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright 2024 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.secretflow.dataproxy.manager.connector.odps; - -import com.aliyun.odps.OdpsException; -import org.apache.arrow.memory.BufferAllocator; -import org.secretflow.dataproxy.common.model.InferSchemaResult; -import org.secretflow.dataproxy.common.model.command.DatasetReadCommand; -import org.secretflow.dataproxy.common.model.command.DatasetWriteCommand; -import org.secretflow.dataproxy.common.model.dataset.DatasetFormatConfig; -import org.secretflow.dataproxy.common.model.dataset.format.DatasetFormatTypeEnum; -import org.secretflow.dataproxy.common.model.datasource.DatasourceConnConfig; -import org.secretflow.dataproxy.common.model.datasource.DatasourceTypeEnum; -import org.secretflow.dataproxy.common.model.datasource.conn.ConnConfig; -import org.secretflow.dataproxy.common.model.datasource.conn.OdpsConnConfig; -import org.secretflow.dataproxy.common.model.datasource.location.LocationConfig; -import org.secretflow.dataproxy.common.model.datasource.location.OdpsTableInfo; -import org.secretflow.dataproxy.manager.Connector; -import org.secretflow.dataproxy.manager.DataReader; -import org.secretflow.dataproxy.manager.DataWriter; - -import java.io.IOException; -import java.util.Objects; - -/** - * odps Connector - * - * @author yuexie - * @date 2024-06-01 17:08:45 - */ -public class OdpsConnector implements Connector { - - /** - * odps connection config - */ - private final OdpsConnConfig config; - - public OdpsConnector(ConnConfig config) { - if (!(config instanceof OdpsConnConfig odpsConnConfig)) { - throw new IllegalArgumentException("Invalid conn config type."); - } - this.config = odpsConnConfig; - } - - @Override - public InferSchemaResult inferSchema(BufferAllocator allocator, LocationConfig locationConfig, DatasetFormatConfig formatConfig) { - - return InferSchemaResult.builder() - .datasetFormatConfig(formatConfig) - .schema(null) - .build(); - - } - - @Override - public DataReader buildReader(BufferAllocator allocator, DatasetReadCommand readCommand) { - - if (invalidateConnectionType(readCommand.getConnConfig())) { - throw new IllegalArgumentException("[ODPS] Unsupported datasource type."); - } - - if 
(Objects.equals(DatasetFormatTypeEnum.TABLE, readCommand.getFormatConfig().getType())) {
-            return new OdpsDataReader(allocator, config, (OdpsTableInfo) readCommand.getLocationConfig().getLocationConfig(), readCommand.getSchema());
-        }
-        return new OdpsResourceReader(allocator, config, (OdpsTableInfo) readCommand.getLocationConfig().getLocationConfig());
-    }
-
-    @Override
-    public DataWriter buildWriter(DatasetWriteCommand writeCommand) {
-
-        if (invalidateConnectionType(writeCommand.getConnConfig())) {
-            throw new IllegalArgumentException("[ODPS] Unsupported datasource type.");
-        }
-        OdpsTableInfo locationConfig = (OdpsTableInfo) writeCommand.getLocationConfig().getLocationConfig();
-
-        if (Objects.equals(DatasetFormatTypeEnum.TABLE, writeCommand.getFormatConfig().getType())) {
-            try {
-                return new OdpsDataWriter(config, locationConfig, writeCommand.getSchema());
-            } catch (IOException | OdpsException e) {
-                throw new RuntimeException(e);
-            }
-        }
-        return new OdpsResourceWriter(config, locationConfig);
-    }
-
-    @Override
-    public boolean isAvailable() {
-        return true;
-    }
-
-    @Override
-    public void close() throws Exception {
-        // ODPS holds no connection resources that need closing
-    }
-
-    /**
-     * Check whether the connection type is invalid
-     *
-     * @param connConfig connection config
-     * @return true if the type is invalid
-     */
-    private boolean invalidateConnectionType(DatasourceConnConfig connConfig) {
-        if (connConfig == null || connConfig.getType() == null) {
-            return true;
-        }
-        return connConfig.getType() != DatasourceTypeEnum.ODPS;
-    }
-}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsDataReader.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsDataReader.java
deleted file mode 100644
index d848e52..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsDataReader.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright 2024 Ant Group Co., Ltd.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
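[Editor's note] The ODPS readers below hand back SplitReader instances whose startRead() yields an Arrow ArrowReader. How a caller typically drains one split, as a hedged sketch (the helper class is hypothetical, not part of the patch):

    import org.apache.arrow.vector.VectorSchemaRoot;
    import org.apache.arrow.vector.ipc.ArrowReader;

    public final class SplitDrainDemo {
        public static long drain(ArrowReader reader) throws Exception {
            try (reader) {
                VectorSchemaRoot root = reader.getVectorSchemaRoot(); // reused across batches
                long rows = 0;
                while (reader.loadNextBatch()) {
                    rows += root.getRowCount(); // process the current batch here
                }
                return rows;
            }
        }
    }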
-package org.secretflow.dataproxy.manager.connector.odps;
-
-import org.apache.arrow.memory.BufferAllocator;
-import org.apache.arrow.vector.types.pojo.Schema;
-import org.secretflow.dataproxy.common.model.datasource.conn.OdpsConnConfig;
-import org.secretflow.dataproxy.common.model.datasource.location.OdpsTableInfo;
-import org.secretflow.dataproxy.manager.DataReader;
-import org.secretflow.dataproxy.manager.SplitReader;
-
-import java.util.List;
-
-/**
- * odps Table Reader
- *
- * @author yuexie
- * @date 2024-06-01 17:08:45
- */
-public class OdpsDataReader implements DataReader {
-
-    private final OdpsConnConfig odpsConnConfig;
-    private final BufferAllocator allocator;
-    private final OdpsTableInfo tableInfo;
-    private final Schema schema;
-
-    public OdpsDataReader(BufferAllocator allocator, OdpsConnConfig odpsConnConfig, OdpsTableInfo tableInfo, Schema schema) {
-        this.odpsConnConfig = odpsConnConfig;
-        this.allocator = allocator;
-        this.tableInfo = tableInfo;
-        this.schema = schema;
-    }
-
-    @Override
-    public List<SplitReader> createSplitReader(int splitNumber) {
-        // TODO: split reader
-        return List.of(new OdpsSplitArrowReader(allocator, odpsConnConfig, tableInfo, schema));
-    }
-}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceReader.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceReader.java
deleted file mode 100644
index c2f20a3..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceReader.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright 2024 Ant Group Co., Ltd.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ -package org.secretflow.dataproxy.manager.connector.odps; - -import org.apache.arrow.memory.BufferAllocator; -import org.secretflow.dataproxy.common.model.datasource.conn.OdpsConnConfig; -import org.secretflow.dataproxy.common.model.datasource.location.OdpsTableInfo; -import org.secretflow.dataproxy.manager.DataReader; -import org.secretflow.dataproxy.manager.SplitReader; - -import java.util.List; - -/** - * odps Resource Reader - * - * @author yuexie - * @date 2024-06-01 17:08:45 - */ -public class OdpsResourceReader implements DataReader { - - private final OdpsConnConfig odpsConnConfig; - private final BufferAllocator allocator; - private final OdpsTableInfo tableInfo; - - public OdpsResourceReader(BufferAllocator allocator, OdpsConnConfig odpsConnConfig, OdpsTableInfo tableInfo) { - this.odpsConnConfig = odpsConnConfig; - this.allocator = allocator; - this.tableInfo = tableInfo; - } - - @Override - public List createSplitReader(int splitNumber) { - return List.of(new OdpsResourceSplitReader(allocator, odpsConnConfig, tableInfo)); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceWriter.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceWriter.java deleted file mode 100644 index 772e172..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceWriter.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright 2024 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.secretflow.dataproxy.manager.connector.odps; - - -import com.aliyun.odps.FileResource; -import com.aliyun.odps.NoSuchObjectException; -import com.aliyun.odps.Odps; -import com.aliyun.odps.OdpsException; -import com.aliyun.odps.Resource; -import lombok.extern.slf4j.Slf4j; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.datasource.conn.OdpsConnConfig; -import org.secretflow.dataproxy.common.model.datasource.location.OdpsTableInfo; -import org.secretflow.dataproxy.manager.DataWriter; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; - -/** - * odps Resource Writer - * - * @author yuexie - * @date 2024-06-01 17:08:45 - */ -@Slf4j -public class OdpsResourceWriter implements DataWriter { - - private final OdpsConnConfig odpsConnConfig; - private final OdpsTableInfo odpsTableInfo; - - private Odps odps; - - private static final String FIELD_NAME = "binary_data"; - - private InputStream odpsInputStream = null; - - public OdpsResourceWriter(OdpsConnConfig odpsConnConfig, OdpsTableInfo odpsTableInfo) { - this.odpsConnConfig = odpsConnConfig; - this.odpsTableInfo = odpsTableInfo; - initOdps(); - } - - - @Override - public void write(VectorSchemaRoot root) throws IOException { - - FieldVector vector = root.getVector(FIELD_NAME); - - if (vector instanceof VarBinaryVector varBinaryVector) { - - int rowCount = root.getRowCount(); - for (int row = 0; row < rowCount; row++) { - byte[] bytes = varBinaryVector.get(row); - - odpsInputStream = new ByteArrayInputStream(bytes); - FileResource fileResource = new FileResource(); - fileResource.setName(odpsTableInfo.tableName()); - try { - if (resourceExists(odps, odpsTableInfo.tableName())) { - odps.resources().update(fileResource, odpsInputStream); - } else { - odps.resources().create(fileResource, odpsInputStream); - } - } catch (OdpsException e) { - throw new RuntimeException(e); - } - } - } else { - throw DataproxyException.of(DataproxyErrorCode.UNSUPPORTED_FIELD_TYPE, "Only support VarBinaryVector type"); - } - - } - - @Override - public void flush() throws IOException { - - } - - @Override - public void destroy() throws IOException { - - } - - @Override - public void close() throws Exception { - if (odpsInputStream != null) { - odpsInputStream.close(); - } - } - - private void initOdps() { - odps = OdpsUtil.buildOdps(odpsConnConfig); - } - - private static boolean resourceExists(Odps odps, String resourceName) throws OdpsException { - try { - Resource resource = odps.resources().get(resourceName); - resource.reload(); - return true; - } catch (NoSuchObjectException e) { - return false; - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsSplitArrowReader.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsSplitArrowReader.java deleted file mode 100644 index a1ee214..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsSplitArrowReader.java +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Copyright 2024 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.secretflow.dataproxy.manager.connector.odps; - -import com.aliyun.odps.Column; -import com.aliyun.odps.Instance; -import com.aliyun.odps.Odps; -import com.aliyun.odps.OdpsException; -import com.aliyun.odps.PartitionSpec; -import com.aliyun.odps.data.ArrayRecord; -import com.aliyun.odps.data.Record; -import com.aliyun.odps.task.SQLTask; -import com.aliyun.odps.tunnel.InstanceTunnel; -import com.aliyun.odps.tunnel.TunnelException; -import com.aliyun.odps.tunnel.io.TunnelRecordReader; -import lombok.extern.slf4j.Slf4j; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedWidthVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VariableWidthVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ValueVectorUtility; -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.datasource.conn.OdpsConnConfig; -import org.secretflow.dataproxy.common.model.datasource.location.OdpsTableInfo; -import org.secretflow.dataproxy.manager.SplitReader; - -import java.io.IOException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -/** - * odps Table Split Reader - * - * @author yuexie - * @date 2024-06-01 17:08:45 - */ -@Slf4j -public class OdpsSplitArrowReader extends ArrowReader implements SplitReader, AutoCloseable { - - private final OdpsConnConfig odpsConnConfig; - - private final OdpsTableInfo tableInfo; - - private final Schema schema; - - private final int batchSize = 10000; - - private boolean partitioned = false; - - private InstanceTunnel.DownloadSession downloadSession; - - private int currentIndex = 0; - - private final Set columns = new HashSet<>(); - - private final Pattern columnOrValuePattern = Pattern.compile("^[\\u00b7A-Za-z0-9\\u4e00-\\u9fa5\\-_,.]*$"); - - private final ExecutorService executorService = Executors.newSingleThreadExecutor(); - - private final LinkedBlockingQueue recordQueue = new LinkedBlockingQueue<>(batchSize); - - protected OdpsSplitArrowReader(BufferAllocator allocator, OdpsConnConfig odpsConnConfig, 
OdpsTableInfo tableInfo, Schema schema) {
-        super(allocator);
-        this.odpsConnConfig = odpsConnConfig;
-        this.tableInfo = tableInfo;
-        this.schema = schema;
-    }
-
-    @Override
-    public boolean loadNextBatch() throws IOException {
-        VectorSchemaRoot root = getVectorSchemaRoot();
-        root.clear();
-        long resultCount = downloadSession.getRecordCount();
-        log.info("Load next batch start, recordCount: {}", resultCount);
-
-        if (currentIndex >= resultCount) {
-            return false;
-        }
-
-        int recordCount = 0;
-
-        try (TunnelRecordReader records = downloadSession.openRecordReader(currentIndex, batchSize, true)) {
-
-            Record firstRecord = records.read();
-            if (firstRecord != null) {
-                ValueVectorUtility.preAllocate(root, batchSize);
-                root.setRowCount(batchSize);
-
-                root.getFieldVectors().forEach(fieldVector -> {
-                    if (fieldVector instanceof FixedWidthVector baseFixedWidthVector) {
-                        baseFixedWidthVector.allocateNew(batchSize);
-                    } else if (fieldVector instanceof VariableWidthVector baseVariableWidthVector) {
-                        baseVariableWidthVector.allocateNew(batchSize * 32);
-                    }
-                });
-
-                Future<?> submitFuture = executorService.submit(() -> {
-                    try {
-                        int takeRecordCount = 0;
-
-                        for (;;) {
-
-                            Record record = recordQueue.take();
-
-                            if (record instanceof ArrayRecord && record.getColumns().length == 0) {
-                                log.info("recordQueue take record take Count: {}", takeRecordCount);
-                                break;
-                            }
-
-                            ValueVectorUtility.ensureCapacity(root, takeRecordCount + 1);
-                            this.toArrowVector(record, root, takeRecordCount);
-                            takeRecordCount++;
-
-                        }
-                    } catch (InterruptedException e) {
-                        throw new RuntimeException(e);
-                    }
-                });
-
-                columns.addAll(Arrays.stream(firstRecord.getColumns()).map(Column::getName).collect(Collectors.toSet()));
-
-                recordQueue.put(firstRecord);
-                recordCount++;
-                // Iterating with #read() reuses the record object of rows already handed out,
-                // so an asynchronous consumer may read no data; #clone() can be used instead,
-                // and the performance difference is small.
-                for (Record record : records) {
-                    try {
-                        recordQueue.put(record);
-                        recordCount++;
-                    } catch (InterruptedException e) {
-                        throw new RuntimeException(e);
-                    }
-                }
-
-                recordQueue.put(new ArrayRecord(new Column[0]));
-                log.info("recordQueue put record Count: {}", recordCount);
-
-                submitFuture.get();
-                currentIndex += batchSize;
-            } else {
-                log.warn("Read first record is null, maybe it has been read.");
-            }
-
-        } catch (TunnelException | ExecutionException | InterruptedException e) {
-            throw new RuntimeException(e);
-        }
-        root.setRowCount(recordCount);
-        log.info("Load next batch success, recordCount: {}", recordCount);
-        return true;
-    }
-
-    @Override
-    public long bytesRead() {
-        return 0;
-    }
-
-    @Override
-    protected void closeReadSource() throws IOException {
-        executorService.shutdownNow();
-    }
-
-    @Override
-    protected Schema readSchema() throws IOException {
-        return this.schema;
-    }
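[Editor's note] loadNextBatch above is a bounded producer/consumer handoff: tunnel records are pushed into a LinkedBlockingQueue and an empty ArrayRecord acts as the end-of-stream sentinel. The same pattern in a minimal, hypothetical standalone form (not part of the patch):

    import java.util.concurrent.LinkedBlockingQueue;

    public class PoisonPillDemo {
        private static final String POISON_PILL = "";

        public static void main(String[] args) throws InterruptedException {
            LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>(4);
            Thread consumer = new Thread(() -> {
                try {
                    for (;;) {
                        String item = queue.take();
                        if (item == POISON_PILL) { // sentinel compared by reference
                            break;
                        }
                        System.out.println("consumed " + item);
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
            });
            consumer.start();
            queue.put("row-1");
            queue.put("row-2");
            queue.put(POISON_PILL); // end-of-stream, like the empty ArrayRecord
            consumer.join();
        }
    }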
-
-    @Override
-    public ArrowReader startRead() {
-
-        Odps odps = OdpsUtil.buildOdps(odpsConnConfig);
-        String sql = "";
-        try {
-            partitioned = odps.tables().get(odpsConnConfig.getProjectName(), tableInfo.tableName()).isPartitioned();
-
-            sql = this.buildSql(tableInfo.tableName(), tableInfo.fields(), tableInfo.partitionSpec());
-            Instance instance = SQLTask.run(odps, odpsConnConfig.getProjectName(), sql, OdpsUtil.getSqlFlag(), null);
-
-            log.info("SQLTask run start, sql: {}", sql);
-            // Wait for the task to complete
-            instance.waitForSuccess();
-            log.info("SQLTask run success, sql: {}", sql);
-
-            downloadSession = new InstanceTunnel(odps).createDownloadSession(odps.getDefaultProject(), instance.getId(), false);
-
-        } catch (OdpsException e) {
-            log.error("SQLTask run error, sql: {}", sql, e);
-            throw DataproxyException.of(DataproxyErrorCode.ODPS_ERROR, e.getMessage(), e);
-        }
-
-        return this;
-    }
-
-    private String buildSql(String tableName, List<String> fields, String whereClause) {
-
-        if (!columnOrValuePattern.matcher(tableName).matches()) {
-            throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Invalid tableName:" + tableName);
-        }
-
-        // Non-partitioned tables no longer append the condition clause
-        if (!partitioned) {
-            whereClause = "";
-        }
-        // TODO: adjust the condition evaluation logic
-        if (!whereClause.isEmpty()) {
-            String[] groups = whereClause.split("[,/]");
-            if (groups.length > 1) {
-                final PartitionSpec partitionSpec = new PartitionSpec(whereClause);
-
-                for (String key : partitionSpec.keys()) {
-                    if (!columnOrValuePattern.matcher(key).matches()) {
-                        throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Invalid partition key:" + key);
-                    }
-                    if (!columnOrValuePattern.matcher(partitionSpec.get(key)).matches()) {
-                        throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Invalid partition value:" + partitionSpec.get(key));
-                    }
-                }
-
-                List<String> list = partitionSpec.keys().stream().map(k -> k + "='" + partitionSpec.get(k) + "'").toList();
-                whereClause = String.join(" and ", list);
-            }
-        }
-
-        log.info("whereClause: {}", whereClause);
-
-        return "select " + String.join(",", fields) + " from " + tableName + (whereClause.isEmpty() ? "" : " where " + whereClause) + ";";
-    }
-
-    private void toArrowVector(Record record, VectorSchemaRoot root, int rowIndex) {
-        FieldVector vector;
-        String columnName;
-        for (Field field : schema.getFields()) {
-            vector = root.getVector(field);
-            if (vector != null) {
-                // Field names returned by ODPS are lowercase; normalize here for compatibility
-                columnName = field.getName().toLowerCase();
-
-                if (this.hasColumn(columnName)) {
-                    this.setValue(vector.getField().getType(), vector, rowIndex, record, columnName);
-                }
-            }
-        }
-    }
-
-    private boolean hasColumn(String columnName) {
-        return columns.contains(columnName);
-    }
-
-    private void setValue(ArrowType type, FieldVector vector, int rowIndex, Record record, String columnName) {
-        Object columnValue = record.get(columnName);
-        log.debug("columnName: {} type ID: {}, index:{}, value: {}", columnName, type.getTypeID(), rowIndex, columnValue);
-
-        if (columnValue == null) {
-            vector.setNull(rowIndex);
-//            log.warn("set null, columnName: {} type ID: {}, index:{}, value: {}", columnName, type.getTypeID(), rowIndex, record);
-            return;
-        }
-        switch (type.getTypeID()) {
-            case Int -> {
-                if (vector instanceof SmallIntVector smallIntVector) {
-                    smallIntVector.set(rowIndex, Short.parseShort(columnValue.toString()));
-                } else if (vector instanceof IntVector intVector) {
-                    intVector.set(rowIndex, Integer.parseInt(columnValue.toString()));
-                } else if (vector instanceof BigIntVector bigIntVector) {
-                    bigIntVector.set(rowIndex, Long.parseLong(columnValue.toString()));
-                } else if (vector instanceof TinyIntVector tinyIntVector) {
-                    tinyIntVector.set(rowIndex, Byte.parseByte(columnValue.toString()));
-                } else {
-                    log.warn("Unsupported type: {}", type);
-                }
-            }
-            case Utf8 -> {
-                if (vector instanceof VarCharVector varcharVector) {
-                    // record#getBytes default is UTF-8
-                    varcharVector.setSafe(rowIndex, record.getBytes(columnName));
-                } else {
-                    log.warn("Unsupported type: {}", type);
-                }
-            }
-            case FloatingPoint -> {
-                if (vector instanceof Float4Vector floatVector) {
-                    floatVector.set(rowIndex, Float.parseFloat(columnValue.toString()));
-                } else if (vector instanceof Float8Vector doubleVector) {
-                    doubleVector.set(rowIndex, Double.parseDouble(columnValue.toString()));
-                } else {
-                    log.warn("Unsupported type: {}",
type); - } - } - case Bool -> { - if (vector instanceof BitVector bitVector) { - - // switch str { - // case "1", "t", "T", "true", "TRUE", "True": - // return true, nil - // case "0", "f", "F", "false", "FALSE", "False": - // return false, nil - bitVector.set(rowIndex, record.getBoolean(columnName) ? 1 : 0); - } else { - log.warn("ArrowType ID is Bool: Unsupported type: {}", vector.getClass()); - } - } - - default -> throw new IllegalArgumentException("Unsupported type: " + type); - } - - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsUtil.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsUtil.java deleted file mode 100644 index d69b713..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsUtil.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2024 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.secretflow.dataproxy.manager.connector.odps; - -import com.aliyun.odps.Odps; -import com.aliyun.odps.account.Account; -import com.aliyun.odps.account.AliyunAccount; -import org.secretflow.dataproxy.common.model.datasource.conn.OdpsConnConfig; - -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.Map; - -/** - * odps util - * - * @author yuexie - * @date 2024-06-01 17:08:45 - */ -public class OdpsUtil { - - public static Odps buildOdps(OdpsConnConfig odpsConnConfig) { - Account account = new AliyunAccount(odpsConnConfig.getAccessKeyId(), odpsConnConfig.getAccessKeySecret()); - Odps odps = new Odps(account); - odps.setEndpoint(odpsConnConfig.getEndpoint()); - odps.setDefaultProject(odpsConnConfig.getProjectName()); - - return odps; - } - - public static Map getSqlFlag() { - HashMap hints = new LinkedHashMap<>(); - hints.put("odps.sql.type.system.odps2", "true"); - return hints; - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcAssistant.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcAssistant.java deleted file mode 100644 index 2c9d7cf..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcAssistant.java +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms;
-
-import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode;
-import org.secretflow.dataproxy.common.exceptions.DataproxyException;
-import org.secretflow.dataproxy.common.model.dataset.format.IndexType;
-import org.secretflow.dataproxy.common.model.dataset.format.TableFormatConfig;
-import org.secretflow.dataproxy.common.model.dataset.format.TableIndex;
-import org.secretflow.dataproxy.common.model.datasource.conn.JdbcBaseConnConfig;
-import org.secretflow.dataproxy.common.model.datasource.location.JdbcLocationConfig;
-import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder.ColumnBinder;
-
-import com.zaxxer.hikari.HikariConfig;
-import com.zaxxer.hikari.HikariDataSource;
-import org.apache.arrow.memory.BufferAllocator;
-import org.apache.arrow.memory.RootAllocator;
-import org.apache.arrow.vector.FieldVector;
-import org.apache.arrow.vector.types.pojo.ArrowType;
-import org.apache.arrow.vector.types.pojo.Field;
-import org.apache.arrow.vector.types.pojo.Schema;
-import org.apache.arrow.vector.util.Text;
-import org.apache.commons.collections4.CollectionUtils;
-import org.apache.commons.lang3.StringUtils;
-
-import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.sql.Connection;
-import java.sql.JDBCType;
-import java.time.LocalDateTime;
-import java.time.format.DateTimeFormatter;
-import java.time.format.DateTimeFormatterBuilder;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-
-/**
- * JDBC assistant
- *
- * @author muhong
- * @date 2023-09-07 19:55
- */
-public interface JdbcAssistant<C extends JdbcBaseConnConfig, L extends JdbcLocationConfig> {
-
-    /**
-     * Get the connection test query
-     *
-     * @return connection test query
-     */
-    String getConnectionTestQuery();
-
-    /**
-     * Get the JDBC driver class
-     *
-     * @return driver class name
-     */
-    String getDriverClass();
-
-    /**
-     * Initialize the url, catalog and schema settings of the datasource
-     *
-     * @param config     datasource hikari config
-     * @param connConfig JDBC connection parameters
-     */
-    void initDataSourceConfig(HikariConfig config, C connConfig);
-
-    default void fillDefaultValue(C connConfig, L locationConfig) {
-    }
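[Editor's note] decorateIdentifier/decorateStrValue, declared just below, are the two quoting hooks every dialect implements. A hypothetical MySQL-flavored pair, for illustration only (not the deleted implementation verbatim):

    public class MysqlQuotingDemo {
        static String decorateIdentifier(String identifier) {
            return "`" + identifier.replace("`", "``") + "`"; // escape embedded backticks
        }

        static String decorateStrValue(String value) {
            return "'" + value + "'";
        }

        public static void main(String[] args) {
            // select `user name` from `t_order` where `city` = 'Hangzhou'
            System.out.println("select " + decorateIdentifier("user name")
                    + " from " + decorateIdentifier("t_order")
                    + " where " + decorateIdentifier("city") + " = " + decorateStrValue("Hangzhou"));
        }
    }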
-
-    /**
-     * Decorate an identifier, e.g. MySQL wraps the identifier in backticks (`).
-     * Identifiers include: database, table, index, column, alias, view, stored procedure, partition,
-     * tablespace, resource group and other objects
-     *
-     * @param identifier identifier before decoration
-     * @return decorated identifier
-     */
-    String decorateIdentifier(String identifier);
-
-    default String decorateStrValue(String value) {
-        return "'" + value + "'";
-    }
-
-    /**
-     * Compose the tableName, e.g. DB2 rewrites tableName as schemaName.tableName
-     *
-     * @param locationConfig location config
-     * @return composed tableName
-     */
-    String composeTableName(L locationConfig);
-
-    /**
-     * Whether PreparedStatement batch insert is supported
-     *
-     * @return true if batch insert is supported
-     */
-    default boolean supportBatchInsert() {
-        return true;
-    }
-
-    /**
-     * Generate the column selection part of the query
-     *
-     * @param rawFieldName     raw field names
-     * @param composeTableName composed table name
-     * @return column selection SQL fragment
-     */
-    default String createFieldPart(List<String> rawFieldName, String composeTableName) {
-        List<String> requestedColumnNameList = rawFieldName.stream()
-            .map(this::decorateIdentifier)
-            .collect(Collectors.toList());
-        StringBuilder selectSqlBuilder = new StringBuilder();
-        selectSqlBuilder.append(StringUtils.join(requestedColumnNameList, ", "));
-        selectSqlBuilder.append(" from ").append(composeTableName).append(" ");
-        return selectSqlBuilder.toString();
-    }
-
-    /**
-     * Select SQL template
-     *
-     * @return select SQL template
-     */
-    default String selectSQLTemplate() {
-        return "select ${sqlPart} ${limitPart}";
-    }
-
-    default String generateLimitConditionTemplate(boolean otherFilter) {
-        return "limit %s";
-    }
-
-    /**
-     * Create-table SQL
-     *
-     * @param schema       Arrow schema
-     * @param formatConfig data format config
-     * @return SQL
-     */
-    default String createTableSql(String composeTableName, Schema schema, TableFormatConfig formatConfig) {
-        return "CREATE TABLE " + composeTableName + " ("
-            + createTableColumnTypes(schema.getFields(), formatConfig)
-            + "," + decorateIdentifier(formatConfig.getPrimaryKey())
-            + " BIGINT PRIMARY KEY NOT NULL AUTO_INCREMENT"
-            + createIndex(formatConfig.getIndexList(), schema.getFields())
-            + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4";
-    }
-
-    /**
-     * SQL statements to execute in the target database beforehand
-     *
-     * @param composeTableName composed table name
-     * @param schema           data schema
-     * @param formatConfig     format config
-     * @return list of SQL statements
-     */
-    List<String> preWorkSqls(String composeTableName, Schema schema, L locationConfig, TableFormatConfig formatConfig);
-
-    /**
-     * Generate the drop table SQL statement
-     *
-     * @param composeTableName composed full table name
-     * @return SQL
-     */
-    default String dropTableSql(String composeTableName) {
-        return "DROP TABLE IF EXISTS " + composeTableName;
-    }
-
-    /**
-     * Create the column definition SQL
-     *
-     * @param fields list of field definitions
-     * @return column definition SQL
-     */
-    default String createTableColumnTypes(List<Field> fields, TableFormatConfig formatConfig) {
-        return fields.stream()
-            .filter(field -> !field.getName().equals(formatConfig.getPrimaryKey()))
-            .map(this::arrowFieldToSqlColumnDefinition)
-            .collect(Collectors.joining(","));
-    }
-
-    /**
-     * Single column definition SQL
-     *
-     * @param field field definition
-     * @return SQL
-     */
-    default String arrowFieldToSqlColumnDefinition(Field field) {
-        return decorateIdentifier(field.getName()) + " " + jdbcTypeToDbTypeString(arrowTypeToJdbcType(field));
-    }
-
-    /**
-     * Convert an Arrow field to a JDBCType
-     *
-     * @param field Arrow field
-     * @return JDBC type
-     */
-    default JDBCType arrowTypeToJdbcType(Field field) {
-        // Create a temporary vector to probe the column binder
-        try (BufferAllocator tempAllocator = new RootAllocator();
-             FieldVector tempVector = field.createVector(tempAllocator)) {
-            ColumnBinder columnBinder = ColumnBinder.forVector(tempVector);
-            return JDBCType.valueOf(columnBinder.getJdbcType());
-        }
-    }
-
-    /**
-     * Convert a JDBC type to the database's type string
-     *
-     * @param jdbcType JDBC type
-     * @return data type as a string
-     */
-    String jdbcTypeToDbTypeString(JDBCType jdbcType);
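[Editor's note] Putting the hooks above together, the default createTableSql yields MySQL-style DDL. An illustration of its output shape for a two-column schema with primary key "id" and no secondary indexes; the exact column type strings depend on the dialect's jdbcTypeToDbTypeString, so this is only a hypothetical rendering:

    public class CreateTableSqlShape {
        public static void main(String[] args) {
            // Assumed mapping for illustration: VARCHAR -> TEXT, INT -> INT
            String ddl = "CREATE TABLE `t_user` ("
                    + "`name` TEXT,`age` INT"
                    + ",`id` BIGINT PRIMARY KEY NOT NULL AUTO_INCREMENT"
                    + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4";
            System.out.println(ddl);
        }
    }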
-    /**
-     * Builds the index-definition clauses of the create-table statement
-     *
-     * @param indexList index definitions
-     * @param fields    column definition objects
-     * @return the index-definition SQL fragment
-     */
-    default String createIndex(List<TableIndex> indexList, List<Field> fields) {
-        Map<String, Field> fieldMap = fields.stream().collect(Collectors.toMap(Field::getName, field -> field));
-
-        StringBuilder stringBuilder = new StringBuilder();
-        if (CollectionUtils.isNotEmpty(indexList)) {
-            for (int i = 0; i < indexList.size(); i++) {
-                TableIndex index = indexList.get(i);
-                if (CollectionUtils.isNotEmpty(index.getField())) {
-                    stringBuilder.append(",");
-                    stringBuilder.append(indexKeyword(index.getType()));
-                    stringBuilder.append(" ");
-                    stringBuilder.append(decorateIdentifier("idx_" + i));
-                    stringBuilder.append(" (");
-                    stringBuilder.append(index.getField().stream()
-                            .map(fieldName -> {
-                                String decorateIdentifier = decorateIdentifier(fieldName);
-                                Field field = fieldMap.get(fieldName);
-
-                                // string-typed columns need a bounded index prefix length
-                                if (field.getFieldType().getType().getTypeID() == ArrowType.ArrowTypeID.Utf8
-                                        || field.getFieldType().getType().getTypeID() == ArrowType.ArrowTypeID.LargeUtf8) {
-                                    decorateIdentifier = decorateIdentifier + "(128)";
-                                }
-                                return decorateIdentifier;
-                            })
-                            .collect(Collectors.joining(",")));
-                    stringBuilder.append(") ");
-                }
-            }
-        }
-        return stringBuilder.toString();
-    }
-
-    /**
-     * Resolves the SQL keyword for an index type
-     *
-     * @param indexType index type
-     * @return the keyword
-     */
-    String indexKeyword(IndexType indexType);
-
-    /**
-     * Obtains a database connection
-     *
-     * @return a pooled connection
-     */
-    default Connection getDatabaseConn(HikariDataSource dataSource) {
-        try {
-            return dataSource.getConnection();
-        } catch (Exception e) {
-            throw DataproxyException.of(DataproxyErrorCode.JDBC_DATASOURCE_CONNECTION_VALIDATE_FAILED, e);
-        }
-    }
-
-    /**
-     * Serializes a value into its string form
-     *
-     * @param type  jdbc type of the value
-     * @param value raw value
-     * @return the serialized string, or null for null input
-     */
-    default String serialize(JDBCType type, Object value) {
-        // in text form an empty string cannot be told apart from null
-        if (value == null) {
-            return null;
-        }
-
-        if (value instanceof Double || value instanceof Float || value instanceof Short || value instanceof Byte
-                || value instanceof Integer || value instanceof Long || value instanceof Boolean
-                || value instanceof BigDecimal || value instanceof BigInteger) {
-            return value.toString();
-        }
-
-        // byte arrays are handled separately
-        if (value instanceof byte[]) {
-            return decorateStrValue(new String((byte[]) value));
-        }
-        if (value instanceof Text) {
-            return decorateStrValue(value.toString());
-        }
-        if (value instanceof LocalDateTime) {
-            return decorateStrValue(((LocalDateTime) value).format(new DateTimeFormatterBuilder()
-                    .parseCaseInsensitive()
-                    .append(DateTimeFormatter.ISO_LOCAL_DATE)
-                    .appendLiteral(' ')
-                    .append(DateTimeFormatter.ISO_LOCAL_TIME)
-                    .toFormatter()));
-        }
-
-        // fallback: transfer everything as a string
-        return decorateStrValue(value.toString());
-    }
-}
\ No newline at end of file
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcConnector.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcConnector.java
deleted file mode 100644
index 49af08b..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcConnector.java
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- * Copyright 2023 Ant Group Co., Ltd.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms; - -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.InferSchemaResult; -import org.secretflow.dataproxy.common.model.command.DatasetReadCommand; -import org.secretflow.dataproxy.common.model.command.DatasetWriteCommand; -import org.secretflow.dataproxy.common.model.dataset.DatasetFormatConfig; -import org.secretflow.dataproxy.common.model.dataset.format.*; -import org.secretflow.dataproxy.common.model.datasource.DatasourceTypeEnum; -import org.secretflow.dataproxy.common.model.datasource.conn.JdbcBaseConnConfig; -import org.secretflow.dataproxy.common.model.datasource.location.JdbcLocationConfig; -import org.secretflow.dataproxy.common.model.datasource.location.LocationConfig; -import org.secretflow.dataproxy.common.utils.IdUtils; -import org.secretflow.dataproxy.manager.Connector; -import org.secretflow.dataproxy.manager.DataReader; -import org.secretflow.dataproxy.manager.DataWriter; -import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.JdbcToArrowConfig; -import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.JdbcToArrowConfigBuilder; -import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.JdbcToArrowUtils; - -import com.zaxxer.hikari.HikariConfig; -import com.zaxxer.hikari.HikariDataSource; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.collections4.MapUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.text.StringSubstitutor; - -import java.math.RoundingMode; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.*; -import java.util.stream.Collectors; - -/** - * @author muhong - * @date 2023-09-07 13:47 - */ -public class JdbcConnector implements Connector { - - protected JdbcBaseConnConfig connConfig; - - protected HikariDataSource dataSource; - - protected JdbcAssistant jdbcAssistant; - - public JdbcConnector() { - } - - public JdbcConnector(DatasourceTypeEnum type, JdbcBaseConnConfig connConfig) { - // 构造jdbc辅助类 - switch (type) { - case MYSQL: - this.jdbcAssistant = new MysqlJdbcAssistant(); - break; - default: - throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "不支持的jdbc数据源类型 " + type); - } - - HikariConfig config = new HikariConfig(); - config.setUsername(connConfig.getUserName()); - config.setPassword(connConfig.getPassword()); - config.setDriverClassName(this.jdbcAssistant.getDriverClass()); - config.setConnectionTestQuery(this.jdbcAssistant.getConnectionTestQuery()); - config.setMaximumPoolSize(connConfig.getMaximumPoolSize()); - config.setMinimumIdle(connConfig.getMinimumIdle()); - config.addDataSourceProperty("cachePrepStmts", connConfig.getCachePrepStmts()); - 
config.addDataSourceProperty("useServerPrepStmts", connConfig.getUseServerPrepStmts()); - config.addDataSourceProperty("prepStmtCacheSize", connConfig.getPrepStmtCacheSize()); - config.addDataSourceProperty("prepStmtCacheSqlLimit", connConfig.getPrepStmtCacheSqlLimit()); - config.addDataSourceProperty("allowLoadLocalInfile", "false"); - config.addDataSourceProperty("allowUrlInLocalInfile", "false"); - config.addDataSourceProperty("allowLoadLocalInfileInPath", ""); - config.addDataSourceProperty("autoDeserialize", "false"); - - // 不同数据库对 catalog 和 schema 的使用方法不同,所以交给子类处理 - this.jdbcAssistant.initDataSourceConfig(config, connConfig); - this.connConfig = connConfig; - try { - dataSource = new HikariDataSource(config); - } catch (Exception e) { - throw DataproxyException.of(DataproxyErrorCode.CREATE_DATASOURCE_CONNECTOR_ERROR, e.getMessage(), e); - } - checkAdaptorStatus(); - } - - /** - * 获取真实字段名 - * - * @param viewFieldName 展示列名 - * @param fieldMap 真实列名-展示列名映射 - * @return 真实列名 - */ - public static String getRawFieldName(String viewFieldName, Map fieldMap) { - // 若列名为空或映射关系为空,直接返回 - if (StringUtils.isEmpty(viewFieldName) || MapUtils.isEmpty(fieldMap)) { - return viewFieldName; - } - - for (Map.Entry entry : fieldMap.entrySet()) { - if (entry.getValue().equals(viewFieldName)) { - return entry.getKey(); - } - } - - // 映射关系中没有的,直接展示列名 - return viewFieldName; - } - - @Override - public InferSchemaResult inferSchema(BufferAllocator allocator, LocationConfig locationConfig, DatasetFormatConfig formatConfig) { - this.jdbcAssistant.fillDefaultValue(connConfig, (JdbcLocationConfig) locationConfig); - String table = ((JdbcLocationConfig) locationConfig).getTable(); - - TableFormatConfig reqFormatConfig = (TableFormatConfig) formatConfig.getFormatConfig(); - TableFormatConfig resultFormatConfig = reqFormatConfig == null ? 
TableFormatConfig.builder().build() : reqFormatConfig.toBuilder().build(); - - try (Connection conn = this.jdbcAssistant.getDatabaseConn(this.dataSource)) { - - // schema推断 - Schema schema = getSchema(allocator, conn, this.jdbcAssistant.composeTableName((JdbcLocationConfig) locationConfig)); - - // 原始数据字段集合 - Set fieldNameSet = schema.getFields().stream().map(Field::getName).collect(Collectors.toSet()); - Set viewFieldNameSet = new HashSet<>(); - - // 字段映射检查 - if (MapUtils.isNotEmpty(resultFormatConfig.getFieldMap())) { - resultFormatConfig.getFieldMap().forEach((rawFieldName, viewFieldName) -> { - if (!fieldNameSet.contains(rawFieldName)) { - throw DataproxyException.of(DataproxyErrorCode.FIELD_NOT_EXIST, "映射字段 " + rawFieldName); - } - if (viewFieldNameSet.contains(viewFieldName)) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "字段 " + viewFieldName + " 重复"); - } - viewFieldNameSet.add(viewFieldName); - }); - - fieldNameSet.stream() - .filter(fieldName -> !resultFormatConfig.getFieldMap().containsKey(fieldName)) - .forEach(fieldName -> { - if (viewFieldNameSet.contains(fieldName)) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "字段 " + fieldName + " 重复"); - } - viewFieldNameSet.add(fieldName); - resultFormatConfig.getFieldMap().put(fieldName, fieldName); - }); - } else { - resultFormatConfig.setFieldMap(fieldNameSet.stream().collect(Collectors.toMap(name -> name, name -> name))); - viewFieldNameSet.addAll(fieldNameSet); - } - - // 主键推断与校验 - if (StringUtils.isNotEmpty(resultFormatConfig.getPrimaryKey())) { - // 如果用户填写参数中已经包含主键,则校验字段存在性 - if (!fieldNameSet.contains(getRawFieldName(resultFormatConfig.getPrimaryKey(), resultFormatConfig.getFieldMap()))) { - throw DataproxyException.of(DataproxyErrorCode.FIELD_NOT_EXIST, "主键 " + resultFormatConfig.getPrimaryKey()); - } - } else { - String primaryKey = getPrimaryKeyColumnName(conn, table); - resultFormatConfig.setPrimaryKey(primaryKey); - } - - // 索引推断与校验 -// if (CollectionUtils.isNotEmpty(resultFormatConfig.getIndex())) { -// resultFormatConfig.getIndex().forEach(index -> { -// if (CollectionUtils.isEmpty(index.getField())) { -// throw FastDFException.of(DataProxyErrorCode.PARAMS_UNRELIABLE, "索引 " + index.getIndexName() + " 字段为空"); -// } -// if (!viewFieldNameSet.containsAll(index.getField())) { -// throw FastDFException.of(DataProxyErrorCode.PARAMS_UNRELIABLE, "索引 " + index.getIndexName() + " 字段不存在"); -// } -// }); -// } else { -// // 查询索引 -// List indexList = getIndex(conn, table); -// resultFormatConfig.setIndex(indexList); -// } - - // 原先就填写了分页字段,需要做严格校验 - if (resultFormatConfig.getPartitionBehavior() != null && StringUtils.isNotEmpty(resultFormatConfig.getPartitionBehavior().getFieldName())) { - String partitionFieldName = resultFormatConfig.getPartitionBehavior().getFieldName(); - if (!viewFieldNameSet.contains(partitionFieldName)) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "分页字段 " + partitionFieldName + " 不存在"); - } - ArrowType.ArrowTypeID partitionFieldTypeId = schema.findField(partitionFieldName).getFieldType().getType().getTypeID(); - - // 当且仅当主键为整型、浮点型、时间等能做数值计算时,可以作为分页条件 - if (!(ArrowType.ArrowTypeID.Int.equals(partitionFieldTypeId) - || ArrowType.ArrowTypeID.Decimal.equals(partitionFieldTypeId) - || ArrowType.ArrowTypeID.FloatingPoint.equals(partitionFieldTypeId) - || ArrowType.ArrowTypeID.Date.equals(partitionFieldTypeId))) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "分页字段 " + partitionFieldName + " 无法做数值计算"); - } - 
resultFormatConfig.getPartitionBehavior().setType(partitionFieldTypeId); - } else if (StringUtils.isNotEmpty(resultFormatConfig.getPrimaryKey())) { - // 原先没有填写分页参数,尝试推导 - // 若未指定分页参数,尝试用主键作为分页条件 - ArrowType.ArrowTypeID primaryKeyTypeId = schema.findField(resultFormatConfig.getPrimaryKey()).getFieldType().getType().getTypeID(); - if (ArrowType.ArrowTypeID.Int.equals(primaryKeyTypeId) - || ArrowType.ArrowTypeID.Decimal.equals(primaryKeyTypeId) - || ArrowType.ArrowTypeID.FloatingPoint.equals(primaryKeyTypeId) - || ArrowType.ArrowTypeID.Date.equals(primaryKeyTypeId)) { - resultFormatConfig.setPartitionBehavior(PartitionBehavior.builder() - .fieldName(resultFormatConfig.getPrimaryKey()) - .type(primaryKeyTypeId) - .build()); - } - } else { - resultFormatConfig.setPartitionBehavior(null); - } - - return InferSchemaResult.builder() - .schema(schema) - .datasetFormatConfig(DatasetFormatConfig.builder() - .type(DatasetFormatTypeEnum.TABLE) - .formatConfig(resultFormatConfig) - .build()) - .build(); - } catch (SQLException e) { - throw DataproxyException.of(DataproxyErrorCode.JDBC_CALL_ERROR, "表结构推断失败", e); - } - } - - @Override - public DataReader buildReader(BufferAllocator allocator, DatasetReadCommand readCommand) { - JdbcLocationConfig jdbcLocationConfig = (JdbcLocationConfig) readCommand.getLocationConfig().getLocationConfig(); - this.jdbcAssistant.fillDefaultValue(connConfig, jdbcLocationConfig); - - // 列名缺省处理 - if (CollectionUtils.isEmpty(readCommand.getFieldList())) { - readCommand.setFieldList(readCommand.getSchema().getFields().stream() - .map(Field::getName) - .collect(Collectors.toList()) - ); - } - - return new JdbcDataReader(allocator, - this.jdbcAssistant, - this.dataSource, - (TableFormatConfig) readCommand.getFormatConfig().getFormatConfig(), - readCommand.getOutputFormatConfig(), - this.jdbcAssistant.composeTableName(jdbcLocationConfig), - readCommand.getSchema(), - readCommand.getFieldList(), - readCommand.getFilter()); - } - - @Override - public DataWriter buildWriter(DatasetWriteCommand writeCommand) { - JdbcLocationConfig jdbcLocationConfig = (JdbcLocationConfig) writeCommand.getLocationConfig().getLocationConfig(); - this.jdbcAssistant.fillDefaultValue(connConfig, jdbcLocationConfig); - - if (writeCommand.getFormatConfig().getFormatConfig() == null) { - writeCommand.getFormatConfig().setFormatConfig(TableFormatConfig.builder().build()); - } - TableFormatConfig formatConfig = (TableFormatConfig) writeCommand.getFormatConfig().getFormatConfig(); - - // 未定义主键,需要补充一个 - if (StringUtils.isEmpty(formatConfig.getPrimaryKey())) { - String primaryKey = "pk_" + IdUtils.createRandString(6); - formatConfig.setPrimaryKey(primaryKey); - } - return new JdbcDataWriter(this.jdbcAssistant, - this.dataSource, - this.jdbcAssistant.composeTableName(jdbcLocationConfig), jdbcLocationConfig, - formatConfig, - writeCommand.getSchema()); - } - - @Override - public boolean isAvailable() { - try { - checkAdaptorStatus(); - return true; - } catch (Exception e) { - return false; - } - } - - /** - * 连通性测试 - */ - public void checkAdaptorStatus() { - try (Connection conn = this.jdbcAssistant.getDatabaseConn(dataSource); - PreparedStatement preparedStatement = conn.prepareStatement(this.jdbcAssistant.getConnectionTestQuery())) { - preparedStatement.execute(); - } catch (Exception e) { - throw DataproxyException.of(DataproxyErrorCode.JDBC_DATASOURCE_CONNECTION_VALIDATE_FAILED, e); - } - } - - protected Schema getSchema(BufferAllocator allocator, Connection conn, String tableName) throws SQLException { - Map 
valuesMap = new HashMap<>();
-        valuesMap.put("sqlPart", "* from " + tableName);
-        valuesMap.put("limitPart", String.format(this.jdbcAssistant.generateLimitConditionTemplate(false), 1));
-        String sampleSql = new StringSubstitutor(valuesMap).replace(this.jdbcAssistant.selectSQLTemplate());
-        try (ResultSet sampleRs = conn.createStatement().executeQuery(sampleSql)) {
-            // set the decimal rounding rule (CEILING) so lossy-precision values still convert
-            JdbcToArrowConfig config = new JdbcToArrowConfigBuilder()
-                    .setAllocator(allocator)
-                    .setBigDecimalRoundingMode(RoundingMode.CEILING)
-                    .build();
-            // schema inference
-            return JdbcToArrowUtils.jdbcToArrowSchema(sampleRs.getMetaData(), config);
-        }
-    }
-
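The primary-key lookup that follows uses only standard JDBC metadata. A self-contained sketch of the same call, assuming any open Connection (the table name and null catalog/schema are illustrative):

    import java.sql.Connection;
    import java.sql.ResultSet;
    import java.sql.SQLException;

    public class PrimaryKeySketch {
        // Returns the first declared primary-key column, or null when the table has none.
        static String firstPrimaryKey(Connection conn, String table) throws SQLException {
            try (ResultSet rs = conn.getMetaData().getPrimaryKeys(null, null, table)) {
                return rs.next() ? rs.getString("COLUMN_NAME") : null;
            }
        }
    }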
-    /**
-     * Gets the primary key column name.
-     * <p>
-     * Not needed for hive databases.
-     *
-     * @param conn      database connection
-     * @param tableName table name
-     * @return the primary key column name, or null if none is declared
-     * @throws SQLException on metadata lookup failure
-     */
-    protected String getPrimaryKeyColumnName(Connection conn, String tableName) throws SQLException {
-        String primaryKeyColumnName;
-        try (ResultSet primaryKeyResultSet = conn.getMetaData().getPrimaryKeys(
-                dataSource.getCatalog(), dataSource.getSchema(), tableName
-        )) {
-            if (!primaryKeyResultSet.next()) {
-                return null;
-            }
-            primaryKeyColumnName = primaryKeyResultSet.getString("COLUMN_NAME");
-            if (StringUtils.isEmpty(primaryKeyColumnName)) {
-                return null;
-            }
-        }
-        return primaryKeyColumnName;
-    }
-
-    /**
-     * Queries the table's indexes
-     *
-     * @param conn      database connection
-     * @param tableName table name
-     * @return the list of indexes
-     * @throws SQLException on metadata lookup failure
-     */
-    protected List<TableIndex> getIndex(Connection conn, String tableName) throws SQLException {
-        Map<String, TableIndex> indexMap = new HashMap<>();
-
-        try (ResultSet indexResultSet = conn.getMetaData().getIndexInfo(
-                dataSource.getCatalog(), dataSource.getSchema(), tableName, false, false
-        )) {
-            while (indexResultSet.next()) {
-                String colName = indexResultSet.getString("COLUMN_NAME");
-                String indexName = indexResultSet.getString("INDEX_NAME");
-
-                if (!indexMap.containsKey(indexName)) {
-                    indexMap.put(indexName, TableIndex.builder()
-                            .indexName(indexName)
-                            .type(indexResultSet.getBoolean("NON_UNIQUE") ? IndexType.INDEX : IndexType.UNIQUE)
-                            .field(new ArrayList<>())
-                            .build());
-                }
-                TableIndex index = indexMap.get(indexName);
-                index.getField().add(colName);
-            }
-        }
-        return new ArrayList<>(indexMap.values());
-    }
-
-    @Override
-    public void close() throws Exception {
-        this.dataSource.close();
-    }
-}
\ No newline at end of file
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcDataReader.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcDataReader.java
deleted file mode 100644
index cd8c70d..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcDataReader.java
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright 2023 Ant Group Co., Ltd.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
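The JdbcDataReader being deleted here sizes its splits from a 100-row sample: estimated bytes per row times total rows, divided by a 100 MiB budget, gives the split count; the value range of the partition column divided by that count gives the step. A worked sketch with assumed numbers:

    public class PartitionStepSketch {
        public static void main(String[] args) {
            // assumed sample: 100 rows weighing 10,000 bytes; table holds 10 million rows
            long sampleDataSize = 10_000, sampleRowCount = 100, totalRows = 10_000_000;
            long memoryLimit = 100L * 1024 * 1024;      // the reader's 100 MiB per-split budget
            double lower = 0, upper = 5_000_000;        // observed bounds of the partition column
            // splits needed so each batch stays inside the memory budget
            int partitions = (int) Math.ceil((double) sampleDataSize / sampleRowCount * totalRows / memoryLimit);
            double step = (upper - lower) / partitions; // value range scanned per split
            System.out.printf("partitions=%d, step=%.0f%n", partitions, step); // partitions=10, step=500000
        }
    }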
- */ - -package org.secretflow.dataproxy.manager.connector.rdbms; - -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.FlightContentFormatConfig; -import org.secretflow.dataproxy.common.model.dataset.format.PartitionBehavior; -import org.secretflow.dataproxy.common.model.dataset.format.TableFormatConfig; -import org.secretflow.dataproxy.manager.DataReader; -import org.secretflow.dataproxy.manager.SplitReader; -import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.JdbcToArrowConfig; -import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.JdbcToArrowConfigBuilder; -import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.JdbcToArrowUtils; - -import com.zaxxer.hikari.HikariDataSource; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ValueVectorUtility; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.text.StringSubstitutor; - -import java.io.IOException; -import java.math.BigDecimal; -import java.math.MathContext; -import java.math.RoundingMode; -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * @author muhong - * @date 2023-09-07 17:57 - */ -public class JdbcDataReader implements DataReader { - - protected static final int PARTITION_STAT_SAMPLE_ROW_COUNT = 100; - private static final long MEMORY_LIMIT = 100 * 1024 * 1024; - /** - * 数据库连接 - */ - private final HikariDataSource dataSource; - private final JdbcAssistant jdbcAssistant; - private final BufferAllocator allocator; - private final TableFormatConfig sourceSchemaConfig; - private final FlightContentFormatConfig outputFormatConfig; - private final List fieldList; - private final String filter; - /** - * 数据源连接配置 - */ - private final String composeTableName; - private final Schema schema; - - - public JdbcDataReader(BufferAllocator allocator, - JdbcAssistant jdbcAssistant, - HikariDataSource dataSource, - TableFormatConfig sourceSchemaConfig, - FlightContentFormatConfig outputFormatConfig, - String composeTableName, - Schema schema, - List fieldList, - String filter) { - - this.allocator = allocator; - this.jdbcAssistant = jdbcAssistant; - this.dataSource = dataSource; - this.schema = schema; - this.sourceSchemaConfig = sourceSchemaConfig; - this.fieldList = fieldList; - this.filter = filter; - this.outputFormatConfig = outputFormatConfig; - this.composeTableName = composeTableName; - } - - @Override - public List createSplitReader(int splitNumber) { - fillPartitionBehavior(); - - // todo: 根据splitNumber与partitionBehavior分区 - return Arrays.asList(new JdbcSplitReader(allocator, jdbcAssistant, outputFormatConfig, dataSource, composeTableName, schema, this.sourceSchemaConfig.getPartitionBehavior(), fieldList, filter)); - } - - private void fillPartitionBehavior() { - try { - PartitionBehavior partitionBehavior = sourceSchemaConfig.getPartitionBehavior(); - if (partitionBehavior == null || StringUtils.isEmpty(partitionBehavior.getFieldName())) { - return; - } - - String partitionField = this.jdbcAssistant.decorateIdentifier(partitionBehavior.getFieldName()); - - try (Connection conn = this.jdbcAssistant.getDatabaseConn(dataSource)) { - 
-                // determine the upper bound
-                if (StringUtils.isEmpty(partitionBehavior.getUpperBound())) {
-                    Map<String, String> valuesMap = new HashMap<>();
-                    valuesMap.put("sqlPart", "max(" + partitionField + ") from " + composeTableName);
-                    valuesMap.put("limitPart", "");
-                    String maxPkSql = new StringSubstitutor(valuesMap).replace(this.jdbcAssistant.selectSQLTemplate());
-                    try (ResultSet maxPkRs = conn.createStatement().executeQuery(maxPkSql)) {
-                        if (!maxPkRs.next()) {
-                            return;
-                        }
-                        String maxStr = maxPkRs.getObject(1).toString();
-                        partitionBehavior.setUpperBound(maxStr);
-                    }
-                }
-
-                // determine the lower bound
-                if (StringUtils.isEmpty(partitionBehavior.getLowerBound())) {
-                    Map<String, String> valuesMap = new HashMap<>();
-                    valuesMap.put("sqlPart", "min(" + partitionField + ") from " + composeTableName);
-                    valuesMap.put("limitPart", "");
-                    String minPkSql = new StringSubstitutor(valuesMap).replace(this.jdbcAssistant.selectSQLTemplate());
-                    try (ResultSet minPkRs = conn.createStatement().executeQuery(minPkSql)) {
-                        if (!minPkRs.next()) {
-                            return;
-                        }
-                        String minStr = minPkRs.getObject(1).toString();
-                        partitionBehavior.setLowerBound(minStr);
-                    }
-                }
-
-                // estimate the step interval
-                if (StringUtils.isEmpty(partitionBehavior.getStep())) {
-                    Map<String, String> valuesMap = new HashMap<>();
-
-                    // count the total rows
-                    long count = 0;
-                    valuesMap.put("sqlPart", "count(*) from " + composeTableName);
-                    valuesMap.put("limitPart", "");
-                    String countSql = new StringSubstitutor(valuesMap).replace(this.jdbcAssistant.selectSQLTemplate());
-                    try (ResultSet countRs = conn.createStatement().executeQuery(countSql)) {
-                        countRs.next();
-                        count = countRs.getLong(1);
-                    }
-                    // empty dataset: nothing to estimate
-                    if (count == 0) {
-                        return;
-                    }
-
-                    valuesMap.put("sqlPart", this.jdbcAssistant.createFieldPart(this.fieldList, this.composeTableName));
-                    valuesMap.put("limitPart", String.format(
-                            this.jdbcAssistant.generateLimitConditionTemplate(false), PARTITION_STAT_SAMPLE_ROW_COUNT
-                    ));
-                    String sampleSql = new StringSubstitutor(valuesMap).replace(this.jdbcAssistant.selectSQLTemplate());
-                    try (ResultSet sampleRs = conn.createStatement().executeQuery(sampleSql)) {
-                        // convert the sample rows to arrow
-                        int sampleRowCount = 0;
-                        long sampleDataSize = 0;
-
-                        // set the decimal rounding rule (CEILING) so lossy-precision values still convert
-                        JdbcToArrowConfig config = new JdbcToArrowConfigBuilder()
-                                .setAllocator(allocator)
-                                .setTargetBatchSize(PARTITION_STAT_SAMPLE_ROW_COUNT)
-                                .setBigDecimalRoundingMode(RoundingMode.CEILING)
-                                .build();
-
-                        try (VectorSchemaRoot root = VectorSchemaRoot.create(
-                                JdbcToArrowUtils.jdbcToArrowSchema(sampleRs.getMetaData(), config), config.getAllocator())) {
-                            if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) {
-                                ValueVectorUtility.preAllocate(root, config.getTargetBatchSize());
-                            }
-                            JdbcToArrowUtils.jdbcToArrowVectors(sampleRs, root, config);
-                            // total size is the sum over all column buffers
-                            sampleRowCount = root.getRowCount();
-                            sampleDataSize = root.getFieldVectors().stream().map(ValueVector::getBufferSize).reduce(0, Integer::sum);
-                        }
-
-                        // step interval = (upper bound - lower bound) / ceil((sample bytes / sample rows) * total rows / memory budget)
-                        BigDecimal dataInterval = new BigDecimal(partitionBehavior.getUpperBound()).subtract(new BigDecimal(partitionBehavior.getLowerBound()));
-                        if (dataInterval.compareTo(BigDecimal.ZERO) == 0) {
-                            partitionBehavior.setStep("1");
-                        } else {
-                            int partitionSize = (int) Math.ceil((double) sampleDataSize / sampleRowCount * count / MEMORY_LIMIT);
-                            BigDecimal step = dataInterval.divide(new BigDecimal(partitionSize), MathContext.DECIMAL32);
-                            partitionBehavior.setStep(step.toString());
-                        }
-                    }
-                }
-            }
-        } catch (SQLException | IOException e) {
-            throw
DataproxyException.of(DataproxyErrorCode.JDBC_GET_PARTITION_STATS_FAILED, e); - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcDataWriter.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcDataWriter.java deleted file mode 100644 index 5b28999..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcDataWriter.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms; - -import com.zaxxer.hikari.HikariDataSource; -import lombok.extern.slf4j.Slf4j; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.dataset.format.TableFormatConfig; -import org.secretflow.dataproxy.common.model.datasource.location.JdbcLocationConfig; -import org.secretflow.dataproxy.common.utils.JsonUtils; -import org.secretflow.dataproxy.manager.DataWriter; - -import java.io.IOException; -import java.sql.Connection; -import java.sql.SQLException; -import java.util.List; - -/** - * jdbc数据源写入 - * - * @author muhong - * @date 2023-09-08 15:37 - */ -@Slf4j -public class JdbcDataWriter implements DataWriter { - - /** - * 数据库连接 - */ - protected HikariDataSource dataSource; - - /** - * 表名 - */ - protected String composeTableName; - - protected JdbcAssistant jdbcAssistant; - - protected JdbcLocationConfig locationConfig; - - protected TableFormatConfig formatConfig; - - //the statement in the format of either merge into or insert into sql statement - protected String stmt; - - private boolean initialized; - - public JdbcDataWriter() { - } - - public JdbcDataWriter(JdbcAssistant jdbcAssistant, HikariDataSource dataSource, String composeTableName, JdbcLocationConfig locationConfig, TableFormatConfig formatConfig, Schema schema) { - this.jdbcAssistant = jdbcAssistant; - this.dataSource = dataSource; - this.initialized = false; - this.formatConfig = formatConfig; - this.locationConfig = locationConfig; - - this.composeTableName = composeTableName; - - ensureInitialized(schema); - } - - protected void ensureInitialized(Schema schema) { - if (!this.initialized) { - this.initialize(schema); - this.initialized = true; - } - } - - protected void initialize(Schema schema) { - List preSqlList = this.jdbcAssistant.preWorkSqls(this.composeTableName, schema, this.locationConfig, this.formatConfig); - log.info("[JdbcDataWriter] preSql execute start, sql: {}", JsonUtils.toJSONString(preSqlList)); - - try (Connection conn = this.jdbcAssistant.getDatabaseConn(dataSource)) { - // do nothing - // Avoid SQL injection issues - // About to Delete - } catch (SQLException e) { - throw 
DataproxyException.of(DataproxyErrorCode.JDBC_CREATE_TABLE_FAILED, e.getMessage(), e); - } - - // 构造sql预提交模板 - this.stmt = String.format("insert into %s(%s) values(%s)", composeTableName, - String.join(",", schema.getFields().stream().map(field -> this.jdbcAssistant.decorateIdentifier(field.getName())).toArray(String[]::new)), - String.join(",", schema.getFields().stream().map(field -> "?").toArray(String[]::new))); - } - - @Override - public void write(VectorSchemaRoot root) throws IOException { - throw DataproxyException.of(DataproxyErrorCode.JDBC_INSERT_INTO_TABLE_FAILED, "jdbc not support write"); - } - - @Override - public void flush() throws IOException { - - } - - @Override - public void destroy() throws IOException { - - } - - @Override - public void close() throws Exception { - try { - if (this.dataSource != null) { - this.dataSource.close(); - } - } catch (Exception ignored) { - } - } -} \ No newline at end of file diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcSplitReader.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcSplitReader.java deleted file mode 100644 index c63e4fe..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/JdbcSplitReader.java +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
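For reference, the initialize() method above builds its prepared-statement template with String.format; a standalone sketch with hypothetical table and column names:

    import java.util.Collections;

    public class InsertTemplateSketch {
        public static void main(String[] args) {
            String table = "`user_table`";                // hypothetical composed table name
            String[] cols = {"`id`", "`name`"};
            String stmt = String.format("insert into %s(%s) values(%s)",
                    table,
                    String.join(",", cols),
                    String.join(",", Collections.nCopies(cols.length, "?")));
            System.out.println(stmt);                     // insert into `user_table`(`id`,`name`) values(?,?)
        }
    }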
- */ - -package org.secretflow.dataproxy.manager.connector.rdbms; - -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.FlightContentFormatConfig; -import org.secretflow.dataproxy.common.model.dataset.format.PartitionBehavior; -import org.secretflow.dataproxy.common.utils.JsonUtils; -import org.secretflow.dataproxy.manager.SplitReader; -import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.JdbcToArrowConfig; -import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.JdbcToArrowConfigBuilder; -import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.JdbcToArrowUtils; - -import com.zaxxer.hikari.HikariDataSource; -import lombok.extern.slf4j.Slf4j; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.text.StringSubstitutor; - -import java.io.IOException; -import java.math.RoundingMode; -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -/** - * @author muhong - * @date 2023-09-07 19:47 - */ -@Slf4j -public class JdbcSplitReader extends ArrowReader implements SplitReader { - - private final PartitionBehavior partitionBehavior; - /** - * 数据库连接 - */ - private HikariDataSource dataSource; - /** - * 表名 - */ - private String composeTableName; - private JdbcAssistant jdbcAssistant; - private FlightContentFormatConfig outputFormatConfig; - - private List fieldList; - - private String filter; - - private Schema schema; - - private String currentPartition = null; - - private int currentSize = 0; - - private String sqlPartTemplate; - private JdbcToArrowConfig config; - - public JdbcSplitReader(BufferAllocator allocator, - JdbcAssistant jdbcAssistant, - FlightContentFormatConfig outputFormatConfig, - HikariDataSource dataSource, - String composeTableName, - Schema schema, - PartitionBehavior partitionBehavior, - List fieldList, - String filter) { - super(allocator); - this.jdbcAssistant = jdbcAssistant; - this.dataSource = dataSource; - this.composeTableName = composeTableName; - this.partitionBehavior = partitionBehavior; - this.fieldList = fieldList; - this.filter = filter; - this.outputFormatConfig = outputFormatConfig; - this.schema = schema; - - this.config = new JdbcToArrowConfigBuilder() - .setAllocator(allocator) - .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) - // 设置浮点型精度规则,四舍五入,保证精度有损场景 - .setBigDecimalRoundingMode(RoundingMode.CEILING) - .build(); - - // 若指定读取顺序则调整schema - if (CollectionUtils.isNotEmpty(fieldList)) { - this.schema = new Schema(fieldList.stream().map(schema::findField).collect(Collectors.toList())); - } else { - this.schema = schema; - } - } - - private static void preAllocate(VectorSchemaRoot root, int targetSize) { - for (ValueVector vector : root.getFieldVectors()) { - if (vector instanceof BaseFixedWidthVector) { - ((BaseFixedWidthVector) vector).allocateNew(targetSize); - } - } - } - - @Override - public ArrowReader startRead() { - - String fieldPart = 
this.jdbcAssistant.createFieldPart(this.fieldList, this.composeTableName); - - StringBuilder selectSqlBuilder = new StringBuilder().append(fieldPart); - - // sql模板替换参数列表 - if (this.partitionBehavior != null && this.partitionBehavior.isValid()) { - log.info("[startRead] partitionBehavior: {}", JsonUtils.toJSONString(this.partitionBehavior)); - String partitionFieldName = this.jdbcAssistant.decorateIdentifier(this.partitionBehavior.getFieldName()); - selectSqlBuilder.append("where ") - .append(partitionFieldName) - .append(">= %s and ") - .append(partitionFieldName) - .append("< %s"); - } - this.sqlPartTemplate = selectSqlBuilder.toString(); - return this; - } - - @Override - public boolean loadNextBatch() throws IOException { - VectorSchemaRoot root = getVectorSchemaRoot(); - root.clear(); - - String sql = generateNextSql(); - if (StringUtils.isEmpty(sql)) { - return false; - } - - try (Connection conn = getDatabaseConn(); - ResultSet resultSet = conn.createStatement().executeQuery(sql)) { - JdbcToArrowUtils.jdbcToArrowVectors(resultSet, getVectorSchemaRoot(), this.config); - } catch (SQLException e) { - throw DataproxyException.of(DataproxyErrorCode.JDBC_FETCH_BATCH_DATA_FAILED, e); - } - return true; - } - - @Override - public long bytesRead() { - return 0; - } - - @Override - protected void closeReadSource() throws IOException { - close(); - } - - @Override - protected Schema readSchema() { - return this.schema; - } - - @Override - public void close() { - try { - if (this.dataSource != null) { - this.dataSource.close(); - } - } catch (Exception ignored) { - } - } - - /** - * 获取数据库连接 - * - * @return - */ - protected Connection getDatabaseConn() { - try { - return dataSource.getConnection(); - } catch (Exception e) { - throw DataproxyException.of(DataproxyErrorCode.JDBC_GET_CONN_THREAD_FAILED, e); - } - } - - // 生成下一条查询sql - private String generateNextSql() { - String currentLowerBoundStr = null; - String currentUpperBoundStr = null; - - // 分页规则 - if (this.partitionBehavior != null && this.partitionBehavior.isValid()) { - String current = StringUtils.isEmpty(this.currentPartition) ? 
this.partitionBehavior.getLowerBound() : this.currentPartition; - - switch (this.partitionBehavior.getType()) { - case Int: { - Long currentLowerBound = Long.valueOf(current); - if (currentLowerBound > Long.valueOf(this.partitionBehavior.getUpperBound())) { - return null; - } - - Long currentUpperBound = currentLowerBound + (long) Math.ceil(Double.parseDouble(this.partitionBehavior.getStep())); - - currentLowerBoundStr = String.valueOf(currentLowerBound); - currentUpperBoundStr = String.valueOf(currentUpperBound); - break; - } - case FloatingPoint: - case Decimal: { - Double currentLowerBound = Double.valueOf(current); - if (currentLowerBound > Double.valueOf(this.partitionBehavior.getUpperBound())) { - return null; - } - - Double currentUpperBound = currentLowerBound + Double.parseDouble(this.partitionBehavior.getStep()); - - currentLowerBoundStr = String.valueOf(currentLowerBound); - currentUpperBoundStr = String.valueOf(currentUpperBound); - break; - } - default: - throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "分页字段 " + this.partitionBehavior.getFieldName() + " 无法做数值计算"); - } - } else { - if (this.currentSize > 0) { - return null; - } - } - - Map valuesMap = new HashMap<>(); - valuesMap.put("sqlPart", String.format(this.sqlPartTemplate, currentLowerBoundStr, currentUpperBoundStr)); - valuesMap.put("limitPart", ""); - String execSql = new StringSubstitutor(valuesMap).replace(this.jdbcAssistant.selectSQLTemplate()); - - this.currentPartition = currentUpperBoundStr; - this.currentSize++; - return execSql; - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/MysqlJdbcAssistant.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/MysqlJdbcAssistant.java deleted file mode 100644 index bb0b8d6..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/MysqlJdbcAssistant.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
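generateNextSql() above turns the inferred bounds and step into consecutive half-open ranges on the partition column. A standalone sketch of that walk (identifiers are made up; in the real reader each range is substituted into the select template):

    public class RangeWalkSketch {
        public static void main(String[] args) {
            long lower = 0, upper = 1_000, step = 400;   // assumed inferred bounds and step
            for (long cur = lower; cur <= upper; cur += step) {
                System.out.printf("where `id`>= %d and `id`< %d%n", cur, cur + step);
            }
            // prints ranges [0,400), [400,800), [800,1200); overshooting the upper
            // bound on the last range is harmless for a read-side filter
        }
    }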
- */ - -package org.secretflow.dataproxy.manager.connector.rdbms; - -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.dataset.format.IndexType; -import org.secretflow.dataproxy.common.model.dataset.format.TableFormatConfig; -import org.secretflow.dataproxy.common.model.datasource.conn.MysqlConnConfig; -import org.secretflow.dataproxy.common.model.datasource.location.MysqlLocationConfig; - -import com.zaxxer.hikari.HikariConfig; -import org.apache.arrow.vector.types.pojo.Schema; - -import java.sql.JDBCType; -import java.util.ArrayList; -import java.util.List; - -/** - * @author muhong - * @date 2023-09-07 19:56 - */ -public class MysqlJdbcAssistant implements JdbcAssistant { - private static final String MYSQL_JDBC_URL_PREFIX = "jdbc:mysql://"; - private static final String MYSQL_CONNECTION_TEST_QUERY = "SELECT 1 FROM DUAL"; - private static final String MYSQL_DRIVER_CLASS_NAME = "com.mysql.cj.jdbc.Driver"; - - @Override - public String getConnectionTestQuery() { - return MYSQL_CONNECTION_TEST_QUERY; - } - - @Override - public String getDriverClass() { - return MYSQL_DRIVER_CLASS_NAME; - } - - @Override - public void initDataSourceConfig(HikariConfig config, MysqlConnConfig connConfig) { - config.setJdbcUrl(MYSQL_JDBC_URL_PREFIX + connConfig.getHost()); - config.setCatalog(connConfig.getDatabase()); - } - - @Override - public String decorateIdentifier(String identifier) { - return "`" + identifier + "`"; - } - - @Override - public String composeTableName(MysqlLocationConfig locationConfig) { - return decorateIdentifier(locationConfig.getTable()); - } - - @Override - public String jdbcTypeToDbTypeString(JDBCType jdbcType) { - return switch (jdbcType) { - case TINYINT -> "TINYINT"; - case SMALLINT -> "SMALLINT"; - case INTEGER -> "INT"; - case BIGINT -> "BIGINT"; - case REAL -> "REAL"; - case FLOAT -> "FLOAT"; - case DOUBLE -> "DOUBLE"; - case DECIMAL -> "DECIMAL"; - case BOOLEAN -> "BOOLEAN"; - case DATE -> "DATE"; - case TIME -> "TIME"; - case TIMESTAMP, TIMESTAMP_WITH_TIMEZONE -> "TIMESTAMP DEFAULT '2000-01-01 00:00:00'"; - case VARCHAR -> "TEXT"; - case LONGVARCHAR -> "LONGTEXT"; - case BINARY, VARBINARY -> "BLOB"; - case LONGVARBINARY -> "LONGBLOB"; - default -> throw DataproxyException.of(DataproxyErrorCode.UNSUPPORTED_FIELD_TYPE, jdbcType.name()); - }; - } - - @Override - public String indexKeyword(IndexType indexType) { - switch (indexType) { - case UNIQUE: - return "UNIQUE KEY"; - case INDEX: - return "INDEX"; - default: - throw DataproxyException.of(DataproxyErrorCode.UNSUPPORTED_INDEX_TYPE, indexType.name()); - } - } - - @Override - public List preWorkSqls(String composeTableName, Schema schema, MysqlLocationConfig locationConfig, TableFormatConfig formatConfig) { - List preWorkSqls = new ArrayList<>(); - preWorkSqls.add(dropTableSql(composeTableName)); - String createTabelSql = "CREATE TABLE " + composeTableName + " (" - + createTableColumnTypes(schema.getFields(), formatConfig) - + "," + decorateIdentifier(formatConfig.getPrimaryKey()) - + " BIGINT PRIMARY KEY NOT NULL AUTO_INCREMENT" - + createIndex(formatConfig.getIndexList(), schema.getFields()) - + ") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4"; - preWorkSqls.add(createTabelSql); - return preWorkSqls; - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/ArrowVectorIterator.java 
b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/ArrowVectorIterator.java
deleted file mode 100644
index 404a89a..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/ArrowVectorIterator.java
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor;
-
-import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer.CompositeJdbcConsumer;
-import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer.JdbcConsumer;
-
-import org.apache.arrow.util.AutoCloseables;
-import org.apache.arrow.util.Preconditions;
-import org.apache.arrow.vector.FieldVector;
-import org.apache.arrow.vector.VectorSchemaRoot;
-import org.apache.arrow.vector.types.pojo.ArrowType;
-import org.apache.arrow.vector.types.pojo.Schema;
-import org.apache.arrow.vector.util.ValueVectorUtility;
-
-import java.sql.ResultSet;
-import java.sql.ResultSetMetaData;
-import java.sql.SQLException;
-import java.util.Iterator;
-
-
-/**
- * VectorSchemaRoot iterator for partially converting JDBC data.
- */
-public class ArrowVectorIterator implements Iterator<VectorSchemaRoot>, AutoCloseable {
-
-    final CompositeJdbcConsumer compositeConsumer;
-    private final ResultSet resultSet;
-    private final JdbcToArrowConfig config;
-    private final Schema schema;
-    private final ResultSetMetaData rsmd;
-    private final JdbcConsumer[] consumers;
-    private final int targetBatchSize;
-    // this is used only if reusing the vector schema root is enabled.
-    private VectorSchemaRoot nextBatch;
-    // This is used to track whether the ResultSet has been fully read, and is needed specifically for cases where there
-    // is a ResultSet having zero rows (empty):
-    private boolean readComplete = false;
-
-    /**
-     * Construct an instance.
-     */
-    private ArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException {
-        this.resultSet = resultSet;
-        this.config = config;
-        this.schema = JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config);
-        this.targetBatchSize = config.getTargetBatchSize();
-
-        rsmd = resultSet.getMetaData();
-        consumers = new JdbcConsumer[rsmd.getColumnCount()];
-        this.compositeConsumer = new CompositeJdbcConsumer(consumers);
-        this.nextBatch = config.isReuseVectorSchemaRoot() ? createVectorSchemaRoot() : null;
-    }
-
-    /**
-     * Create an ArrowVectorIterator to partially convert data.
- */ - public static ArrowVectorIterator create( - ResultSet resultSet, - JdbcToArrowConfig config) - throws SQLException { - ArrowVectorIterator iterator = null; - try { - iterator = new ArrowVectorIterator(resultSet, config); - } catch (Throwable e) { - AutoCloseables.close(e, iterator); - throw new RuntimeException("Error occurred while creating iterator.", e); - } - return iterator; - } - - private void consumeData(VectorSchemaRoot root) { - // consume data - try { - int readRowCount = 0; - if (targetBatchSize == JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { - while (resultSet.next()) { - ValueVectorUtility.ensureCapacity(root, readRowCount + 1); - compositeConsumer.consume(resultSet); - readRowCount++; - } - readComplete = true; - } else { - while ((readRowCount < targetBatchSize) && !readComplete) { - if (resultSet.next()) { - compositeConsumer.consume(resultSet); - readRowCount++; - } else { - readComplete = true; - } - } - } - - root.setRowCount(readRowCount); - } catch (Throwable e) { - compositeConsumer.close(); - throw new RuntimeException("Error occurred while consuming data.", e); - } - } - - private VectorSchemaRoot createVectorSchemaRoot() throws SQLException { - VectorSchemaRoot root = null; - try { - root = VectorSchemaRoot.create(schema, config.getAllocator()); - if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { - ValueVectorUtility.preAllocate(root, config.getTargetBatchSize()); - } - } catch (Throwable e) { - if (root != null) { - root.close(); - } - throw new RuntimeException("Error occurred while creating schema root.", e); - } - initialize(root); - return root; - } - - private void initialize(VectorSchemaRoot root) throws SQLException { - for (int i = 1; i <= consumers.length; i++) { - final JdbcFieldInfo columnFieldInfo = JdbcToArrowUtils.getJdbcFieldInfoForColumn(rsmd, i, config); - ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo); - consumers[i - 1] = JdbcToArrowUtils.getConsumer( - arrowType, i, JdbcToArrowUtils.isColumnNullable(resultSet.getMetaData(), i, columnFieldInfo), root.getVector(i - 1), config); - } - } - - // Loads the next schema root or null if no more rows are available. - private void load(VectorSchemaRoot root) { - for (int i = 0; i < consumers.length; i++) { - FieldVector vec = root.getVector(i); - if (config.isReuseVectorSchemaRoot()) { - // if we are reusing the vector schema root, - // we must reset the vector before populating it with data. - vec.reset(); - } - consumers[i].resetValueVector(vec); - } - - consumeData(root); - } - - @Override - public boolean hasNext() { - return !readComplete; - } - - /** - * Gets the next vector. - * If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false, - * the client is responsible for freeing its resources. - */ - @Override - public VectorSchemaRoot next() { - Preconditions.checkArgument(hasNext()); - try { - VectorSchemaRoot ret = config.isReuseVectorSchemaRoot() ? nextBatch : createVectorSchemaRoot(); - load(ret); - return ret; - } catch (Exception e) { - close(); - throw new RuntimeException("Error occurred while getting next schema root.", e); - } - } - - /** - * Clean up resources ONLY WHEN THE {@link VectorSchemaRoot} HOLDING EACH BATCH IS REUSED. If a new VectorSchemaRoot - * is created for each batch, each root must be closed manually by the client code. 
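A usage sketch for this iterator, assuming reuse of the vector schema root is disabled so each returned root is closed by the caller; the consumer callback stands in for real batch handling, and the sketch assumes it sits in the same package as the adaptor classes removed by this patch:

    import org.apache.arrow.vector.VectorSchemaRoot;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.util.function.Consumer;

    public class IteratorUsageSketch {
        // Drains a ResultSet as a sequence of Arrow batches.
        static void drain(ResultSet resultSet, JdbcToArrowConfig config,
                          Consumer<VectorSchemaRoot> process) throws SQLException {
            try (ArrowVectorIterator it = ArrowVectorIterator.create(resultSet, config)) {
                while (it.hasNext()) {
                    try (VectorSchemaRoot root = it.next()) { // caller closes each batch when reuse is off
                        process.accept(root);
                    }
                }
            }
        }
    }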
- */ - @Override - public void close() { - if (config.isReuseVectorSchemaRoot()) { - nextBatch.close(); - compositeConsumer.close(); - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/Constants.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/Constants.java deleted file mode 100644 index 6c58064..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/Constants.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor; - -/** - * String constants used for metadata returned on Vectors. - */ -public class Constants { - public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME"; - public static final String SQL_SCHEMA_NAME_KEY = "SQL_SCHEMA_NAME"; - public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME"; - public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME"; - public static final String SQL_TYPE_KEY = "SQL_TYPE"; - private Constants() { - } - -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcFieldInfo.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcFieldInfo.java deleted file mode 100644 index 1ade6dd..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcFieldInfo.java +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor; - -import org.apache.arrow.util.Preconditions; - -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Types; - -/** - * This class represents the information about a JDBC ResultSet Field that is - * needed to construct an {@link org.apache.arrow.vector.types.pojo.ArrowType}. - * Currently, this is: - *

- * <ul>
- *   <li>The JDBC {@link Types} type.</li>
- *   <li>The nullability.</li>
- *   <li>The field's precision (used for {@link Types#DECIMAL} and {@link Types#NUMERIC} types).</li>
- *   <li>The field's scale (used for {@link Types#DECIMAL} and {@link Types#NUMERIC} types).</li>
- * </ul>
- */ -public class JdbcFieldInfo { - private final int column; - private final int jdbcType; - private final int nullability; - private final int precision; - private final int scale; - - /** - * Builds a JdbcFieldInfo using only the {@link Types} type. Do not use this constructor - * if the field type is {@link Types#DECIMAL} or {@link Types#NUMERIC}; the precision and - * scale will be set to 0. - * - * @param jdbcType The {@link Types} type. - * @throws IllegalArgumentException if jdbcType is {@link Types#DECIMAL} or {@link Types#NUMERIC}. - */ - public JdbcFieldInfo(int jdbcType) { - Preconditions.checkArgument( - (jdbcType != Types.DECIMAL && jdbcType != Types.NUMERIC), - "DECIMAL and NUMERIC types require a precision and scale; please use another constructor."); - - this.column = 0; - this.jdbcType = jdbcType; - this.nullability = ResultSetMetaData.columnNullableUnknown; - this.precision = 0; - this.scale = 0; - } - - /** - * Builds a JdbcFieldInfo from the {@link Types} type, precision, and scale. - * Use this constructor for {@link Types#DECIMAL} and {@link Types#NUMERIC} types. - * - * @param jdbcType The {@link Types} type. - * @param precision The field's numeric precision. - * @param scale The field's numeric scale. - */ - public JdbcFieldInfo(int jdbcType, int precision, int scale) { - this.column = 0; - this.jdbcType = jdbcType; - this.nullability = ResultSetMetaData.columnNullableUnknown; - this.precision = precision; - this.scale = scale; - } - - /** - * Builds a JdbcFieldInfo from the {@link Types} type, nullability, precision, and scale. - * - * @param jdbcType The {@link Types} type. - * @param nullability The nullability. Must be one of {@link ResultSetMetaData#columnNoNulls}, - * {@link ResultSetMetaData#columnNullable}, or {@link ResultSetMetaData#columnNullableUnknown}. - * @param precision The field's numeric precision. - * @param scale The field's numeric scale. - */ - public JdbcFieldInfo(int jdbcType, int nullability, int precision, int scale) { - this.column = 0; - this.jdbcType = jdbcType; - this.nullability = nullability; - this.precision = precision; - this.scale = scale; - } - - /** - * Builds a JdbcFieldInfo from the corresponding {@link ResultSetMetaData} column. - * - * @param rsmd The {@link ResultSetMetaData} to get the field information from. - * @param column The column to get the field information for (on a 1-based index). - * @throws SQLException If the column information cannot be retrieved. - * @throws NullPointerException if rsmd is null. - * @throws IllegalArgumentException if column is out of bounds. - */ - public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { - Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null."); - Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1."); - Preconditions.checkArgument( - column <= rsmd.getColumnCount(), - "The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount()); - - this.column = column; - this.jdbcType = rsmd.getColumnType(column); - this.nullability = rsmd.isNullable(column); - this.precision = rsmd.getPrecision(column); - this.scale = rsmd.getScale(column); - } - - /** - * The {@link Types} type. - */ - public int getJdbcType() { - return jdbcType; - } - - /** - * The nullability. - */ - public int isNullable() { - return nullability; - } - - /** - * The numeric precision, for {@link Types#NUMERIC} and {@link Types#DECIMAL} types. 
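A small sketch of how these constructors are intended to be used; note the single-argument form rejects DECIMAL and NUMERIC because those types must carry precision and scale:

    import java.sql.Types;

    public class FieldInfoSketch {
        public static void main(String[] args) {
            // DECIMAL/NUMERIC need the (jdbcType, precision, scale) constructor ...
            JdbcFieldInfo decimalField = new JdbcFieldInfo(Types.DECIMAL, 18, 2);
            // ... while other types may use the single-argument form
            JdbcFieldInfo intField = new JdbcFieldInfo(Types.INTEGER);
            System.out.println(decimalField.getPrecision() + "/" + decimalField.getScale()); // 18/2
            System.out.println(intField.getJdbcType() == Types.INTEGER);                     // true
        }
    }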
- */ - public int getPrecision() { - return precision; - } - - /** - * The numeric scale, for {@link Types#NUMERIC} and {@link Types#DECIMAL} types. - */ - public int getScale() { - return scale; - } - - /** - * The column index for query column. - */ - public int getColumn() { - return column; - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcParameterBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcParameterBinder.java deleted file mode 100644 index b1430f3..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcParameterBinder.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor; - -import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder.ColumnBinder; - -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.VectorSchemaRoot; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.util.HashMap; -import java.util.Map; - -/** - * A binder binds JDBC prepared statement parameters to rows of Arrow data from a VectorSchemaRoot. - *
- * Each row of the VectorSchemaRoot will be bound to the configured parameters of the PreparedStatement. - * One row of data is bound at a time. - */ -public class JdbcParameterBinder { - private final PreparedStatement statement; - private final VectorSchemaRoot root; - private final ColumnBinder[] binders; - private final int[] parameterIndices; - private int nextRowIndex; - - /** - * Create a new parameter binder. - * - * @param statement The statement to bind parameters to. - * @param root The VectorSchemaRoot to pull data from. - * @param binders Column binders to translate from Arrow data to JDBC parameters, one per parameter. - * @param parameterIndices For each binder in binders, the index of the parameter to bind to. - */ - private JdbcParameterBinder( - final PreparedStatement statement, - final VectorSchemaRoot root, - final ColumnBinder[] binders, - int[] parameterIndices) { - Preconditions.checkArgument( - binders.length == parameterIndices.length, - "Number of column binders (%s) must equal number of parameter indices (%s)", - binders.length, parameterIndices.length); - this.statement = statement; - this.root = root; - this.binders = binders; - this.parameterIndices = parameterIndices; - this.nextRowIndex = 0; - } - - /** - * Initialize a binder with a builder. - * - * @param statement The statement to bind to. The binder does not maintain ownership of the statement. - * @param root The {@link VectorSchemaRoot} to pull data from. The binder does not maintain ownership - * of the vector schema root. - */ - public static Builder builder(final PreparedStatement statement, final VectorSchemaRoot root) { - return new Builder(statement, root); - } - - /** - * Reset the binder (so the root can be updated with new data). - */ - public void reset() { - nextRowIndex = 0; - } - - /** - * Bind the next row of data to the parameters of the statement. - *
- * After this, the application should call the desired method on the prepared statement, - * such as {@link PreparedStatement#executeUpdate()}, or {@link PreparedStatement#addBatch()}. - * - * @return true if a row was bound, false if rows were exhausted - */ - public boolean next() throws SQLException { - if (nextRowIndex >= root.getRowCount()) { - return false; - } - for (int i = 0; i < parameterIndices.length; i++) { - final int parameterIndex = parameterIndices[i]; - binders[i].bind(statement, parameterIndex, nextRowIndex); - } - nextRowIndex++; - return true; - } - - /** - * A builder for a {@link JdbcParameterBinder}. - */ - public static class Builder { - private final PreparedStatement statement; - private final VectorSchemaRoot root; - private final Map bindings; - - Builder(PreparedStatement statement, VectorSchemaRoot root) { - this.statement = statement; - this.root = root; - this.bindings = new HashMap<>(); - } - - /** - * Bind each column to the corresponding parameter in order. - */ - public Builder bindAll() { - for (int i = 0; i < root.getFieldVectors().size(); i++) { - bind(/*parameterIndex=*/ i + 1, /*columnIndex=*/ i); - } - return this; - } - - /** - * Bind the given parameter to the given column using the default binder. - */ - public Builder bind(int parameterIndex, int columnIndex) { - return bind( - parameterIndex, - ColumnBinder.forVector(root.getVector(columnIndex))); - } - - /** - * Bind the given parameter using the given binder. - */ - public Builder bind(int parameterIndex, ColumnBinder binder) { - Preconditions.checkArgument( - parameterIndex > 0, "parameterIndex %d must be positive", parameterIndex); - bindings.put(parameterIndex, binder); - return this; - } - - /** - * Build the binder. - */ - public JdbcParameterBinder build() { - ColumnBinder[] binders = new ColumnBinder[bindings.size()]; - int[] parameterIndices = new int[bindings.size()]; - int index = 0; - for (Map.Entry entry : bindings.entrySet()) { - binders[index] = entry.getValue(); - parameterIndices[index] = entry.getKey(); - index++; - } - return new JdbcParameterBinder(statement, root, binders, parameterIndices); - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrow.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrow.java deleted file mode 100644 index 1ff07fb..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrow.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
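Reviewer note on JdbcParameterBinder, removed above: it bound one VectorSchemaRoot row per call to next(), typically inside a batching loop. A minimal usage sketch of the API as it existed before this patch; the table name, placeholder count, and the populated root are hypothetical:

import org.apache.arrow.vector.VectorSchemaRoot;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;

void writeBatch(Connection connection, VectorSchemaRoot root) throws SQLException {
    // "demo_table" and its two placeholders are illustrative only.
    try (PreparedStatement stmt =
                 connection.prepareStatement("INSERT INTO demo_table VALUES (?, ?)")) {
        JdbcParameterBinder binder = JdbcParameterBinder.builder(stmt, root)
                .bindAll()          // bind column i to parameter i + 1 with default binders
                .build();
        while (binder.next()) {     // binds one row of the root, then advances
            stmt.addBatch();
        }
        stmt.executeBatch();
    }
}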
- */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; - -import java.io.IOException; -import java.sql.ResultSet; -import java.sql.SQLException; - -/** - * Utility class to convert JDBC objects to columnar Arrow format objects. - * - *
This utility uses the following data mapping to map JDBC/SQL data types to Arrow data types. - * - *
CHAR --> ArrowType.Utf8 - * NCHAR --> ArrowType.Utf8 - * VARCHAR --> ArrowType.Utf8 - * NVARCHAR --> ArrowType.Utf8 - * LONGVARCHAR --> ArrowType.Utf8 - * LONGNVARCHAR --> ArrowType.Utf8 - * NUMERIC --> ArrowType.Decimal(precision, scale) - * DECIMAL --> ArrowType.Decimal(precision, scale) - * BIT --> ArrowType.Bool - * TINYINT --> ArrowType.Int(8, signed) - * SMALLINT --> ArrowType.Int(16, signed) - * INTEGER --> ArrowType.Int(32, signed) - * BIGINT --> ArrowType.Int(64, signed) - * REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) - * FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) - * DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) - * BINARY --> ArrowType.Binary - * VARBINARY --> ArrowType.Binary - * LONGVARBINARY --> ArrowType.Binary - * DATE --> ArrowType.Date(DateUnit.MILLISECOND) - * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32) - * TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null) - * CLOB --> ArrowType.Utf8 - * BLOB --> ArrowType.Binary - * - * @since 0.10.0 - */ -public class JdbcToArrow { - - /*----------------------------------------------------------------* - | | - | Partial Convert API | - | | - *----------------------------------------------------------------*/ - - /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. - * Note here uses the default targetBatchSize = 1024. - * - * @param resultSet ResultSet to use to fetch the data from underlying database - * @param allocator Memory allocator - * @return Arrow Data Objects {@link ArrowVectorIterator} - * @throws SQLException on error - */ - public static ArrowVectorIterator sqlToArrowVectorIterator( - ResultSet resultSet, - BufferAllocator allocator) - throws SQLException, IOException { - Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); - - JdbcToArrowConfig config = - new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); - return sqlToArrowVectorIterator(resultSet, config); - } - - /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. - * Note if not specify {@link JdbcToArrowConfig#targetBatchSize}, will use default value 1024. - * - * @param resultSet ResultSet to use to fetch the data from underlying database - * @param config Configuration of the conversion from JDBC to Arrow. - * @return Arrow Data Objects {@link ArrowVectorIterator} - * @throws SQLException on error - */ - public static ArrowVectorIterator sqlToArrowVectorIterator( - ResultSet resultSet, - JdbcToArrowConfig config) - throws SQLException, IOException { - Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); - Preconditions.checkNotNull(config, "The configuration cannot be null"); - return ArrowVectorIterator.create(resultSet, config); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowConfig.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowConfig.java deleted file mode 100644 index baa97f0..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowConfig.java +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor; - -import lombok.Getter; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.types.pojo.ArrowType; - -import java.math.RoundingMode; -import java.util.Calendar; -import java.util.Map; -import java.util.function.Function; - -/** - * This class configures the JDBC-to-Arrow conversion process. - *
- * The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, - * and the calendar is used to define the time zone of any - * {@link ArrowType.Timestamp} - * fields that are created during the conversion. Neither field may be null. - *
- * If the includeMetadata flag is set, the Arrow field metadata will contain information - * from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the - * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding - * {@link org.apache.arrow.vector.FieldVector}. - *
- * If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding - * {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type - * information cannot be retrieved from all JDBC implementations (H2 for example, returns - * {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index - * or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion. - *
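Reviewer note: the read path this configuration served combined it with JdbcToArrow.sqlToArrowVectorIterator (removed above) via the builder removed later in this patch. A sketch of that pre-patch usage, assuming a hypothetical ResultSet whose third column (1-based) is a SQL ARRAY of INTEGERs, which is exactly the case the paragraph above says must be configured explicitly:

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import java.sql.ResultSet;
import java.sql.Types;
import java.util.Map;

void readBatches(ResultSet resultSet) throws Exception {
    try (BufferAllocator allocator = new RootAllocator()) {
        JdbcToArrowConfig config =
                new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar())
                        // Column 3 is assumed to be an ARRAY of INTEGERs; some drivers
                        // (H2, for example) report Types.NULL for the sub-type, so it
                        // has to be declared here.
                        .setArraySubTypeByColumnIndexMap(Map.of(3, new JdbcFieldInfo(Types.INTEGER)))
                        .build();
        try (ArrowVectorIterator batches = JdbcToArrow.sqlToArrowVectorIterator(resultSet, config)) {
            while (batches.hasNext()) {
                try (VectorSchemaRoot batch = batches.next()) {
                    // Consume one batch of at most targetBatchSize rows.
                }
            }
        }
    }
}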
- */ -public final class JdbcToArrowConfig { - - public static final int DEFAULT_TARGET_BATCH_SIZE = 1024; - public static final int NO_LIMIT_BATCH_SIZE = -1; - private final Calendar calendar; - private final BufferAllocator allocator; - private final boolean includeMetadata; - private final boolean reuseVectorSchemaRoot; - private final Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex; - private final Map<String, JdbcFieldInfo> arraySubTypesByColumnName; - private final Map<Integer, JdbcFieldInfo> explicitTypesByColumnIndex; - private final Map<String, JdbcFieldInfo> explicitTypesByColumnName; - /** - * -- GETTER -- - * Return schema-level metadata, or null if not provided. - */ - @Getter - private final Map<String, String> schemaMetadata; - /** - * -- GETTER -- - * Return per-field metadata from the columnIndex->metadata map, - * or null if not provided. - */ - @Getter - private final Map<Integer, Map<String, String>> columnMetadataByColumnIndex; - @Getter - private final RoundingMode bigDecimalRoundingMode; - /** - * The maximum row count to read per batch when converting data incrementally. - * The default is {@link #DEFAULT_TARGET_BATCH_SIZE} (1024); {@link #NO_LIMIT_BATCH_SIZE} (-1) disables batching. - * Note that this flag is only used by {@link JdbcToArrow#sqlToArrowVectorIterator}: - * 1) if targetBatchSize != -1, the full data set is converted into multiple vectors - * with valueCount no more than targetBatchSize; - * 2) if targetBatchSize == -1, the full data set is converted into a single vector in {@link ArrowVectorIterator}. - *
- */ - private final int targetBatchSize; - - private final Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter; - - /** - * Constructs a new configuration from the provided allocator and calendar. The allocator - * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define - * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet. - * - * @param allocator The memory allocator to construct the Arrow vectors with. - * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. - */ - JdbcToArrowConfig(BufferAllocator allocator, Calendar calendar) { - this(allocator, calendar, - /* include metadata */ false, - /* reuse vector schema root */ false, - /* array sub-types by column index */ null, - /* array sub-types by column name */ null, - DEFAULT_TARGET_BATCH_SIZE, null, null); - } - - JdbcToArrowConfig( - BufferAllocator allocator, - Calendar calendar, - boolean includeMetadata, - boolean reuseVectorSchemaRoot, - Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex, - Map<String, JdbcFieldInfo> arraySubTypesByColumnName, - int targetBatchSize, - Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) { - this(allocator, calendar, includeMetadata, reuseVectorSchemaRoot, arraySubTypesByColumnIndex, - arraySubTypesByColumnName, targetBatchSize, jdbcToArrowTypeConverter, null); - } - - /** - * Constructs a new configuration from the provided allocator and calendar. The allocator - * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define - * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet. - * - * @param allocator The memory allocator to construct the Arrow vectors with. - * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. - * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata. - * @param reuseVectorSchemaRoot Whether to reuse the vector schema root for each data load. - * @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based). - * @param arraySubTypesByColumnName The type of the JDBC array at the column name. - * @param targetBatchSize The target batch size to be used in preallocation of the resulting vectors. - * @param jdbcToArrowTypeConverter The function that maps JDBC field type information to the Arrow type. If set to null, - * the default mapping will be used, which is defined as: - *
    - *
  • CHAR --> ArrowType.Utf8
  • NCHAR --> ArrowType.Utf8
  • VARCHAR --> ArrowType.Utf8
  • NVARCHAR --> ArrowType.Utf8
  • LONGVARCHAR --> ArrowType.Utf8
  • LONGNVARCHAR --> ArrowType.Utf8
  • NUMERIC --> ArrowType.Decimal(precision, scale)
  • DECIMAL --> ArrowType.Decimal(precision, scale)
  • BIT --> ArrowType.Bool
  • TINYINT --> ArrowType.Int(8, signed)
  • SMALLINT --> ArrowType.Int(16, signed)
  • INTEGER --> ArrowType.Int(32, signed)
  • BIGINT --> ArrowType.Int(64, signed)
  • REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
  • FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
  • DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
  • BINARY --> ArrowType.Binary
  • VARBINARY --> ArrowType.Binary
  • LONGVARBINARY --> ArrowType.Binary
  • DATE --> ArrowType.Date(DateUnit.DAY)
  • TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
  • TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)
  • CLOB --> ArrowType.Utf8
  • BLOB --> ArrowType.Binary
  • ARRAY --> ArrowType.List
  • STRUCT --> ArrowType.Struct
  • NULL --> ArrowType.Null
 - *
- * @param bigDecimalRoundingMode The java.math.RoundingMode to be used in coercion of a BigDecimal from a - * ResultSet having a scale which does not match that of the target vector. Use null - * (default value) to require strict scale matching. - */ - JdbcToArrowConfig( - BufferAllocator allocator, - Calendar calendar, - boolean includeMetadata, - boolean reuseVectorSchemaRoot, - Map arraySubTypesByColumnIndex, - Map arraySubTypesByColumnName, - int targetBatchSize, - Function jdbcToArrowTypeConverter, - RoundingMode bigDecimalRoundingMode) { - - this( - allocator, - calendar, - includeMetadata, - reuseVectorSchemaRoot, - arraySubTypesByColumnIndex, - arraySubTypesByColumnName, - targetBatchSize, - jdbcToArrowTypeConverter, - null, - null, - null, - null, - bigDecimalRoundingMode); - } - - JdbcToArrowConfig( - BufferAllocator allocator, - Calendar calendar, - boolean includeMetadata, - boolean reuseVectorSchemaRoot, - Map arraySubTypesByColumnIndex, - Map arraySubTypesByColumnName, - int targetBatchSize, - Function jdbcToArrowTypeConverter, - Map explicitTypesByColumnIndex, - Map explicitTypesByColumnName, - Map schemaMetadata, - Map> columnMetadataByColumnIndex, - RoundingMode bigDecimalRoundingMode) { - Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); - this.allocator = allocator; - this.calendar = calendar; - this.includeMetadata = includeMetadata; - this.reuseVectorSchemaRoot = reuseVectorSchemaRoot; - this.arraySubTypesByColumnIndex = arraySubTypesByColumnIndex; - this.arraySubTypesByColumnName = arraySubTypesByColumnName; - this.targetBatchSize = targetBatchSize; - this.explicitTypesByColumnIndex = explicitTypesByColumnIndex; - this.explicitTypesByColumnName = explicitTypesByColumnName; - this.schemaMetadata = schemaMetadata; - this.columnMetadataByColumnIndex = columnMetadataByColumnIndex; - this.bigDecimalRoundingMode = bigDecimalRoundingMode; - - // set up type converter - this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter != null ? jdbcToArrowTypeConverter : - (jdbcFieldInfo) -> JdbcToArrowUtils.getArrowTypeFromJdbcType(jdbcFieldInfo, calendar); - } - - /** - * The calendar to use when defining Arrow Timestamp fields - * and retrieving {@link java.sql.Date}, {@link java.sql.Time}, or {@link java.sql.Timestamp} - * data types from the {@link java.sql.ResultSet}, or null if not converting. - * - * @return the calendar. - */ - public Calendar getCalendar() { - return calendar; - } - - /** - * The Arrow memory allocator. - * - * @return the allocator. - */ - public BufferAllocator getAllocator() { - return allocator; - } - - /** - * Whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. - * - * @return true to include field metadata, false to exclude it. - */ - public boolean shouldIncludeMetadata() { - return includeMetadata; - } - - /** - * Get the target batch size for partial read. - */ - public int getTargetBatchSize() { - return targetBatchSize; - } - - /** - * Get whether it is allowed to reuse the vector schema root. - */ - public boolean isReuseVectorSchemaRoot() { - return reuseVectorSchemaRoot; - } - - /** - * Gets the mapping between JDBC type information to Arrow type. - */ - public Function getJdbcToArrowTypeConverter() { - return jdbcToArrowTypeConverter; - } - - /** - * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index. - * - * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type. 
- * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined. - */ - public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) { - if (arraySubTypesByColumnIndex == null) { - return null; - } else { - return arraySubTypesByColumnIndex.get(index); - } - } - - /** - * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name. - * - * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type. - * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined. - */ - public JdbcFieldInfo getArraySubTypeByColumnName(String name) { - if (arraySubTypesByColumnName == null) { - return null; - } else { - return arraySubTypesByColumnName.get(name); - } - } - - /** - * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column index. - * - * @param index The {@link java.sql.ResultSetMetaData} column index to evaluate for explicit type mapping. - * @return The {@link JdbcFieldInfo} defined for the column, or null if not defined. - */ - public JdbcFieldInfo getExplicitTypeByColumnIndex(int index) { - if (explicitTypesByColumnIndex == null) { - return null; - } else { - return explicitTypesByColumnIndex.get(index); - } - } - - /** - * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column name. - * - * @param name The {@link java.sql.ResultSetMetaData} column name to evaluate for explicit type mapping. - * @return The {@link JdbcFieldInfo} defined for the column, or null if not defined. - */ - public JdbcFieldInfo getExplicitTypeByColumnName(String name) { - if (explicitTypesByColumnName == null) { - return null; - } else { - return explicitTypesByColumnName.get(name); - } - } - -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowConfigBuilder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowConfigBuilder.java deleted file mode 100644 index 2874d33..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowConfigBuilder.java +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.types.pojo.ArrowType; - -import java.math.RoundingMode; -import java.util.Calendar; -import java.util.Map; -import java.util.function.Function; - - -/** - * This class builds {@link JdbcToArrowConfig}s. 
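Reviewer note: the builder deleted below was normally used as a fluent chain. A sketch with illustrative values; per the javadoc below, only the allocator is strictly required:

import org.apache.arrow.memory.RootAllocator;
import java.math.RoundingMode;

JdbcToArrowConfig config = new JdbcToArrowConfigBuilder()
        .setAllocator(new RootAllocator())                // required; build() throws NullPointerException without it
        .setCalendar(JdbcToArrowUtils.getUtcCalendar())   // time zone for DATE/TIME/TIMESTAMP handling
        .setIncludeMetadata(true)                         // copy ResultSetMetaData into field metadata
        .setTargetBatchSize(4096)                         // rows per VectorSchemaRoot; 4096 is illustrative
        .setBigDecimalRoundingMode(RoundingMode.HALF_UP)  // tolerate scale mismatches instead of erroring
        .build();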
- */ -public class JdbcToArrowConfigBuilder { - private Calendar calendar; - private BufferAllocator allocator; - private boolean includeMetadata; - private boolean reuseVectorSchemaRoot; - private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex; - private Map<String, JdbcFieldInfo> arraySubTypesByColumnName; - private Map<Integer, JdbcFieldInfo> explicitTypesByColumnIndex; - private Map<String, JdbcFieldInfo> explicitTypesByColumnName; - private Map<String, String> schemaMetadata; - private Map<Integer, Map<String, String>> columnMetadataByColumnIndex; - private int targetBatchSize; - private Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter; - private RoundingMode bigDecimalRoundingMode; - - /** - * Default constructor for the JdbcToArrowConfigBuilder. - * Use the setter methods for the allocator and calendar; the allocator must be - * set. Otherwise, {@link #build()} will throw a {@link NullPointerException}. - */ - public JdbcToArrowConfigBuilder() { - this.allocator = null; - this.calendar = null; - this.includeMetadata = false; - this.reuseVectorSchemaRoot = false; - this.arraySubTypesByColumnIndex = null; - this.arraySubTypesByColumnName = null; - this.explicitTypesByColumnIndex = null; - this.explicitTypesByColumnName = null; - this.schemaMetadata = null; - this.columnMetadataByColumnIndex = null; - this.bigDecimalRoundingMode = null; - } - - /** - * Constructor for the JdbcToArrowConfigBuilder. The - * allocator is required, and a {@link NullPointerException} - * will be thrown if it is null. - *
- * The allocator is used to construct Arrow vectors from the JDBC ResultSet. - * The calendar is used to determine the time zone of {@link java.sql.Timestamp} - * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and - * {@link java.sql.Timestamp} fields to a single, common time zone when reading - * from the result set. - *
- * - * @param allocator The Arrow Vector memory allocator. - * @param calendar The calendar to use when constructing timestamp fields. - */ - public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar) { - this(); - - Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); - - this.allocator = allocator; - this.calendar = calendar; - this.includeMetadata = false; - this.reuseVectorSchemaRoot = false; - this.targetBatchSize = JdbcToArrowConfig.DEFAULT_TARGET_BATCH_SIZE; - } - - /** - * Constructor for the JdbcToArrowConfigBuilder. Both the - * allocator and calendar are required. A {@link NullPointerException} - * will be thrown if either of those arguments is null. - *
- * The allocator is used to construct Arrow vectors from the JDBC ResultSet. - * The calendar is used to determine the time zone of {@link java.sql.Timestamp} - * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and - * {@link java.sql.Timestamp} fields to a single, common time zone when reading - * from the result set. - *
- * The includeMetadata argument, if true, will cause information about - * each database field to be added to the Vector Schema's field metadata. - *
- * - * @param allocator The Arrow Vector memory allocator. - * @param calendar The calendar to use when constructing timestamp fields. - */ - public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar, boolean includeMetadata) { - this(allocator, calendar); - this.includeMetadata = includeMetadata; - } - - /** - * Sets the memory allocator to use when constructing the Arrow vectors from the ResultSet. - * - * @param allocator the allocator to set. - * @throws NullPointerException if allocator is null. - */ - public JdbcToArrowConfigBuilder setAllocator(BufferAllocator allocator) { - Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); - this.allocator = allocator; - return this; - } - - /** - * Sets the {@link Calendar} to use when constructing timestamp fields in the - * Arrow schema, and reading time-based fields from the JDBC ResultSet. - * - * @param calendar the calendar to set. - */ - public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) { - this.calendar = calendar; - return this; - } - - /** - * Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. - * - * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field metadata. - * @return This instance of the JdbcToArrowConfig, for chaining. - */ - public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { - this.includeMetadata = includeMetadata; - return this; - } - - /** - * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}. - * The column index is 1-based, to match the JDBC column index. - * - * @param map The mapping. - * @return This instance of the JdbcToArrowConfig, for chaining. - */ - public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map map) { - this.arraySubTypesByColumnIndex = map; - return this; - } - - /** - * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}. - * - * @param map The mapping. - * @return This instance of the JdbcToArrowConfig, for chaining. - */ - public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map map) { - this.arraySubTypesByColumnName = map; - return this; - } - - /** - * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for column types. - *
- * This can be useful to override type information from JDBC drivers that provide incomplete type info, - * e.g. DECIMAL with precision = scale = 0. - *
- * The column index is 1-based, to match the JDBC column index. - * - * @param map The mapping. - */ - public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map map) { - this.explicitTypesByColumnIndex = map; - return this; - } - - /** - * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for column types. - *
- * This can be useful to override type information from JDBC drivers that provide incomplete type info, - * e.g. DECIMAL with precision = scale = 0. - * - * @param map The mapping. - */ - public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map map) { - this.explicitTypesByColumnName = map; - return this; - } - - /** - * Set the target number of rows to convert at once. - *
- * Use {@link JdbcToArrowConfig#NO_LIMIT_BATCH_SIZE} to read all rows at once. - */ - public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) { - this.targetBatchSize = targetBatchSize; - return this; - } - - /** - * Set the function used to convert JDBC types to Arrow types. - *
- * Defaults to wrapping {@link JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo, Calendar)}. - */ - public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter( - Function jdbcToArrowTypeConverter) { - this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter; - return this; - } - - /** - * Set whether to use the same {@link org.apache.arrow.vector.VectorSchemaRoot} instance on each iteration, - * or to allocate a new one. - */ - public JdbcToArrowConfigBuilder setReuseVectorSchemaRoot(boolean reuseVectorSchemaRoot) { - this.reuseVectorSchemaRoot = reuseVectorSchemaRoot; - return this; - } - - /** - * Set metadata for schema. - */ - public JdbcToArrowConfigBuilder setSchemaMetadata(Map schemaMetadata) { - this.schemaMetadata = schemaMetadata; - return this; - } - - /** - * Set metadata from columnIndex->meta map on per field basis. - */ - public JdbcToArrowConfigBuilder setColumnMetadataByColumnIndex( - Map> columnMetadataByColumnIndex) { - this.columnMetadataByColumnIndex = columnMetadataByColumnIndex; - return this; - } - - /** - * Set the rounding mode used when the scale of the actual value does not match the declared scale. - *
- * By default, an error is raised in such cases. - */ - public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode bigDecimalRoundingMode) { - this.bigDecimalRoundingMode = bigDecimalRoundingMode; - return this; - } - - /** - * This builds the {@link JdbcToArrowConfig} from the provided - * {@link BufferAllocator} and {@link Calendar}. - * - * @return The built {@link JdbcToArrowConfig} - * @throws NullPointerException if either the allocator or calendar was not set. - */ - public JdbcToArrowConfig build() { - return new JdbcToArrowConfig( - allocator, - calendar, - includeMetadata, - reuseVectorSchemaRoot, - arraySubTypesByColumnIndex, - arraySubTypesByColumnName, - targetBatchSize, - jdbcToArrowTypeConverter, - explicitTypesByColumnIndex, - explicitTypesByColumnName, - schemaMetadata, - columnMetadataByColumnIndex, - bigDecimalRoundingMode); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowUtils.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowUtils.java deleted file mode 100644 index 49d6609..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/JdbcToArrowUtils.java +++ /dev/null @@ -1,448 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor; - -import org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer.*; - -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.*; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.types.DateUnit; -import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; -import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ValueVectorUtility; - -import java.io.IOException; -import java.math.RoundingMode; -import java.sql.Date; -import java.sql.*; -import java.util.*; - -/** - * Class that does most of the work to convert JDBC ResultSet data into Arrow columnar format Vector objects. - * - * @since 0.10.0 - */ -public class JdbcToArrowUtils { - - private static final int JDBC_ARRAY_VALUE_COLUMN = 2; - - /** - * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. 
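Reviewer note: getUtcCalendar (below) pairs with jdbcToArrowSchema to derive an Arrow schema from JDBC metadata alone, before any rows are read; with a UTC calendar, TIMESTAMP columns come out as ArrowType.Timestamp(MILLISECOND, "UTC"). A small sketch of that pre-patch usage:

import org.apache.arrow.vector.types.pojo.Schema;
import java.sql.ResultSet;
import java.sql.SQLException;

Schema describe(ResultSet resultSet) throws SQLException {
    // Field names and Arrow types are derived from ResultSetMetaData only.
    return JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(),
            JdbcToArrowUtils.getUtcCalendar());
}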
- */ - public static Calendar getUtcCalendar() { - return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); - } - - /** - * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. - * - * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. - * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. - * @return {@link Schema} - * @throws SQLException on error - */ - public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException { - Preconditions.checkNotNull(calendar, "Calendar object can't be null"); - - return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); - } - - /** - * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. - * - * @param parameterMetaData The ResultSetMetaData containing the results, to read the JDBC metadata from. - * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. - * @return {@link Schema} - * @throws SQLException on error - */ - public static Schema jdbcToArrowSchema(final ParameterMetaData parameterMetaData, final Calendar calendar) - throws SQLException { - Preconditions.checkNotNull(calendar, "Calendar object can't be null"); - Preconditions.checkNotNull(parameterMetaData); - final List parameterFields = new ArrayList<>(parameterMetaData.getParameterCount()); - for (int parameterCounter = 1; parameterCounter <= parameterMetaData.getParameterCount(); - parameterCounter++) { - final int jdbcDataType = parameterMetaData.getParameterType(parameterCounter); - final int jdbcIsNullable = parameterMetaData.isNullable(parameterCounter); - final boolean arrowIsNullable = jdbcIsNullable != ParameterMetaData.parameterNoNulls; - final int precision = parameterMetaData.getPrecision(parameterCounter); - final int scale = parameterMetaData.getScale(parameterCounter); - final ArrowType arrowType = getArrowTypeFromJdbcType(new JdbcFieldInfo(jdbcDataType, precision, scale), calendar); - final FieldType fieldType = new FieldType(arrowIsNullable, arrowType, /*dictionary=*/null); - parameterFields.add(new Field(null, fieldType, null)); - } - - return new Schema(parameterFields); - } - - /** - * Converts the provided JDBC type to its respective {@link ArrowType} counterpart. - * - * @param fieldInfo the {@link JdbcFieldInfo} with information about the original JDBC type. - * @param calendar the {@link Calendar} to use for datetime data types. - * @return a new {@link ArrowType}. 
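Reviewer note: the switch below is the default converter referenced throughout these deleted files. For instance, a DECIMAL column resolves to a 128-bit Arrow decimal carrying the JDBC precision and scale; the concrete numbers here are illustrative:

import org.apache.arrow.vector.types.pojo.ArrowType;
import java.sql.Types;

// DECIMAL(38, 10) -> ArrowType.Decimal(38, 10, 128); the calendar argument
// only matters for TIMESTAMP, so null is acceptable for numeric types.
ArrowType decimalType = JdbcToArrowUtils.getArrowTypeFromJdbcType(
        new JdbcFieldInfo(Types.DECIMAL, /*precision=*/ 38, /*scale=*/ 10), /*calendar=*/ null);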
- */ - public static ArrowType getArrowTypeFromJdbcType(final JdbcFieldInfo fieldInfo, final Calendar calendar) { - switch (fieldInfo.getJdbcType()) { - case Types.BOOLEAN: - case Types.BIT: - return new ArrowType.Bool(); - case Types.TINYINT: - return new ArrowType.Int(8, true); - case Types.SMALLINT: - return new ArrowType.Int(16, true); - case Types.INTEGER: - return new ArrowType.Int(32, true); - case Types.BIGINT: - return new ArrowType.Int(64, true); - case Types.NUMERIC: - case Types.DECIMAL: - int precision = fieldInfo.getPrecision(); - int scale = fieldInfo.getScale(); - return new ArrowType.Decimal(precision, scale, 128); - case Types.REAL: - case Types.FLOAT: - return new ArrowType.FloatingPoint(SINGLE); - case Types.DOUBLE: - return new ArrowType.FloatingPoint(DOUBLE); - case Types.CHAR: - case Types.NCHAR: - case Types.VARCHAR: - case Types.NVARCHAR: - case Types.LONGVARCHAR: - case Types.LONGNVARCHAR: - case Types.CLOB: - return new ArrowType.Utf8(); - case Types.DATE: - return new ArrowType.Date(DateUnit.DAY); - case Types.TIME: - return new ArrowType.Time(TimeUnit.MILLISECOND, 32); - case Types.TIMESTAMP: - final String timezone; - if (calendar != null) { - timezone = calendar.getTimeZone().getID(); - } else { - timezone = null; - } - return new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone); - case Types.BINARY: - case Types.VARBINARY: - case Types.LONGVARBINARY: - case Types.BLOB: - return new ArrowType.Binary(); - case Types.ARRAY: - return new ArrowType.List(); - case Types.NULL: - return new ArrowType.Null(); - case Types.STRUCT: - return new ArrowType.Struct(); - default: - // no-op, shouldn't get here - return null; - } - } - - /** - * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. - * - *
- * If {@link JdbcToArrowConfig#shouldIncludeMetadata()} returns true, the following fields - * will be added to the {@link FieldType#getMetadata()}: - *
  • {@link Constants#SQL_CATALOG_NAME_KEY} representing {@link ResultSetMetaData#getCatalogName(int)}
  • {@link Constants#SQL_SCHEMA_NAME_KEY} representing {@link ResultSetMetaData#getSchemaName(int)}
  • {@link Constants#SQL_TABLE_NAME_KEY} representing {@link ResultSetMetaData#getTableName(int)}
  • {@link Constants#SQL_COLUMN_NAME_KEY} representing {@link ResultSetMetaData#getColumnLabel(int)}
  • {@link Constants#SQL_TYPE_KEY} representing {@link ResultSetMetaData#getColumnTypeName(int)}
- *
- * If any columns are of type {@link Types#ARRAY}, the configuration object will be used to look up - * the array sub-type field. The {@link JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be - * checked first, followed by the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method. - *
- * - * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. - * @param config The configuration to use when constructing the schema. - * @return {@link Schema} - * @throws SQLException on error - * @throws IllegalArgumentException if rsmd contains an {@link Types#ARRAY} but the - * config does not have a sub-type definition for it. - */ - public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException { - Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); - Preconditions.checkNotNull(config, "The configuration object must not be null"); - - List fields = new ArrayList<>(); - int columnCount = rsmd.getColumnCount(); - for (int i = 1; i <= columnCount; i++) { - final String columnName = rsmd.getColumnLabel(i); - - final Map columnMetadata = config.getColumnMetadataByColumnIndex() != null ? - config.getColumnMetadataByColumnIndex().get(i) : null; - final Map metadata; - if (config.shouldIncludeMetadata()) { - metadata = new HashMap<>(); - metadata.put(Constants.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i)); - metadata.put(Constants.SQL_SCHEMA_NAME_KEY, rsmd.getSchemaName(i)); - metadata.put(Constants.SQL_TABLE_NAME_KEY, rsmd.getTableName(i)); - metadata.put(Constants.SQL_COLUMN_NAME_KEY, columnName); - metadata.put(Constants.SQL_TYPE_KEY, rsmd.getColumnTypeName(i)); - if (columnMetadata != null && !columnMetadata.isEmpty()) { - metadata.putAll(columnMetadata); - } - } else { - if (columnMetadata != null && !columnMetadata.isEmpty()) { - metadata = columnMetadata; - } else { - metadata = null; - } - } - - final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config); - final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo); - if (arrowType != null) { - final FieldType fieldType = new FieldType( - isColumnNullable(rsmd, i, columnFieldInfo), arrowType, /* dictionary encoding */ null, metadata); - - List children = null; - if (arrowType.getTypeID() == ArrowType.List.TYPE_TYPE) { - final JdbcFieldInfo arrayFieldInfo = getJdbcFieldInfoForArraySubType(rsmd, i, config); - if (arrayFieldInfo == null) { - throw new IllegalArgumentException("Configuration does not provide a mapping for array column " + i); - } - children = new ArrayList(); - final ArrowType childType = config.getJdbcToArrowTypeConverter().apply(arrayFieldInfo); - children.add(new Field("child", FieldType.nullable(childType), null)); - } else if (arrowType.getTypeID() == ArrowType.ArrowTypeID.Map) { - FieldType mapType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - FieldType keyType = new FieldType(false, new ArrowType.Utf8(), null, null); - FieldType valueType = new FieldType(false, new ArrowType.Utf8(), null, null); - children = new ArrayList<>(); - children.add(new Field("child", mapType, - Arrays.asList(new Field(MapVector.KEY_NAME, keyType, null), - new Field(MapVector.VALUE_NAME, valueType, null)))); - } - - fields.add(new Field(columnName, fieldType, children)); - } - } - return new Schema(fields, config.getSchemaMetadata()); - } - - static JdbcFieldInfo getJdbcFieldInfoForColumn( - ResultSetMetaData rsmd, - int arrayColumn, - JdbcToArrowConfig config) - throws SQLException { - Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); - Preconditions.checkNotNull(config, "Configuration must not be null"); - Preconditions.checkArgument( - arrayColumn > 0, - "ResultSetMetaData columns start with 1; column cannot be less than 1"); - 
Preconditions.checkArgument( - arrayColumn <= rsmd.getColumnCount(), - "Column number cannot be more than the number of columns"); - - JdbcFieldInfo fieldInfo = config.getExplicitTypeByColumnIndex(arrayColumn); - if (fieldInfo == null) { - fieldInfo = config.getExplicitTypeByColumnName(rsmd.getColumnLabel(arrayColumn)); - } - if (fieldInfo != null) { - return fieldInfo; - } - return new JdbcFieldInfo(rsmd, arrayColumn); - } - - /* Uses the configuration to determine what the array sub-type JdbcFieldInfo is. - * If no sub-type can be found, returns null. - */ - private static JdbcFieldInfo getJdbcFieldInfoForArraySubType( - ResultSetMetaData rsmd, - int arrayColumn, - JdbcToArrowConfig config) - throws SQLException { - - Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); - Preconditions.checkNotNull(config, "Configuration must not be null"); - Preconditions.checkArgument( - arrayColumn > 0, - "ResultSetMetaData columns start with 1; column cannot be less than 1"); - Preconditions.checkArgument( - arrayColumn <= rsmd.getColumnCount(), - "Column number cannot be more than the number of columns"); - - JdbcFieldInfo fieldInfo = config.getArraySubTypeByColumnIndex(arrayColumn); - if (fieldInfo == null) { - fieldInfo = config.getArraySubTypeByColumnName(rsmd.getColumnLabel(arrayColumn)); - } - return fieldInfo; - } - - /** - * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate - * the given Arrow Vector objects. - * - * @param rs ResultSet to use to fetch the data from underlying database - * @param root Arrow {@link VectorSchemaRoot} object to populate - * @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link Timestamp} - * data types from the {@link ResultSet}, or null if not converting. - * @throws SQLException on error - */ - public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar) - throws SQLException, IOException { - - Preconditions.checkNotNull(calendar, "Calendar object can't be null"); - - jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); - } - - static boolean isColumnNullable(ResultSetMetaData resultSetMetadata, int index, JdbcFieldInfo info) - throws SQLException { - int nullableValue; - if (info != null && info.isNullable() != ResultSetMetaData.columnNullableUnknown) { - nullableValue = info.isNullable(); - } else { - nullableValue = resultSetMetadata.isNullable(index); - } - return nullableValue == ResultSetMetaData.columnNullable || - nullableValue == ResultSetMetaData.columnNullableUnknown; - } - - /** - * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate - * the given Arrow Vector objects. - * - * @param rs ResultSet to use to fetch the data from underlying database - * @param root Arrow {@link VectorSchemaRoot} object to populate - * @param config The configuration to use when reading the data. 
- * @throws SQLException on error - */ - public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) - throws SQLException, IOException { - - ResultSetMetaData rsmd = rs.getMetaData(); - int columnCount = rsmd.getColumnCount(); - - JdbcConsumer[] consumers = new JdbcConsumer[columnCount]; - for (int i = 1; i <= columnCount; i++) { - FieldVector vector = root.getVector(rsmd.getColumnLabel(i)); - final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config); - consumers[i - 1] = getConsumer( - vector.getField().getType(), i, isColumnNullable(rsmd, i, columnFieldInfo), vector, config); - } - - CompositeJdbcConsumer compositeConsumer = null; - // Only clean resources when occurs error, - // vectors within consumers are useful and users are responsible for its close. - try { - compositeConsumer = new CompositeJdbcConsumer(consumers); - int readRowCount = 0; - if (config.getTargetBatchSize() == JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { - while (rs.next()) { - ValueVectorUtility.ensureCapacity(root, readRowCount + 1); - compositeConsumer.consume(rs); - readRowCount++; - } - } else { - while (readRowCount < config.getTargetBatchSize() && rs.next()) { - compositeConsumer.consume(rs); - readRowCount++; - } - } - - root.setRowCount(readRowCount); - } catch (Exception e) { - // error occurs and clean up resources. - if (compositeConsumer != null) { - compositeConsumer.close(); - } - throw e; - } - } - - static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boolean nullable, - FieldVector vector, JdbcToArrowConfig config) { - final Calendar calendar = config.getCalendar(); - - switch (arrowType.getTypeID()) { - case Bool: - return BitConsumer.createConsumer((BitVector) vector, columnIndex, nullable); - case Int: - switch (((ArrowType.Int) arrowType).getBitWidth()) { - case 8: - return TinyIntConsumer.createConsumer((TinyIntVector) vector, columnIndex, nullable); - case 16: - return SmallIntConsumer.createConsumer((SmallIntVector) vector, columnIndex, nullable); - case 32: - return IntConsumer.createConsumer((IntVector) vector, columnIndex, nullable); - case 64: - return BigIntConsumer.createConsumer((BigIntVector) vector, columnIndex, nullable); - default: - return null; - } - case Decimal: - final RoundingMode bigDecimalRoundingMode = config.getBigDecimalRoundingMode(); - return DecimalConsumer.createConsumer((DecimalVector) vector, columnIndex, nullable, bigDecimalRoundingMode); - case FloatingPoint: - switch (((ArrowType.FloatingPoint) arrowType).getPrecision()) { - case SINGLE: - return FloatConsumer.createConsumer((Float4Vector) vector, columnIndex, nullable); - case DOUBLE: - return DoubleConsumer.createConsumer((Float8Vector) vector, columnIndex, nullable); - default: - return null; - } - case Utf8: - case LargeUtf8: - return VarCharConsumer.createConsumer((VarCharVector) vector, columnIndex, nullable); - case Binary: - case LargeBinary: - return BinaryConsumer.createConsumer((VarBinaryVector) vector, columnIndex, nullable); - case Date: - return DateConsumer.createConsumer((DateDayVector) vector, columnIndex, nullable, calendar); - case Time: - return TimeConsumer.createConsumer((TimeMilliVector) vector, columnIndex, nullable, calendar); - case Timestamp: - if (config.getCalendar() == null) { - return TimestampConsumer.createConsumer((TimeStampMilliVector) vector, columnIndex, nullable); - } else { - return TimestampTZConsumer.createConsumer((TimeStampMilliTZVector) vector, columnIndex, nullable, calendar); - } - case 
List: - FieldVector childVector = ((ListVector) vector).getDataVector(); - JdbcConsumer delegate = getConsumer(childVector.getField().getType(), JDBC_ARRAY_VALUE_COLUMN, - childVector.getField().isNullable(), childVector, config); - return ArrayConsumer.createConsumer((ListVector) vector, delegate, columnIndex, nullable); - case Map: - return MapConsumer.createConsumer((MapVector) vector, columnIndex, nullable); - case Null: - return new NullConsumer((NullVector) vector); - default: - // no-op, shouldn't get here - throw new UnsupportedOperationException(); - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BaseColumnBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BaseColumnBinder.java deleted file mode 100644 index e6702b3..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BaseColumnBinder.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.FieldVector; - -/** - * Base class for ColumnBinder implementations. - * - * @param The concrete FieldVector subtype. - */ -public abstract class BaseColumnBinder implements ColumnBinder { - protected final V vector; - protected final int jdbcType; - - public BaseColumnBinder(V vector, int jdbcType) { - this.vector = vector; - this.jdbcType = jdbcType; - } - - @Override - public int getJdbcType() { - return jdbcType; - } - - @Override - public V getVector() { - return vector; - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BigIntBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BigIntBinder.java deleted file mode 100644 index da91d17..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BigIntBinder.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
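Reviewer note: BaseColumnBinder (removed above) carries the vector and JDBC-type plumbing so that concrete binders only implement bind(). A minimal subclass in the style of the binders deleted below; this exact class is illustrative and not part of the patch:

import org.apache.arrow.vector.IntVector;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Types;

public class ExampleIntBinder extends BaseColumnBinder<IntVector> {
    public ExampleIntBinder(IntVector vector) {
        super(vector, Types.INTEGER);
    }

    @Override
    public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException {
        // Read the raw 4-byte value straight from the Arrow data buffer,
        // mirroring how BigIntBinder reads 8-byte values below.
        int value = vector.getDataBuffer().getInt((long) rowIndex * IntVector.TYPE_WIDTH);
        statement.setInt(parameterIndex, value);
    }
}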
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.BigIntVector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; - -/** - * A column binder for 64-bit integers. - */ -public class BigIntBinder extends BaseColumnBinder<BigIntVector> { - public BigIntBinder(BigIntVector vector) { - this(vector, Types.BIGINT); - } - - public BigIntBinder(BigIntVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final long value = vector.getDataBuffer().getLong((long) rowIndex * BigIntVector.TYPE_WIDTH); - statement.setLong(parameterIndex, value); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BitBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BitBinder.java deleted file mode 100644 index ebd5909..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/BitBinder.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.BitVector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; - -/** - * A column binder for booleans.
- */ -public class BitBinder extends BaseColumnBinder<BitVector> { - public BitBinder(BitVector vector) { - this(vector, Types.BOOLEAN); - } - - public BitBinder(BitVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - // See BitVector#getBit - final int byteIndex = rowIndex >> 3; - final byte b = vector.getDataBuffer().getByte(byteIndex); - final int bitIndex = rowIndex & 7; - final int value = (b >> bitIndex) & 0x01; - statement.setBoolean(parameterIndex, value != 0); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ColumnBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ColumnBinder.java deleted file mode 100644 index f518150..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ColumnBinder.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.FieldVector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; - -/** - * A helper to bind values from a wrapped Arrow vector to a JDBC PreparedStatement. - */ -public interface ColumnBinder { - /** - * Create a column binder for a vector, using the default JDBC type code for null values. - */ - static ColumnBinder forVector(FieldVector vector) { - return forVector(vector, /*jdbcType*/ null); - } - - /** - * Create a column binder for a vector, overriding the JDBC type code used for null values. - * - * @param vector The vector that the column binder will wrap. - * @param jdbcType The JDBC type code to use (or null to use the default). - */ - static ColumnBinder forVector(FieldVector vector, Integer jdbcType) { - final ColumnBinder binder = vector.getField().getType().accept(new ColumnBinderArrowTypeVisitor(vector, jdbcType)); - if (vector.getField().isNullable()) { - return new NullableColumnBinder(binder); - } - return binder; - } - - /** - * Bind the given row to the given parameter. - * - * @param statement The statement to bind to. - * @param parameterIndex The parameter to bind to (1-indexed) - * @param rowIndex The row to bind values from (0-indexed) - * @throws SQLException if an error occurs - */ - void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException; - - /** - * Get the JDBC type code used by this binder. - * - * @return A type code from {@link java.sql.Types}. - */ - int getJdbcType(); - - /** - * Get the vector used by this binder.
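For orientation, this ColumnBinder interface was the driver-facing API of the deleted adaptor: one binder per column, invoked once per row. A minimal usage sketch, assuming an open java.sql.Connection conn, a populated VectorSchemaRoot root, and a matching two-column target table (all names illustrative, not part of this patch):

    // One binder per column; forVector adds the nullable wrapper automatically.
    ColumnBinder[] binders = root.getFieldVectors().stream()
            .map(ColumnBinder::forVector)
            .toArray(ColumnBinder[]::new);
    try (PreparedStatement stmt = conn.prepareStatement("INSERT INTO target VALUES (?, ?)")) {
        for (int row = 0; row < root.getRowCount(); row++) {
            for (int col = 0; col < binders.length; col++) {
                binders[col].bind(stmt, col + 1, row);  // parameters 1-indexed, rows 0-indexed
            }
            stmt.addBatch();
        }
        stmt.executeBatch();
    }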
- */ - FieldVector getVector(); -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ColumnBinderArrowTypeVisitor.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ColumnBinderArrowTypeVisitor.java deleted file mode 100644 index 89ce9ab..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ColumnBinderArrowTypeVisitor.java +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.*; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.types.pojo.ArrowType; - -import java.sql.Types; -import java.time.ZoneId; -import java.util.Calendar; -import java.util.TimeZone; - -/** - * Visitor to create the base ColumnBinder for a vector. - *

- * To handle null values, wrap the returned binder in a {@link NullableColumnBinder}. - */ -public class ColumnBinderArrowTypeVisitor implements ArrowType.ArrowTypeVisitor<ColumnBinder> { - private final FieldVector vector; - private final Integer jdbcType; - - /** - * Create a binder using a custom JDBC type code. - * - * @param vector The vector that the binder will wrap. - * @param jdbcType The JDBC type code (or null to use the default). - */ - public ColumnBinderArrowTypeVisitor(FieldVector vector, Integer jdbcType) { - this.vector = vector; - this.jdbcType = jdbcType; - } - - @Override - public ColumnBinder visit(ArrowType.Null type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.Struct type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.List type) { - return new ListBinder((ListVector) vector); - } - - @Override - public ColumnBinder visit(ArrowType.LargeList type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.FixedSizeList type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.Union type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.Map type) { - return new MapBinder((MapVector) vector); - } - - @Override - public ColumnBinder visit(ArrowType.Int type) { - if (!type.getIsSigned()) { - throw new UnsupportedOperationException( - "No column binder implemented for unsigned type " + type); - } - switch (type.getBitWidth()) { - case 8: - return jdbcType == null ? new TinyIntBinder((TinyIntVector) vector) : - new TinyIntBinder((TinyIntVector) vector, jdbcType); - case 16: - return jdbcType == null ? new SmallIntBinder((SmallIntVector) vector) : - new SmallIntBinder((SmallIntVector) vector, jdbcType); - case 32: - return jdbcType == null ? new IntBinder((IntVector) vector) : - new IntBinder((IntVector) vector, jdbcType); - case 64: - return jdbcType == null ? new BigIntBinder((BigIntVector) vector) : - new BigIntBinder((BigIntVector) vector, jdbcType); - default: - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - } - - @Override - public ColumnBinder visit(ArrowType.FloatingPoint type) { - switch (type.getPrecision()) { - case SINGLE: - return jdbcType == null ? new Float4Binder((Float4Vector) vector) : - new Float4Binder((Float4Vector) vector, jdbcType); - case DOUBLE: - return jdbcType == null ? new Float8Binder((Float8Vector) vector) : - new Float8Binder((Float8Vector) vector, jdbcType); - default: - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - } - - @Override - public ColumnBinder visit(ArrowType.Utf8 type) { - VarCharVector varChar = (VarCharVector) vector; - return jdbcType == null ? new VarCharBinder<>(varChar, Types.VARCHAR) : - new VarCharBinder<>(varChar, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.LargeUtf8 type) { - LargeVarCharVector varChar = (LargeVarCharVector) vector; - return jdbcType == null ?
new VarCharBinder<>(varChar, Types.LONGVARCHAR) : - new VarCharBinder<>(varChar, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.Binary type) { - VarBinaryVector varBinary = (VarBinaryVector) vector; - return jdbcType == null ? new VarBinaryBinder<>(varBinary, Types.VARBINARY) : - new VarBinaryBinder<>(varBinary, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.LargeBinary type) { - LargeVarBinaryVector varBinary = (LargeVarBinaryVector) vector; - return jdbcType == null ? new VarBinaryBinder<>(varBinary, Types.LONGVARBINARY) : - new VarBinaryBinder<>(varBinary, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.FixedSizeBinary type) { - FixedSizeBinaryVector binary = (FixedSizeBinaryVector) vector; - return jdbcType == null ? new FixedSizeBinaryBinder(binary, Types.BINARY) : - new FixedSizeBinaryBinder(binary, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.Bool type) { - return jdbcType == null ? new BitBinder((BitVector) vector) : new BitBinder((BitVector) vector, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.Decimal type) { - if (type.getBitWidth() == 128) { - DecimalVector decimalVector = (DecimalVector) vector; - return jdbcType == null ? new Decimal128Binder(decimalVector) : new Decimal128Binder(decimalVector, jdbcType); - } else if (type.getBitWidth() == 256) { - Decimal256Vector decimalVector = (Decimal256Vector) vector; - return jdbcType == null ? new Decimal256Binder(decimalVector) : new Decimal256Binder(decimalVector, jdbcType); - } - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.Date type) { - switch (type.getUnit()) { - case DAY: - return jdbcType == null ? new DateDayBinder((DateDayVector) vector) : - new DateDayBinder((DateDayVector) vector, /*calendar*/null, jdbcType); - case MILLISECOND: - return jdbcType == null ? new DateMilliBinder((DateMilliVector) vector) : - new DateMilliBinder((DateMilliVector) vector, /*calendar*/null, jdbcType); - default: - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - } - - @Override - public ColumnBinder visit(ArrowType.Time type) { - switch (type.getUnit()) { - case SECOND: - return jdbcType == null ? new Time32Binder((TimeSecVector) vector) : - new Time32Binder((TimeSecVector) vector, jdbcType); - case MILLISECOND: - return jdbcType == null ? new Time32Binder((TimeMilliVector) vector) : - new Time32Binder((TimeMilliVector) vector, jdbcType); - case MICROSECOND: - return jdbcType == null ? new Time64Binder((TimeMicroVector) vector) : - new Time64Binder((TimeMicroVector) vector, jdbcType); - case NANOSECOND: - return jdbcType == null ? 
new Time64Binder((TimeNanoVector) vector) : - new Time64Binder((TimeNanoVector) vector, jdbcType); - default: - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - } - - @Override - public ColumnBinder visit(ArrowType.Timestamp type) { - Calendar calendar = null; - final String timezone = type.getTimezone(); - if (timezone != null && !timezone.isEmpty()) { - calendar = Calendar.getInstance(TimeZone.getTimeZone(ZoneId.of(timezone))); - } - return new TimeStampBinder((TimeStampVector) vector, calendar); - } - - @Override - public ColumnBinder visit(ArrowType.Interval type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.Duration type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/DateDayBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/DateDayBinder.java deleted file mode 100644 index d1bd580..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/DateDayBinder.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.DateDayVector; - -import java.sql.Date; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import java.util.Calendar; - -/** - * A column binder for 32-bit dates. 
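The visitor above is what ColumnBinder.forVector dispatches through; each visit method maps one Arrow type to its binder. A sketch of the resolution for a nullable 32-bit integer column (allocator boilerplate included; assumes the usual org.apache.arrow.memory and org.apache.arrow.vector imports):

    try (RootAllocator allocator = new RootAllocator();
         IntVector ints = new IntVector("xs", allocator)) {
        ints.allocateNew(2);
        ints.setSafe(0, 7);
        ints.setNull(1);
        ints.setValueCount(2);
        // Int with bit width 32 resolves to IntBinder; the nullable field then
        // gets wrapped in NullableColumnBinder by forVector.
        ColumnBinder binder = ColumnBinder.forVector(ints);
        assert binder.getJdbcType() == java.sql.Types.INTEGER;
    }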
- */ -public class DateDayBinder extends BaseColumnBinder<DateDayVector> { - private static final long MILLIS_PER_DAY = 86_400_000; - private final Calendar calendar; - - public DateDayBinder(DateDayVector vector) { - this(vector, null, Types.DATE); - } - - public DateDayBinder(DateDayVector vector, Calendar calendar) { - this(vector, calendar, Types.DATE); - } - - public DateDayBinder(DateDayVector vector, Calendar calendar, int jdbcType) { - super(vector, jdbcType); - this.calendar = calendar; - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - // TODO: multiply with overflow - final long index = (long) rowIndex * DateDayVector.TYPE_WIDTH; - final Date value = new Date(vector.getDataBuffer().getInt(index) * MILLIS_PER_DAY); - if (calendar == null) { - statement.setDate(parameterIndex, value); - } else { - statement.setDate(parameterIndex, value, calendar); - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/DateMilliBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/DateMilliBinder.java deleted file mode 100644 index 1d25423..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/DateMilliBinder.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.DateMilliVector; - -import java.sql.Date; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import java.util.Calendar; - -/** - * A column binder for 64-bit dates.
- */ -public class DateMilliBinder extends BaseColumnBinder<DateMilliVector> { - private final Calendar calendar; - - public DateMilliBinder(DateMilliVector vector) { - this(vector, null, Types.DATE); - } - - public DateMilliBinder(DateMilliVector vector, Calendar calendar) { - this(vector, calendar, Types.DATE); - } - - - public DateMilliBinder(DateMilliVector vector, Calendar calendar, int jdbcType) { - super(vector, jdbcType); - this.calendar = calendar; - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final long index = (long) rowIndex * DateMilliVector.TYPE_WIDTH; - final Date value = new Date(vector.getDataBuffer().getLong(index)); - if (calendar == null) { - statement.setDate(parameterIndex, value); - } else { - statement.setDate(parameterIndex, value, calendar); - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Decimal128Binder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Decimal128Binder.java deleted file mode 100644 index 8a00fd8..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Decimal128Binder.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.util.DecimalUtility; - -import java.math.BigDecimal; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; - -/** - * A binder for 128-bit decimals.
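The two date binders differ only in stored resolution: DateDay is a 32-bit count of days since the UNIX epoch and needs the MILLIS_PER_DAY scale, while DateMilli already stores epoch milliseconds and passes through. The arithmetic, on a hypothetical value:

    int dateDayValue = 19_000;                              // days since 1970-01-01
    long epochMillis = dateDayValue * 86_400_000L;          // DateDayBinder's conversion
    java.sql.Date bound = new java.sql.Date(epochMillis);   // what setDate receives
    // A DateMilliVector holding epochMillis binds the same instant with no multiply.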
- */ -public class Decimal128Binder extends BaseColumnBinder<DecimalVector> { - public Decimal128Binder(DecimalVector vector) { - this(vector, Types.DECIMAL); - } - - public Decimal128Binder(DecimalVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final BigDecimal value = DecimalUtility.getBigDecimalFromArrowBuf( - vector.getDataBuffer(), rowIndex, vector.getScale(), DecimalVector.TYPE_WIDTH); - statement.setBigDecimal(parameterIndex, value); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Decimal256Binder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Decimal256Binder.java deleted file mode 100644 index 314534e..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Decimal256Binder.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.util.DecimalUtility; - -import java.math.BigDecimal; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; - -/** - * A binder for 256-bit decimals. - */ -public class Decimal256Binder extends BaseColumnBinder<Decimal256Vector> { - public Decimal256Binder(Decimal256Vector vector) { - this(vector, Types.DECIMAL); - } - - public Decimal256Binder(Decimal256Vector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final BigDecimal value = DecimalUtility.getBigDecimalFromArrowBuf( - vector.getDataBuffer(), rowIndex, vector.getScale(), Decimal256Vector.TYPE_WIDTH); - statement.setBigDecimal(parameterIndex, value); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/FixedSizeBinaryBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/FixedSizeBinaryBinder.java deleted file mode 100644 index b202891..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/FixedSizeBinaryBinder.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.FixedSizeBinaryVector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; - -/** - * A binder for fixed-width binary types. - */ -public class FixedSizeBinaryBinder extends BaseColumnBinder<FixedSizeBinaryVector> { - /** - * Create a binder for the given vector using the given JDBC type for null values. - * - * @param vector The vector to draw values from. - * @param jdbcType The JDBC type code. - */ - public FixedSizeBinaryBinder(FixedSizeBinaryVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - byte[] binaryData = new byte[vector.getByteWidth()]; - vector.getDataBuffer().getBytes((long) rowIndex * binaryData.length, binaryData, 0, binaryData.length); - statement.setBytes(parameterIndex, binaryData); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Float4Binder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Float4Binder.java deleted file mode 100644 index 01b6606..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Float4Binder.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.Float4Vector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; - -/** - * A binder for 32-bit floats.
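Like the other fixed-width binders in this family, the float binders below read the raw data buffer at rowIndex * TYPE_WIDTH instead of going through the vector accessor; both paths see the same bytes. A small equivalence check (assumes org.apache.arrow.memory.RootAllocator and org.apache.arrow.vector.Float8Vector):

    try (RootAllocator allocator = new RootAllocator();
         Float8Vector v = new Float8Vector("v", allocator)) {
        v.allocateNew(2);
        v.setSafe(0, 1.5);
        v.setSafe(1, -2.25);
        v.setValueCount(2);
        double raw = v.getDataBuffer().getDouble(1L * Float8Vector.TYPE_WIDTH);
        assert raw == v.get(1);  // direct buffer read matches the accessor
    }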
- */ -public class Float4Binder extends BaseColumnBinder<Float4Vector> { - public Float4Binder(Float4Vector vector) { - this(vector, Types.REAL); - } - - public Float4Binder(Float4Vector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final float value = vector.getDataBuffer().getFloat((long) rowIndex * Float4Vector.TYPE_WIDTH); - statement.setFloat(parameterIndex, value); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Float8Binder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Float8Binder.java deleted file mode 100644 index 1568657..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Float8Binder.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.Float8Vector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; - -/** - * A binder for 64-bit floats. - */ -public class Float8Binder extends BaseColumnBinder<Float8Vector> { - public Float8Binder(Float8Vector vector) { - this(vector, Types.DOUBLE); - } - - public Float8Binder(Float8Vector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final double value = vector.getDataBuffer().getDouble((long) rowIndex * Float8Vector.TYPE_WIDTH); - statement.setDouble(parameterIndex, value); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/IntBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/IntBinder.java deleted file mode 100644 index 77291e0..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/IntBinder.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.IntVector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; - -/** - * A column binder for 32-bit integers. - */ -public class IntBinder extends BaseColumnBinder<IntVector> { - public IntBinder(IntVector vector) { - this(vector, Types.INTEGER); - } - - public IntBinder(IntVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final int value = vector.getDataBuffer().getInt((long) rowIndex * IntVector.TYPE_WIDTH); - statement.setInt(parameterIndex, value); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ListBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ListBinder.java deleted file mode 100644 index 0d09456..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/ListBinder.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.UnionListReader; -import org.apache.arrow.vector.util.Text; - -import java.lang.reflect.Array; -import java.util.ArrayList; -import java.util.Arrays; - -/** - * A column binder for lists of primitive values. - */ -public class ListBinder extends BaseColumnBinder<ListVector> { - - private final UnionListReader listReader; - private final Class<?> arrayElementClass; - private final boolean isTextColumn; - - public ListBinder(ListVector vector) { - this(vector, java.sql.Types.ARRAY); - } - - /** - * Init ListBinder and determine type of data vector.
- * - * @param vector corresponding data vector from arrow buffer for binding - * @param jdbcType parameter jdbc type - */ - public ListBinder(ListVector vector, int jdbcType) { - super(vector, jdbcType); - listReader = vector.getReader(); - Class<?> dataVectorClass = vector.getDataVector().getClass(); - try { - arrayElementClass = dataVectorClass.getMethod("getObject", Integer.TYPE).getReturnType(); - } catch (NoSuchMethodException e) { - final String message = String.format("Unable to determine the getObject return type for data vector class %s", - dataVectorClass.getName()); - throw new RuntimeException(message, e); - } - isTextColumn = arrayElementClass.isAssignableFrom(Text.class); - } - - @Override - public void bind(java.sql.PreparedStatement statement, int parameterIndex, int rowIndex) throws java.sql.SQLException { - listReader.setPosition(rowIndex); - ArrayList<?> sourceArray = (ArrayList<?>) listReader.readObject(); - Object array; - if (!isTextColumn) { - array = Array.newInstance(arrayElementClass, sourceArray.size()); - Arrays.setAll((Object[]) array, sourceArray::get); - } else { - array = new String[sourceArray.size()]; - Arrays.setAll((Object[]) array, idx -> sourceArray.get(idx) != null ? sourceArray.get(idx).toString() : null); - } - statement.setObject(parameterIndex, array); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/MapBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/MapBinder.java deleted file mode 100644 index 25b0d74..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/MapBinder.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.impl.UnionMapReader; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.JsonStringHashMap; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Objects; - -/** - * A column binder for maps of primitive values. - */ -public class MapBinder extends BaseColumnBinder<MapVector> { - - private final boolean isTextKey; - private final boolean isTextValue; - private UnionMapReader reader; - - public MapBinder(MapVector vector) { - this(vector, Types.VARCHAR); - } - - /** - * Init MapBinder and determine type of data vector.
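The constructor above probes the element type through the convention that every Arrow vector declares getObject(int) with a covariant return type. The same probe in isolation (class literal chosen purely for illustration):

    Class<?> dataVectorClass = org.apache.arrow.vector.IntVector.class;
    try {
        Class<?> elementClass =
                dataVectorClass.getMethod("getObject", Integer.TYPE).getReturnType();
        assert elementClass == Integer.class;  // IntVector#getObject(int) returns Integer
        // A VarCharVector probe yields Text, which is why bind() special-cases
        // text columns into String[] before calling setObject.
    } catch (NoSuchMethodException e) {
        throw new AssertionError("every Arrow vector exposes getObject(int)", e);
    }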
- * - * @param vector corresponding data vector from arrow buffer for binding - * @param jdbcType parameter jdbc type - */ - public MapBinder(MapVector vector, int jdbcType) { - super(vector, jdbcType); - reader = vector.getReader(); - List<Field> structField = Objects.requireNonNull(vector.getField()).getChildren(); - if (structField.size() != 1) { - throw new IllegalArgumentException("Expected Struct field metadata inside Map field"); - } - List<Field> keyValueFields = Objects.requireNonNull(structField.get(0)).getChildren(); - if (keyValueFields.size() != 2) { - throw new IllegalArgumentException("Expected two children fields " + "inside nested Struct field in Map"); - } - ArrowType keyType = Objects.requireNonNull(keyValueFields.get(0)).getType(); - ArrowType valueType = Objects.requireNonNull(keyValueFields.get(1)).getType(); - isTextKey = ArrowType.Utf8.INSTANCE.equals(keyType); - isTextValue = ArrowType.Utf8.INSTANCE.equals(valueType); - } - - @Override - public void bind(PreparedStatement statement, - int parameterIndex, int rowIndex) throws SQLException { - reader.setPosition(rowIndex); - LinkedHashMap<Object, Object> tags = new JsonStringHashMap<>(); - while (reader.next()) { - Object key = reader.key().readObject(); - Object value = reader.value().readObject(); - tags.put(isTextKey && key != null ? key.toString() : key, - isTextValue && value != null ? value.toString() : value); - } - switch (jdbcType) { - case Types.VARCHAR: - statement.setString(parameterIndex, tags.toString()); - break; - case Types.OTHER: - default: - statement.setObject(parameterIndex, tags); - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/NullableColumnBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/NullableColumnBinder.java deleted file mode 100644 index f765462..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/NullableColumnBinder.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.FieldVector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; - -/** - * A ColumnBinder that checks for nullability before deferring to a type-specific binder.
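NullableColumnBinder, whose body follows, is the single place null checks happen; the typed binders all assume non-null slots. Its per-row effect for a nullable DOUBLE column, as a fragment (assumes stmt is an open PreparedStatement and v is a Float8Vector with a null at slot 0 and 1.5 at slot 1):

    ColumnBinder binder = ColumnBinder.forVector(v);  // nullable field, so forVector wraps it
    binder.bind(stmt, 1, 0);  // slot 0 is null:  stmt.setNull(1, Types.DOUBLE)
    binder.bind(stmt, 1, 1);  // slot 1 has data: stmt.setDouble(1, 1.5)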
- */ -public class NullableColumnBinder implements ColumnBinder { - private final ColumnBinder wrapped; - - public NullableColumnBinder(ColumnBinder wrapped) { - this.wrapped = wrapped; - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - if (wrapped.getVector().isNull(rowIndex)) { - statement.setNull(parameterIndex, wrapped.getJdbcType()); - } else { - wrapped.bind(statement, parameterIndex, rowIndex); - } - } - - @Override - public int getJdbcType() { - return wrapped.getJdbcType(); - } - - @Override - public FieldVector getVector() { - return wrapped.getVector(); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/SmallIntBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/SmallIntBinder.java deleted file mode 100644 index 87f75a6..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/SmallIntBinder.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.SmallIntVector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; - -/** - * A column binder for 16-bit integers. - */ -public class SmallIntBinder extends BaseColumnBinder<SmallIntVector> { - public SmallIntBinder(SmallIntVector vector) { - this(vector, Types.SMALLINT); - } - - public SmallIntBinder(SmallIntVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final short value = vector.getDataBuffer().getShort((long) rowIndex * SmallIntVector.TYPE_WIDTH); - statement.setShort(parameterIndex, value); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Time32Binder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Time32Binder.java deleted file mode 100644 index d01b737..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Time32Binder.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeSecVector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Time; -import java.sql.Types; - -/** - * A binder for 32-bit time types. - */ -public class Time32Binder extends BaseColumnBinder<BaseFixedWidthVector> { - private static final long TYPE_WIDTH = 4; - - private final long factor; - - public Time32Binder(TimeSecVector vector) { - this(vector, Types.TIME); - } - - public Time32Binder(TimeMilliVector vector) { - this(vector, Types.TIME); - } - - public Time32Binder(TimeSecVector vector, int jdbcType) { - this(vector, /*factor*/1_000, jdbcType); - } - - public Time32Binder(TimeMilliVector vector, int jdbcType) { - this(vector, /*factor*/1, jdbcType); - } - - Time32Binder(BaseFixedWidthVector vector, long factor, int jdbcType) { - super(vector, jdbcType); - this.factor = factor; - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - // TODO: multiply with overflow - // TODO: take in a Calendar as well? - final Time value = new Time(vector.getDataBuffer().getInt(rowIndex * TYPE_WIDTH) * factor); - statement.setTime(parameterIndex, value); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Time64Binder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Time64Binder.java deleted file mode 100644 index 12a8d5a..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/Time64Binder.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeNanoVector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Time; -import java.sql.Types; - -/** - * A binder for 64-bit time types.
- */ -public class Time64Binder extends BaseColumnBinder<BaseFixedWidthVector> { - private static final long TYPE_WIDTH = 8; - - private final long factor; - - public Time64Binder(TimeMicroVector vector) { - this(vector, Types.TIME); - } - - public Time64Binder(TimeNanoVector vector) { - this(vector, Types.TIME); - } - - public Time64Binder(TimeMicroVector vector, int jdbcType) { - this(vector, /*factor*/1_000, jdbcType); - } - - public Time64Binder(TimeNanoVector vector, int jdbcType) { - this(vector, /*factor*/1_000_000, jdbcType); - } - - Time64Binder(BaseFixedWidthVector vector, long factor, int jdbcType) { - super(vector, jdbcType); - this.factor = factor; - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - // TODO: option to throw on truncation (vendor Guava IntMath#multiply) - final Time value = new Time(vector.getDataBuffer().getLong(rowIndex * TYPE_WIDTH) / factor); - statement.setTime(parameterIndex, value); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/TimeStampBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/TimeStampBinder.java deleted file mode 100644 index da859d1..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/TimeStampBinder.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.types.pojo.ArrowType; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Timestamp; -import java.sql.Types; -import java.util.Calendar; - -/** - * A column binder for timestamps. - */ -public class TimeStampBinder extends BaseColumnBinder<TimeStampVector> { - private final Calendar calendar; - private final long unitsPerSecond; - private final long nanosPerUnit; - - /** - * Create a binder for a timestamp vector using the default JDBC type code. - */ - public TimeStampBinder(TimeStampVector vector, Calendar calendar) { - this(vector, calendar, isZoned(vector.getField().getType()) ? Types.TIMESTAMP_WITH_TIMEZONE : Types.TIMESTAMP); - } - - /** - * Create a binder for a timestamp vector. - * - * @param vector The vector to pull values from. - * @param calendar Optionally, the calendar to pass to JDBC. - * @param jdbcType The JDBC type code to use for null values.
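The factor fields in the two time binders above encode one conversion target: java.sql.Time carries epoch milliseconds, so 32-bit second values are multiplied up and 64-bit micro/nano values are divided down. Checking the round trip on a hypothetical wall-clock value:

    long seconds = 12 * 3_600 + 34 * 60 + 56;                        // 12:34:56
    java.sql.Time fromSec = new java.sql.Time(seconds * 1_000L);     // Time32Binder, SECOND unit
    long nanos = seconds * 1_000_000_000L;                           // same instant in TimeNano units
    java.sql.Time fromNano = new java.sql.Time(nanos / 1_000_000L);  // Time64Binder, NANOSECOND unit
    assert fromSec.equals(fromNano);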
- */ - public TimeStampBinder(TimeStampVector vector, Calendar calendar, int jdbcType) { - super(vector, jdbcType); - this.calendar = calendar; - - final ArrowType.Timestamp type = (ArrowType.Timestamp) vector.getField().getType(); - switch (type.getUnit()) { - case SECOND: - this.unitsPerSecond = 1; - this.nanosPerUnit = 1_000_000_000; - break; - case MILLISECOND: - this.unitsPerSecond = 1_000; - this.nanosPerUnit = 1_000_000; - break; - case MICROSECOND: - this.unitsPerSecond = 1_000_000; - this.nanosPerUnit = 1_000; - break; - case NANOSECOND: - this.unitsPerSecond = 1_000_000_000; - this.nanosPerUnit = 1; - break; - default: - throw new IllegalArgumentException("Invalid time unit in " + type); - } - } - - private static boolean isZoned(ArrowType type) { - final String timezone = ((ArrowType.Timestamp) type).getTimezone(); - return timezone != null && !timezone.isEmpty(); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - // TODO: option to throw on truncation (vendor Guava IntMath#multiply) or overflow - final long rawValue = vector.getDataBuffer().getLong((long) rowIndex * TimeStampVector.TYPE_WIDTH); - final long seconds = rawValue / unitsPerSecond; - final int nanos = (int) ((rawValue - (seconds * unitsPerSecond)) * nanosPerUnit); - final Timestamp value = new Timestamp(seconds * 1_000); - value.setNanos(nanos); - if (calendar != null) { - // Timestamp == Date == UTC timestamp (confusingly). Arrow's timestamp with timezone is a UTC value with a - // zone offset, so we don't need to do any conversion. - statement.setTimestamp(parameterIndex, value, calendar); - } else { - // Arrow timestamp without timezone isn't strictly convertible to any timezone. So this is technically wrong, - // but there is no 'correct' interpretation here. The application should provide a calendar. - statement.setTimestamp(parameterIndex, value); - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/TinyIntBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/TinyIntBinder.java deleted file mode 100644 index 616bca2..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/TinyIntBinder.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.vector.TinyIntVector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; - -/** - * A column binder for 8-bit integers. 
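TimeStampBinder's bind, shown above, splits the raw value into whole seconds for the Timestamp constructor plus a nanosecond remainder via setNanos, so sub-millisecond precision survives. Worked through for MICROSECOND precision, where unitsPerSecond = 1_000_000 and nanosPerUnit = 1_000 (value hypothetical):

    long rawValue = 1_700_000_000_123_456L;                     // microseconds since epoch
    long seconds = rawValue / 1_000_000L;
    int nanos = (int) ((rawValue - seconds * 1_000_000L) * 1_000L);
    java.sql.Timestamp ts = new java.sql.Timestamp(seconds * 1_000L);
    ts.setNanos(nanos);
    assert ts.getNanos() == 123_456_000;                        // remainder preserved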
- */ -public class TinyIntBinder extends BaseColumnBinder<TinyIntVector> { - public TinyIntBinder(TinyIntVector vector) { - this(vector, Types.TINYINT); - } - - public TinyIntBinder(TinyIntVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final byte value = vector.getDataBuffer().getByte((long) rowIndex * TinyIntVector.TYPE_WIDTH); - statement.setByte(parameterIndex, value); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/VarBinaryBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/VarBinaryBinder.java deleted file mode 100644 index 5cb3dba..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/VarBinaryBinder.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.vector.ElementAddressableVector; -import org.apache.arrow.vector.FieldVector; - -import java.sql.PreparedStatement; -import java.sql.SQLException; - -/** - * A binder for variable-width binary types. - * - * @param <T> The binary vector. - */ -public class VarBinaryBinder<T extends FieldVector & ElementAddressableVector> extends BaseColumnBinder<T> { - private final ArrowBufPointer element; - - /** - * Create a binder for the given vector using the given JDBC type for null values. - * - * @param vector The vector to draw values from. - * @param jdbcType The JDBC type code.
- */ - public VarBinaryBinder(T vector, int jdbcType) { - super(vector, jdbcType); - this.element = new ArrowBufPointer(); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - vector.getDataPointer(rowIndex, element); - if (element.getBuf() == null) { - statement.setNull(parameterIndex, jdbcType); - return; - } - if (element.getLength() > (long) Integer.MAX_VALUE) { - final String message = String.format("Length of value at index %d (%d) exceeds Integer.MAX_VALUE", - rowIndex, element.getLength()); - throw new RuntimeException(message); - } - byte[] binaryData = new byte[(int) element.getLength()]; - element.getBuf().getBytes(element.getOffset(), binaryData); - statement.setBytes(parameterIndex, binaryData); - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/VarCharBinder.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/VarCharBinder.java deleted file mode 100644 index eb458f8..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/VarCharBinder.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder; - -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VariableWidthVector; - -import java.nio.charset.StandardCharsets; -import java.sql.PreparedStatement; -import java.sql.SQLException; - -/** - * A binder for variable-width string types. - * - * @param <T> The text vector. - */ -public class VarCharBinder<T extends FieldVector & VariableWidthVector> extends BaseColumnBinder<T> { - private final ArrowBufPointer element; - - /** - * Create a binder for the given vector using the given JDBC type for null values. - * - * @param vector The vector to draw values from. - * @param jdbcType The JDBC type code.
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/package-info.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/package-info.java
deleted file mode 100644
index 76674e0..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/binder/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Utilities to bind Arrow data as JDBC prepared statement parameters.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.binder;
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/ArrayConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/ArrayConsumer.java
deleted file mode 100644
index 85cdb09..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/ArrayConsumer.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.vector.FieldVector;
-import org.apache.arrow.vector.ValueVector;
-import org.apache.arrow.vector.complex.ListVector;
-
-import java.io.IOException;
-import java.sql.Array;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-/**
- * Consumer which consume array type values from {@link ResultSet}.
- * Write the data to {@link ListVector}.
- */
-public abstract class ArrayConsumer extends BaseConsumer<ListVector> {
-
-    protected final JdbcConsumer delegate;
-    private final ValueVector innerVector;
-    protected int innerVectorIndex = 0;
-
-    /**
-     * Instantiate a ArrayConsumer.
-     */
-    public ArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) {
-        super(vector, index);
-        this.delegate = delegate;
-        this.innerVector = vector.getDataVector();
-    }
-
-    /**
-     * Creates a consumer for {@link ListVector}.
-     */
-    public static ArrayConsumer createConsumer(
-            ListVector vector, JdbcConsumer delegate, int index, boolean nullable) {
-        if (nullable) {
-            return new NullableArrayConsumer(vector, delegate, index);
-        } else {
-            return new NonNullableArrayConsumer(vector, delegate, index);
-        }
-    }
-
-    @Override
-    public void close() throws Exception {
-        this.vector.close();
-        this.delegate.close();
-    }
-
-    @Override
-    public void resetValueVector(ListVector vector) {
-        super.resetValueVector(vector);
-
-        FieldVector childVector = vector.getDataVector();
-        this.delegate.resetValueVector(childVector);
-
-        innerVectorIndex = 0;
-    }
-
-    void ensureInnerVectorCapacity(int targetCapacity) {
-        while (innerVector.getValueCapacity() < targetCapacity) {
-            innerVector.reAlloc();
-        }
-    }
-
-    /**
-     * Nullable consumer for {@link ListVector}.
-     */
-    static class NullableArrayConsumer extends ArrayConsumer {
-
-        /**
-         * Instantiate a nullable array consumer.
-         */
-        public NullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) {
-            super(vector, delegate, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException, IOException {
-            final Array array = resultSet.getArray(columnIndexInResultSet);
-            if (!resultSet.wasNull()) {
-                vector.startNewValue(currentIndex);
-                int count = 0;
-                try (ResultSet rs = array.getResultSet()) {
-                    while (rs.next()) {
-                        ensureInnerVectorCapacity(innerVectorIndex + count + 1);
-                        delegate.consume(rs);
-                        count++;
-                    }
-                }
-                vector.endValue(currentIndex, count);
-                innerVectorIndex += count;
-            }
-            currentIndex++;
-        }
-    }
-
-    /**
-     * Non-nullable consumer for {@link ListVector}.
-     */
-    static class NonNullableArrayConsumer extends ArrayConsumer {
-
-        /**
-         * Instantiate a nullable array consumer.
-         */
-        public NonNullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) {
-            super(vector, delegate, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException, IOException {
-            final Array array = resultSet.getArray(columnIndexInResultSet);
-            vector.startNewValue(currentIndex);
-            int count = 0;
-            try (ResultSet rs = array.getResultSet()) {
-                while (rs.next()) {
-                    ensureInnerVectorCapacity(innerVectorIndex + count + 1);
-                    delegate.consume(rs);
-                    count++;
-                }
-            }
-            vector.endValue(currentIndex, count);
-            innerVectorIndex += count;
-            currentIndex++;
-        }
-    }
-}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BaseConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BaseConsumer.java
deleted file mode 100644
index c896941..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BaseConsumer.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.vector.ValueVector;
-
-/**
- * Base class for all consumers.
- *
- * @param <V> vector type.
- */
-public abstract class BaseConsumer<V extends ValueVector> implements JdbcConsumer<V> {
-
-    protected final int columnIndexInResultSet;
-    protected V vector;
-    protected int currentIndex;
-
-    /**
-     * Constructs a new consumer.
-     *
-     * @param vector the underlying vector for the consumer.
-     * @param index  the column id for the consumer.
-     */
-    public BaseConsumer(V vector, int index) {
-        this.vector = vector;
-        this.columnIndexInResultSet = index;
-    }
-
-    @Override
-    public void close() throws Exception {
-        this.vector.close();
-    }
-
-    @Override
-    public void resetValueVector(V vector) {
-        this.vector = vector;
-        this.currentIndex = 0;
-    }
-}
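BaseConsumer only tracks the target vector, the source column index, and a write cursor; the real convention lives in the subclasses' consume() methods: call the typed ResultSet getter first, consult wasNull() before writing, and advance the cursor unconditionally so row positions stay aligned across columns. A minimal standalone version of that convention for an IntVector is sketched below (hypothetical helper; it uses setSafe because, unlike the deleted adaptor, it does not pre-allocate).

import org.apache.arrow.vector.IntVector;

import java.sql.ResultSet;
import java.sql.SQLException;

public class IntConsumeSketch {
    /**
     * Copies the int column at columnIndex from all remaining rows of rs into
     * vector, mirroring NullableIntConsumer: read first, then check wasNull()
     * before set(), and always advance the row cursor.
     */
    static int consumeAll(ResultSet rs, int columnIndex, IntVector vector) throws SQLException {
        int row = 0;
        while (rs.next()) {
            int value = rs.getInt(columnIndex);
            if (!rs.wasNull()) {
                // setSafe() grows the buffers on demand; the deleted code can use
                // set() only because it allocates enough memory up front.
                vector.setSafe(row, value);
            }
            row++;
        }
        vector.setValueCount(row);
        return row;
    }
}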
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BigIntConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BigIntConsumer.java
deleted file mode 100644
index 4e7f5b1..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BigIntConsumer.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.vector.BigIntVector;
-
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-/**
- * Consumer which consume bigint type values from {@link ResultSet}.
- * Write the data to {@link BigIntVector}.
- */
-public class BigIntConsumer {
-
-    /**
-     * Creates a consumer for {@link BigIntVector}.
-     */
-    public static JdbcConsumer<BigIntVector> createConsumer(BigIntVector vector, int index, boolean nullable) {
-        if (nullable) {
-            return new NullableBigIntConsumer(vector, index);
-        } else {
-            return new NonNullableBigIntConsumer(vector, index);
-        }
-    }
-
-    /**
-     * Nullable consumer for big int.
-     */
-    static class NullableBigIntConsumer extends BaseConsumer<BigIntVector> {
-
-        /**
-         * Instantiate a BigIntConsumer.
-         */
-        public NullableBigIntConsumer(BigIntVector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            long value = resultSet.getLong(columnIndexInResultSet);
-            if (!resultSet.wasNull()) {
-                // for fixed width vectors, we have allocated enough memory proactively,
-                // so there is no need to call the setSafe method here.
-                vector.set(currentIndex, value);
-            }
-            currentIndex++;
-        }
-    }
-
-    /**
-     * Non-nullable consumer for big int.
-     */
-    static class NonNullableBigIntConsumer extends BaseConsumer<BigIntVector> {
-
-        /**
-         * Instantiate a BigIntConsumer.
-         */
-        public NonNullableBigIntConsumer(BigIntVector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            long value = resultSet.getLong(columnIndexInResultSet);
-            // for fixed width vectors, we have allocated enough memory proactively,
-            // so there is no need to call the setSafe method here.
-            vector.set(currentIndex, value);
-            currentIndex++;
-        }
-    }
-}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BinaryConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BinaryConsumer.java
deleted file mode 100644
index 0b05edd..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BinaryConsumer.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.memory.ArrowBuf;
-import org.apache.arrow.vector.BitVectorHelper;
-import org.apache.arrow.vector.VarBinaryVector;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-/**
- * Consumer which consume binary type values from {@link ResultSet}.
- * Write the data to {@link VarBinaryVector}.
- */
-public abstract class BinaryConsumer extends BaseConsumer<VarBinaryVector> {
-
-    private final byte[] reuseBytes = new byte[1024];
-
-    /**
-     * Instantiate a BinaryConsumer.
-     */
-    public BinaryConsumer(VarBinaryVector vector, int index) {
-        super(vector, index);
-        if (vector != null) {
-            vector.allocateNewSafe();
-        }
-    }
-
-    /**
-     * Creates a consumer for {@link VarBinaryVector}.
-     */
-    public static BinaryConsumer createConsumer(VarBinaryVector vector, int index, boolean nullable) {
-        if (nullable) {
-            return new NullableBinaryConsumer(vector, index);
-        } else {
-            return new NonNullableBinaryConsumer(vector, index);
-        }
-    }
-
-    /**
-     * consume a InputStream.
-     */
-    public void consume(InputStream is) throws IOException {
-        if (is != null) {
-            while (currentIndex >= vector.getValueCapacity()) {
-                vector.reallocValidityAndOffsetBuffers();
-            }
-            final int startOffset = vector.getStartOffset(currentIndex);
-            final ArrowBuf offsetBuffer = vector.getOffsetBuffer();
-            int dataLength = 0;
-            int read;
-            while ((read = is.read(reuseBytes)) != -1) {
-                while (vector.getDataBuffer().capacity() < (startOffset + dataLength + read)) {
-                    vector.reallocDataBuffer();
-                }
-                vector.getDataBuffer().setBytes(startOffset + dataLength, reuseBytes, 0, read);
-                dataLength += read;
-            }
-            offsetBuffer.setInt((currentIndex + 1) * VarBinaryVector.OFFSET_WIDTH, startOffset + dataLength);
-            BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex);
-            vector.setLastSet(currentIndex);
-        } else {
-            final int startOffset = vector.getStartOffset(currentIndex);
-            final ArrowBuf offsetBuffer = vector.getOffsetBuffer();
-            offsetBuffer.setInt((currentIndex + 1) * VarBinaryVector.OFFSET_WIDTH, startOffset + 0);
-            vector.setLastSet(currentIndex);
-        }
-    }
-
-    public void moveWriterPosition() {
-        currentIndex++;
-    }
-
-    @Override
-    public void resetValueVector(VarBinaryVector vector) {
-        this.vector = vector;
-        this.vector.allocateNewSafe();
-        this.currentIndex = 0;
-    }
-
-    /**
-     * Consumer for nullable binary data.
-     */
-    static class NullableBinaryConsumer extends BinaryConsumer {
-
-        /**
-         * Instantiate a BinaryConsumer.
-         */
-        public NullableBinaryConsumer(VarBinaryVector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException, IOException {
-            InputStream is = resultSet.getBinaryStream(columnIndexInResultSet);
-            consume(is);
-            moveWriterPosition();
-        }
-    }
-
-    /**
-     * Consumer for non-nullable binary data.
-     */
-    static class NonNullableBinaryConsumer extends BinaryConsumer {
-
-        /**
-         * Instantiate a BinaryConsumer.
-         */
-        public NonNullableBinaryConsumer(VarBinaryVector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException, IOException {
-            InputStream is = resultSet.getBinaryStream(columnIndexInResultSet);
-            consume(is);
-            moveWriterPosition();
-        }
-    }
-}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BitConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BitConsumer.java
deleted file mode 100644
index 6f935e4..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BitConsumer.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.vector.BitVector;
-
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-/**
- * Consumer which consume bit type values from {@link ResultSet}.
- * Write the data to {@link BitVector}.
- */
-public class BitConsumer {
-
-    /**
-     * Creates a consumer for {@link BitVector}.
-     */
-    public static JdbcConsumer<BitVector> createConsumer(BitVector vector, int index, boolean nullable) {
-        if (nullable) {
-            return new NullableBitConsumer(vector, index);
-        } else {
-            return new NonNullableBitConsumer(vector, index);
-        }
-    }
-
-    /**
-     * Nullable consumer for {@link BitVector}.
-     */
-    static class NullableBitConsumer extends BaseConsumer<BitVector> {
-
-        /**
-         * Instantiate a BitConsumer.
-         */
-        public NullableBitConsumer(BitVector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            boolean value = resultSet.getBoolean(columnIndexInResultSet);
-            if (!resultSet.wasNull()) {
-                // for fixed width vectors, we have allocated enough memory proactively,
-                // so there is no need to call the setSafe method here.
-                vector.set(currentIndex, value ? 1 : 0);
-            }
-            currentIndex++;
-        }
-    }
-
-    /**
-     * Non-nullable consumer for {@link BitVector}.
-     */
-    static class NonNullableBitConsumer extends BaseConsumer<BitVector> {
-
-        /**
-         * Instantiate a BitConsumer.
-         */
-        public NonNullableBitConsumer(BitVector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            boolean value = resultSet.getBoolean(columnIndexInResultSet);
-            // for fixed width vectors, we have allocated enough memory proactively,
-            // so there is no need to call the setSafe method here.
-            vector.set(currentIndex, value ? 1 : 0);
-            currentIndex++;
-        }
    }
-}
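Every consumer family in this package comes in a nullable and a non-nullable flavor, and the createConsumer(vector, index, nullable) factories pick one so that the per-row wasNull() branch is paid only when the column can actually hold NULLs. A sketch of how that decision can be driven from column metadata (names illustrative):

import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;

public class NullabilitySketch {
    /**
     * Decide which consumer flavor to build for a column.
     * columnNoNulls is the only value that guarantees the absence of NULLs;
     * both columnNullable and columnNullableUnknown must be treated as nullable.
     */
    static boolean isNullable(ResultSet rs, int columnIndex) throws SQLException {
        ResultSetMetaData meta = rs.getMetaData();
        return meta.isNullable(columnIndex) != ResultSetMetaData.columnNoNulls;
    }
}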
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BlobConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BlobConsumer.java
deleted file mode 100644
index 153c187..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/BlobConsumer.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.vector.VarBinaryVector;
-
-import java.io.IOException;
-import java.sql.Blob;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-/**
- * Consumer which consume blob type values from {@link ResultSet}.
- * Write the data to {@link VarBinaryVector}.
- */
-public class BlobConsumer extends BaseConsumer<VarBinaryVector> {
-
-    private final boolean nullable;
-    private BinaryConsumer delegate;
-
-    /**
-     * Instantiate a BlobConsumer.
-     */
-    public BlobConsumer(BinaryConsumer delegate, int index, boolean nullable) {
-        super(null, index);
-        this.delegate = delegate;
-        this.nullable = nullable;
-    }
-
-    /**
-     * Creates a consumer for {@link VarBinaryVector}.
-     */
-    public static BlobConsumer createConsumer(
-            BinaryConsumer delegate, int index, boolean nullable) {
-        return new BlobConsumer(delegate, index, nullable);
-    }
-
-    @Override
-    public void consume(ResultSet resultSet) throws SQLException, IOException {
-        Blob blob = resultSet.getBlob(columnIndexInResultSet);
-        if (blob != null) {
-            delegate.consume(blob.getBinaryStream());
-        }
-        delegate.moveWriterPosition();
-    }
-
-    @Override
-    public void close() throws Exception {
-        delegate.close();
-    }
-
-    @Override
-    public void resetValueVector(VarBinaryVector vector) {
-        delegate = BinaryConsumer.createConsumer(vector, columnIndexInResultSet, nullable);
-    }
-}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/ClobConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/ClobConsumer.java
deleted file mode 100644
index 7f737b0..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/ClobConsumer.java
+++ /dev/null
@@ -1,168 +0,0 @@
-///*
-// * Licensed to the Apache Software Foundation (ASF) under one or more
-// * contributor license agreements. See the NOTICE file distributed with
-// * this work for additional information regarding copyright ownership.
-// * The ASF licenses this file to You under the Apache License, Version 2.0
-// * (the "License"); you may not use this file except in compliance with
-// * the License. You may obtain a copy of the License at
-// *
-// * http://www.apache.org/licenses/LICENSE-2.0
-// *
-// * Unless required by applicable law or agreed to in writing, software
-// * distributed under the License is distributed on an "AS IS" BASIS,
-// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// * See the License for the specific language governing permissions and
-// * limitations under the License.
-// */
-//
-//package com.antgroup.antchain.fastdf.dataproxy.service.connector.rdbms.adaptor.consumer;
-//
-//import org.apache.arrow.memory.ArrowBuf;
-//import org.apache.arrow.memory.util.MemoryUtil;
-//import org.apache.arrow.vector.BitVectorHelper;
-//import org.apache.arrow.vector.VarCharVector;
-//
-//import java.nio.charset.StandardCharsets;
-//import java.sql.Clob;
-//import java.sql.ResultSet;
-//import java.sql.SQLException;
-//
-///**
-// * Consumer which consume clob type values from {@link ResultSet}.
-// * Write the data to {@link VarCharVector}.
-// */
-//public abstract class ClobConsumer extends BaseConsumer<VarCharVector> {
-//
-//    /**
-//     * Creates a consumer for {@link VarCharVector}.
-//     */
-//    public static ClobConsumer createConsumer(VarCharVector vector, int index, boolean nullable) {
-//        if (nullable) {
-//            return new NullableClobConsumer(vector, index);
-//        } else {
-//            return new NonNullableClobConsumer(vector, index);
-//        }
-//    }
-//
-//    private static final int BUFFER_SIZE = 256;
-//
-//    /**
-//     * Instantiate a ClobConsumer.
-//     */
-//    public ClobConsumer(VarCharVector vector, int index) {
-//        super(vector, index);
-//        if (vector != null) {
-//            vector.allocateNewSafe();
-//        }
-//    }
-//
-//    @Override
-//    public void resetValueVector(VarCharVector vector) {
-//        this.vector = vector;
-//        this.vector.allocateNewSafe();
-//        this.currentIndex = 0;
-//    }
-//
-//    /**
-//     * Nullable consumer for clob data.
-//     */
-//    static class NullableClobConsumer extends ClobConsumer {
-//
-//        /**
-//         * Instantiate a ClobConsumer.
-//         */
-//        public NullableClobConsumer(VarCharVector vector, int index) {
-//            super(vector, index);
-//        }
-//
-//        @Override
-//        public void consume(ResultSet resultSet) throws SQLException {
-//            Clob clob = resultSet.getClob(columnIndexInResultSet);
-//            if (!resultSet.wasNull()) {
-//                if (clob != null) {
-//                    long length = clob.length();
-//
-//                    int read = 1;
-//                    int readSize = length < BUFFER_SIZE ? (int) length : BUFFER_SIZE;
-//                    int totalBytes = 0;
-//
-//                    ArrowBuf dataBuffer = vector.getDataBuffer();
-//                    ArrowBuf offsetBuffer = vector.getOffsetBuffer();
-//                    int startIndex = offsetBuffer.getInt(currentIndex * 4);
-//                    while (read <= length) {
-//                        String str = clob.getSubString(read, readSize);
-//                        byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
-//
-//                        while ((dataBuffer.writerIndex() + bytes.length) > dataBuffer.capacity()) {
-//                            vector.reallocDataBuffer();
-//                        }
-//                        MemoryUtil.UNSAFE.copyMemory(
-//                                bytes,
-//                                MemoryUtil.BYTE_ARRAY_BASE_OFFSET,
-//                                null,
-//                                dataBuffer.memoryAddress() + startIndex + totalBytes,
-//                                bytes.length);
-//
-//                        totalBytes += bytes.length;
-//                        read += readSize;
-//                    }
-//                    offsetBuffer.setInt((currentIndex + 1) * 4, startIndex + totalBytes);
-//                    BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex);
-//                    vector.setLastSet(currentIndex);
-//                }
-//            }
-//            currentIndex++;
-//        }
-//    }
-//
-//    /**
-//     * Non-nullable consumer for clob data.
-//     */
-//    static class NonNullableClobConsumer extends ClobConsumer {
-//
-//        /**
-//         * Instantiate a ClobConsumer.
-//         */
-//        public NonNullableClobConsumer(VarCharVector vector, int index) {
-//            super(vector, index);
-//        }
-//
-//        @Override
-//        public void consume(ResultSet resultSet) throws SQLException {
-//            Clob clob = resultSet.getClob(columnIndexInResultSet);
-//            if (clob != null) {
-//                long length = clob.length();
-//
-//                int read = 1;
-//                int readSize = length < BUFFER_SIZE ? (int) length : BUFFER_SIZE;
-//                int totalBytes = 0;
-//
-//                ArrowBuf dataBuffer = vector.getDataBuffer();
-//                ArrowBuf offsetBuffer = vector.getOffsetBuffer();
-//                int startIndex = offsetBuffer.getInt(currentIndex * 4);
-//                while (read <= length) {
-//                    String str = clob.getSubString(read, readSize);
-//                    byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
-//
-//                    while ((dataBuffer.writerIndex() + bytes.length) > dataBuffer.capacity()) {
-//                        vector.reallocDataBuffer();
-//                    }
-//                    MemoryUtil.UNSAFE.copyMemory(
-//                            bytes,
-//                            MemoryUtil.BYTE_ARRAY_BASE_OFFSET,
-//                            null,
-//                            dataBuffer.memoryAddress() + startIndex + totalBytes,
-//                            bytes.length);
-//
-//                    totalBytes += bytes.length;
-//                    read += readSize;
-//                }
-//                offsetBuffer.setInt((currentIndex + 1) * 4, startIndex + totalBytes);
-//                BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex);
-//                vector.setLastSet(currentIndex);
-//            }
-//
-//            currentIndex++;
-//        }
-//    }
-//}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/CompositeJdbcConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/CompositeJdbcConsumer.java
deleted file mode 100644
index a2d4dd7..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/CompositeJdbcConsumer.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.util.AutoCloseables;
-import org.apache.arrow.vector.ValueVector;
-import org.apache.arrow.vector.VectorSchemaRoot;
-
-import java.io.IOException;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-/**
- * Composite consumer which hold all consumers.
- * It manages the consume and cleanup process.
- */
-public class CompositeJdbcConsumer implements JdbcConsumer {
-
-    private final JdbcConsumer[] consumers;
-
-    /**
-     * Construct an instance.
-     */
-    public CompositeJdbcConsumer(JdbcConsumer[] consumers) {
-        this.consumers = consumers;
-    }
-
-    @Override
-    public void consume(ResultSet rs) throws SQLException, IOException {
-        for (int i = 0; i < consumers.length; i++) {
-            consumers[i].consume(rs);
-        }
-    }
-
-    @Override
-    public void close() {
-
-        try {
-            // clean up
-            AutoCloseables.close(consumers);
-        } catch (Exception e) {
-            throw new RuntimeException("Error occurred while releasing resources.", e);
-        }
-
-    }
-
-    @Override
-    public void resetValueVector(ValueVector vector) {
-
-    }
-
-    /**
-     * Reset inner consumers through vectors in the vector schema root.
-     */
-    public void resetVectorSchemaRoot(VectorSchemaRoot root) {
-        assert root.getFieldVectors().size() == consumers.length;
-        for (int i = 0; i < consumers.length; i++) {
-            consumers[i].resetValueVector(root.getVector(i));
-        }
-    }
-}
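CompositeJdbcConsumer is the piece that turns the per-column consumers into a whole-result-set reader: one consume() per column for every row, then a row count stamped on the root. A hand-rolled equivalent of that loop for an (id INT, name VARCHAR) result set, assuming the caller manages the allocator and closes the returned root:

import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;

import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;

public class CompositeConsumeSketch {
    /** Reads every row of rs into a new VectorSchemaRoot; caller closes it. */
    static VectorSchemaRoot readAll(ResultSet rs, RootAllocator allocator) throws SQLException {
        IntVector id = new IntVector("id", allocator);
        VarCharVector name = new VarCharVector("name", allocator);
        VectorSchemaRoot root = new VectorSchemaRoot(List.of(id, name));
        int row = 0;
        while (rs.next()) {
            // One "consume" per column, exactly as the composite consumer fans out.
            int idValue = rs.getInt(1);
            if (!rs.wasNull()) {
                id.setSafe(row, idValue);
            }
            String nameValue = rs.getString(2);
            if (nameValue != null) {
                name.setSafe(row, nameValue.getBytes(StandardCharsets.UTF_8));
            }
            row++;
        }
        // setRowCount also sets the value count on every child vector.
        root.setRowCount(row);
        return root;
    }
}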
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DateConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DateConsumer.java
deleted file mode 100644
index a6c207a..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DateConsumer.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.vector.DateDayVector;
-import org.apache.arrow.vector.DateMilliVector;
-
-import java.sql.Date;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.Calendar;
-import java.util.concurrent.TimeUnit;
-
-/**
- * Consumer which consume date type values from {@link ResultSet}.
- * Write the data to {@link DateDayVector}.
- */
-public class DateConsumer {
-
-    /**
-     * Creates a consumer for {@link DateMilliVector}.
-     */
-    public static JdbcConsumer<DateDayVector> createConsumer(
-            DateDayVector vector, int index, boolean nullable, Calendar calendar) {
-        if (nullable) {
-            return new NullableDateConsumer(vector, index, calendar);
-        } else {
-            return new NonNullableDateConsumer(vector, index, calendar);
-        }
-    }
-
-    /**
-     * Nullable consumer for date.
-     */
-    static class NullableDateConsumer extends BaseConsumer<DateDayVector> {
-
-        protected final Calendar calendar;
-
-        /**
-         * Instantiate a DateConsumer.
-         */
-        public NullableDateConsumer(DateDayVector vector, int index) {
-            this(vector, index, /* calendar */null);
-        }
-
-        /**
-         * Instantiate a DateConsumer.
-         */
-        public NullableDateConsumer(DateDayVector vector, int index, Calendar calendar) {
-            super(vector, index);
-            this.calendar = calendar;
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) :
-                    resultSet.getDate(columnIndexInResultSet, calendar);
-            if (!resultSet.wasNull()) {
-                // for fixed width vectors, we have allocated enough memory proactively,
-                // so there is no need to call the setSafe method here.
-                vector.set(currentIndex, Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime())));
-            }
-            currentIndex++;
-        }
-    }
-
-    /**
-     * Non-nullable consumer for date.
-     */
-    static class NonNullableDateConsumer extends BaseConsumer<DateDayVector> {
-
-        protected final Calendar calendar;
-
-        /**
-         * Instantiate a DateConsumer.
-         */
-        public NonNullableDateConsumer(DateDayVector vector, int index) {
-            this(vector, index, /* calendar */null);
-        }
-
-        /**
-         * Instantiate a DateConsumer.
-         */
-        public NonNullableDateConsumer(DateDayVector vector, int index, Calendar calendar) {
-            super(vector, index);
-            this.calendar = calendar;
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) :
-                    resultSet.getDate(columnIndexInResultSet, calendar);
-            // for fixed width vectors, we have allocated enough memory proactively,
-            // so there is no need to call the setSafe method here.
-            vector.set(currentIndex, Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime())));
-            currentIndex++;
-        }
-    }
-}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DecimalConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DecimalConsumer.java
deleted file mode 100644
index 5fec433..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DecimalConsumer.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.vector.DecimalVector;
-
-import java.math.BigDecimal;
-import java.math.RoundingMode;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-/**
- * Consumer which consume decimal type values from {@link ResultSet}.
- * Write the data to {@link DecimalVector}.
- */
-public abstract class DecimalConsumer extends BaseConsumer<DecimalVector> {
-    private final RoundingMode bigDecimalRoundingMode;
-    private final int scale;
-
-    /**
-     * Constructs a new consumer.
-     *
-     * @param vector the underlying vector for the consumer.
-     * @param index  the column id for the consumer.
-     */
-    public DecimalConsumer(DecimalVector vector, int index) {
-        this(vector, index, null);
-    }
-
-    /**
-     * Constructs a new consumer, with optional coercibility.
-     *
-     * @param vector                 the underlying vector for the consumer.
-     * @param index                  the column index for the consumer.
-     * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does not match that
-     *                               of the target vector. Set to null to retain strict matching behavior (scale of
-     *                               source and target vector must match exactly).
-     */
-    public DecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) {
-        super(vector, index);
-        this.bigDecimalRoundingMode = bigDecimalRoundingMode;
-        this.scale = vector.getScale();
-    }
-
-    /**
-     * Creates a consumer for {@link DecimalVector}.
-     */
-    public static JdbcConsumer<DecimalVector> createConsumer(
-            DecimalVector vector,
-            int index,
-            boolean nullable,
-            RoundingMode bigDecimalRoundingMode
-    ) {
-        if (nullable) {
-            return new NullableDecimalConsumer(vector, index, bigDecimalRoundingMode);
-        } else {
-            return new NonNullableDecimalConsumer(vector, index, bigDecimalRoundingMode);
-        }
-    }
-
-    protected void set(BigDecimal value) {
-        if (bigDecimalRoundingMode != null && value.scale() != scale) {
-            value = value.setScale(scale, bigDecimalRoundingMode);
-        }
-        vector.set(currentIndex, value);
-    }
-
-
-    /**
-     * Consumer for nullable decimal.
-     */
-    static class NullableDecimalConsumer extends DecimalConsumer {
-
-        /**
-         * Instantiate a DecimalConsumer.
-         */
-        public NullableDecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) {
-            super(vector, index, bigDecimalRoundingMode);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            BigDecimal value = resultSet.getBigDecimal(columnIndexInResultSet);
-            if (!resultSet.wasNull()) {
-                // for fixed width vectors, we have allocated enough memory proactively,
-                // so there is no need to call the setSafe method here.
-                set(value);
-            }
-            currentIndex++;
-        }
-    }
-
-    /**
-     * Consumer for non-nullable decimal.
-     */
-    static class NonNullableDecimalConsumer extends DecimalConsumer {
-
-        /**
-         * Instantiate a DecimalConsumer.
-         */
-        public NonNullableDecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) {
-            super(vector, index, bigDecimalRoundingMode);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            BigDecimal value = resultSet.getBigDecimal(columnIndexInResultSet);
-            // for fixed width vectors, we have allocated enough memory proactively,
-            // so there is no need to call the setSafe method here.
-            set(value);
-            currentIndex++;
        }
    }
-}
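DecimalConsumer is the one consumer in this package that transforms values on the way in: an Arrow DecimalVector has a fixed scale, so a BigDecimal with a different scale must be rescaled first or the write is rejected. A small sketch of that behavior (the HALF_UP mode is chosen arbitrarily for illustration):

import java.math.BigDecimal;
import java.math.RoundingMode;

import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.DecimalVector;

public class DecimalScaleSketch {
    public static void main(String[] args) {
        try (RootAllocator allocator = new RootAllocator();
             // Precision 38, scale 2; the vector's scale is fixed at creation time.
             DecimalVector amounts = new DecimalVector("amount", allocator, 38, 2)) {
            amounts.allocateNew(1);
            BigDecimal value = new BigDecimal("12.3456");
            // Same check as DecimalConsumer.set(): rescale when the scales differ,
            // because DecimalVector rejects a BigDecimal with a mismatched scale.
            if (value.scale() != amounts.getScale()) {
                value = value.setScale(amounts.getScale(), RoundingMode.HALF_UP);
            }
            amounts.setSafe(0, value);
            amounts.setValueCount(1);
            System.out.println(amounts.getObject(0)); // prints 12.35
        }
    }
}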
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DoubleConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DoubleConsumer.java
deleted file mode 100644
index 1c2ee7e..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/DoubleConsumer.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.vector.Float8Vector;
-
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-/**
- * Consumer which consume double type values from {@link ResultSet}.
- * Write the data to {@link Float8Vector}.
- */
-public class DoubleConsumer {
-
-    /**
-     * Creates a consumer for {@link Float8Vector}.
-     */
-    public static JdbcConsumer<Float8Vector> createConsumer(Float8Vector vector, int index, boolean nullable) {
-        if (nullable) {
-            return new NullableDoubleConsumer(vector, index);
-        } else {
-            return new NonNullableDoubleConsumer(vector, index);
-        }
-    }
-
-    /**
-     * Nullable double consumer.
-     */
-    static class NullableDoubleConsumer extends BaseConsumer<Float8Vector> {
-
-        /**
-         * Instantiate a DoubleConsumer.
-         */
-        public NullableDoubleConsumer(Float8Vector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            double value = resultSet.getDouble(columnIndexInResultSet);
-            if (!resultSet.wasNull()) {
-                // for fixed width vectors, we have allocated enough memory proactively,
-                // so there is no need to call the setSafe method here.
-                vector.set(currentIndex, value);
-            }
-            currentIndex++;
-        }
-    }
-
-    /**
-     * Non-nullable double consumer.
-     */
-    static class NonNullableDoubleConsumer extends BaseConsumer<Float8Vector> {
-
-        /**
-         * Instantiate a DoubleConsumer.
-         */
-        public NonNullableDoubleConsumer(Float8Vector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            double value = resultSet.getDouble(columnIndexInResultSet);
-            // for fixed width vectors, we have allocated enough memory proactively,
-            // so there is no need to call the setSafe method here.
-            vector.set(currentIndex, value);
-            currentIndex++;
-        }
-    }
-}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/FloatConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/FloatConsumer.java
deleted file mode 100644
index 641b7ea..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/FloatConsumer.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.vector.Float4Vector;
-
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-/**
- * Consumer which consume float type values from {@link ResultSet}.
- * Write the data to {@link Float4Vector}.
- */
-public class FloatConsumer {

-    /**
-     * Creates a consumer for {@link Float4Vector}.
-     */
-    public static JdbcConsumer<Float4Vector> createConsumer(Float4Vector vector, int index, boolean nullable) {
-        if (nullable) {
-            return new NullableFloatConsumer(vector, index);
-        } else {
-            return new NonNullableFloatConsumer(vector, index);
-        }
-    }
-
-    /**
-     * Nullable float consumer.
-     */
-    static class NullableFloatConsumer extends BaseConsumer<Float4Vector> {
-
-        /**
-         * Instantiate a FloatConsumer.
-         */
-        public NullableFloatConsumer(Float4Vector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            float value = resultSet.getFloat(columnIndexInResultSet);
-            if (!resultSet.wasNull()) {
-                // for fixed width vectors, we have allocated enough memory proactively,
-                // so there is no need to call the setSafe method here.
-                vector.set(currentIndex, value);
-            }
-            currentIndex++;
-        }
-    }
-
-    /**
-     * Non-nullable float consumer.
-     */
-    static class NonNullableFloatConsumer extends BaseConsumer<Float4Vector> {
-
-        /**
-         * Instantiate a FloatConsumer.
-         */
-        public NonNullableFloatConsumer(Float4Vector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            float value = resultSet.getFloat(columnIndexInResultSet);
-            // for fixed width vectors, we have allocated enough memory proactively,
-            // so there is no need to call the setSafe method here.
-            vector.set(currentIndex, value);
-            currentIndex++;
-        }
-    }
-}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/IntConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/IntConsumer.java
deleted file mode 100644
index 954dc66..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/IntConsumer.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.vector.IntVector;
-
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-/**
- * Consumer which consume int type values from {@link ResultSet}.
- * Write the data to {@link IntVector}.
- */
-public class IntConsumer {
-
-    /**
-     * Creates a consumer for {@link IntVector}.
-     */
-    public static JdbcConsumer<IntVector> createConsumer(IntVector vector, int index, boolean nullable) {
-        if (nullable) {
-            return new NullableIntConsumer(vector, index);
-        } else {
-            return new NonNullableIntConsumer(vector, index);
-        }
-    }
-
-    /**
-     * Nullable consumer for int.
-     */
-    static class NullableIntConsumer extends BaseConsumer<IntVector> {
-
-        /**
-         * Instantiate a IntConsumer.
-         */
-        public NullableIntConsumer(IntVector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            int value = resultSet.getInt(columnIndexInResultSet);
-            if (!resultSet.wasNull()) {
-                // for fixed width vectors, we have allocated enough memory proactively,
-                // so there is no need to call the setSafe method here.
-                vector.set(currentIndex, value);
-            }
-            currentIndex++;
-        }
-    }
-
-    /**
-     * Non-nullable consumer for int.
-     */
-    static class NonNullableIntConsumer extends BaseConsumer<IntVector> {
-
-        /**
-         * Instantiate a IntConsumer.
-         */
-        public NonNullableIntConsumer(IntVector vector, int index) {
-            super(vector, index);
-        }
-
-        @Override
-        public void consume(ResultSet resultSet) throws SQLException {
-            int value = resultSet.getInt(columnIndexInResultSet);
-            // for fixed width vectors, we have allocated enough memory proactively,
-            // so there is no need to call the setSafe method here.
-            vector.set(currentIndex, value);
-            currentIndex++;
-        }
-    }
-}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/JdbcConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/JdbcConsumer.java
deleted file mode 100644
index 78ec387..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/JdbcConsumer.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import org.apache.arrow.vector.ValueVector;
-
-import java.io.IOException;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-
-/**
- * An abstraction that is used to consume values from {@link ResultSet}.
- *
- * @param <T> The vector within consumer or its delegate, used for partially consume purpose.
- */
-public interface JdbcConsumer<T extends ValueVector> extends AutoCloseable {
-
-    /**
-     * Consume a specific type value from {@link ResultSet} and write it to vector.
-     */
-    void consume(ResultSet resultSet) throws SQLException, IOException;
-
-    /**
-     * Close this consumer, do some clean work such as clear reuse ArrowBuf.
-     */
-    void close() throws Exception;
-
-    /**
-     * Reset the vector within consumer for partial read purpose.
-     */
-    void resetValueVector(T vector);
-}
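The deleted JdbcConsumer interface is just consume/close/resetValueVector; resetValueVector is what lets a consumer instance survive across batches when the caller swaps in a fresh vector. A from-scratch consumer of the same three-method shape for BigIntVector (hypothetical; not part of the removed API):

import org.apache.arrow.vector.BigIntVector;

import java.sql.ResultSet;
import java.sql.SQLException;

public class BigIntConsumerSketch implements AutoCloseable {
    private final int columnIndex;
    private BigIntVector vector;
    private int currentIndex;

    public BigIntConsumerSketch(BigIntVector vector, int columnIndex) {
        this.vector = vector;
        this.columnIndex = columnIndex;
    }

    /** Read one row's value and advance the write cursor. */
    public void consume(ResultSet rs) throws SQLException {
        long value = rs.getLong(columnIndex);
        if (!rs.wasNull()) {
            vector.setSafe(currentIndex, value);
        }
        currentIndex++;
    }

    /** Point this consumer at a fresh vector for the next batch. */
    public void resetValueVector(BigIntVector vector) {
        this.vector = vector;
        this.currentIndex = 0;
    }

    @Override
    public void close() {
        vector.close();
    }
}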
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/MapConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/MapConsumer.java
deleted file mode 100644
index 26df514..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/MapConsumer.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer;
-
-import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import org.apache.arrow.memory.ArrowBuf;
-import org.apache.arrow.memory.BufferAllocator;
-import org.apache.arrow.vector.complex.MapVector;
-import org.apache.arrow.vector.complex.impl.UnionMapWriter;
-import org.apache.arrow.vector.util.ObjectMapperFactory;
-
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.Map;
-
-/**
- * Consumer which consume map type values from {@link ResultSet}.
- * Write the data into {@link MapVector}.
- */
-public class MapConsumer extends BaseConsumer<MapVector> {
-
-
-    private final UnionMapWriter writer;
-    private final ObjectMapper objectMapper = ObjectMapperFactory.newObjectMapper();
-    private final TypeReference<Map<String, String>> typeReference = new TypeReference<Map<String, String>>() {
-    };
-    private int currentRow;
-
-    /**
-     * Instantiate a MapConsumer.
-     */
-    public MapConsumer(MapVector vector, int index) {
-        super(vector, index);
-        writer = vector.getWriter();
-    }
-
-    /**
-     * Creates a consumer for {@link MapVector}.
-     */
-    public static MapConsumer createConsumer(MapVector mapVector, int index, boolean nullable) {
-        return new MapConsumer(mapVector, index);
-    }
-
-    @Override
-    public void consume(ResultSet resultSet) throws SQLException, IOException {
-        Object map = resultSet.getObject(columnIndexInResultSet);
-        writer.setPosition(currentRow++);
-        if (map != null) {
-            if (map instanceof String) {
-                writeJavaMapIntoVector(objectMapper.readValue((String) map, typeReference));
-            } else if (map instanceof Map) {
-                writeJavaMapIntoVector((Map<String, String>) map);
-            } else {
-                throw new IllegalArgumentException("Unknown type of map type column from JDBC " + map.getClass().getName());
-            }
-        } else {
-            writer.writeNull();
-        }
-    }
-
-    private void writeJavaMapIntoVector(Map<String, String> map) {
-        BufferAllocator allocator = vector.getAllocator();
-        writer.startMap();
-        map.forEach((key, value) -> {
-            byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8);
-            byte[] valueBytes = value != null ? value.getBytes(StandardCharsets.UTF_8) : null;
-            try (
-                    ArrowBuf keyBuf = allocator.buffer(keyBytes.length);
-                    ArrowBuf valueBuf = valueBytes != null ? allocator.buffer(valueBytes.length) : null;
-            ) {
-                writer.startEntry();
-                keyBuf.writeBytes(keyBytes);
-                writer.key().varChar().writeVarChar(0, keyBytes.length, keyBuf);
-                if (valueBytes != null) {
-                    valueBuf.writeBytes(valueBytes);
-                    writer.value().varChar().writeVarChar(0, valueBytes.length, valueBuf);
-                } else {
-                    writer.value().varChar().writeNull();
-                }
-                writer.endEntry();
-            }
-        });
-        writer.endMap();
-    }
-}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/NullConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/NullConsumer.java
deleted file mode 100644
index 4fd3ed7..0000000
--- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/NullConsumer.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer; - -import org.apache.arrow.vector.NullVector; - -import java.sql.ResultSet; -import java.sql.SQLException; - -/** - * Consumer which consume null type values from ResultSet. - * Corresponding to {@link NullVector}. - */ -public class NullConsumer extends BaseConsumer { - - public NullConsumer(NullVector vector) { - super(vector, 0); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/SmallIntConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/SmallIntConsumer.java deleted file mode 100644 index ee4baef..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/SmallIntConsumer.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer; - -import org.apache.arrow.vector.SmallIntVector; - -import java.sql.ResultSet; -import java.sql.SQLException; - -/** - * Consumer which consume smallInt type values from {@link ResultSet}. - * Write the data to {@link SmallIntVector}. - */ -public class SmallIntConsumer { - - /** - * Creates a consumer for {@link SmallIntVector}. - */ - public static BaseConsumer createConsumer(SmallIntVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableSmallIntConsumer(vector, index); - } else { - return new NonNullableSmallIntConsumer(vector, index); - } - } - - /** - * Nullable consumer for small int. - */ - static class NullableSmallIntConsumer extends BaseConsumer { - - /** - * Instantiate a SmallIntConsumer. 
- */ - public NullableSmallIntConsumer(SmallIntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - short value = resultSet.getShort(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - } - currentIndex++; - } - } - - /** - * Non-nullable consumer for small int. - */ - static class NonNullableSmallIntConsumer extends BaseConsumer { - - /** - * Instantiate a SmallIntConsumer. - */ - public NonNullableSmallIntConsumer(SmallIntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - short value = resultSet.getShort(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - currentIndex++; - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimeConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimeConsumer.java deleted file mode 100644 index bfe5ede..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimeConsumer.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer; - -import org.apache.arrow.vector.TimeMilliVector; - -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Time; -import java.util.Calendar; - -/** - * Consumer which consume time type values from {@link ResultSet}. - * Write the data to {@link TimeMilliVector}. - */ -public abstract class TimeConsumer { - - /** - * Creates a consumer for {@link TimeMilliVector}. - */ - public static JdbcConsumer createConsumer( - TimeMilliVector vector, int index, boolean nullable, Calendar calendar) { - if (nullable) { - return new NullableTimeConsumer(vector, index, calendar); - } else { - return new NonNullableTimeConsumer(vector, index, calendar); - } - } - - /** - * Nullable consumer for {@link TimeMilliVector}. - */ - static class NullableTimeConsumer extends BaseConsumer { - - protected final Calendar calendar; - - /** - * Instantiate a TimeConsumer. - */ - public NullableTimeConsumer(TimeMilliVector vector, int index) { - this(vector, index, /* calendar */null); - } - - /** - * Instantiate a TimeConsumer. 
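- * A null calendar means the JDBC driver's default time zone is used when reading the time value.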
- */ - public NullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { - super(vector, index); - this.calendar = calendar; - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Time time = calendar == null ? resultSet.getTime(columnIndexInResultSet) : - resultSet.getTime(columnIndexInResultSet, calendar); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, (int) time.getTime()); - } - currentIndex++; - } - } - - /** - * Non-nullable consumer for {@link TimeMilliVector}. - */ - static class NonNullableTimeConsumer extends BaseConsumer { - - protected final Calendar calendar; - - /** - * Instantiate a TimeConsumer. - */ - public NonNullableTimeConsumer(TimeMilliVector vector, int index) { - this(vector, index, /* calendar */null); - } - - /** - * Instantiate a TimeConsumer. - */ - public NonNullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { - super(vector, index); - this.calendar = calendar; - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Time time = calendar == null ? resultSet.getTime(columnIndexInResultSet) : - resultSet.getTime(columnIndexInResultSet, calendar); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, (int) time.getTime()); - currentIndex++; - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimestampConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimestampConsumer.java deleted file mode 100644 index 228b9f2..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimestampConsumer.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer; - -import org.apache.arrow.vector.TimeStampMilliVector; - -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Timestamp; - -/** - * Consumer which consume timestamp type values from {@link ResultSet}. - * Write the data to {@link TimeStampMilliVector}. - */ -public abstract class TimestampConsumer { - - /** - * Creates a consumer for {@link TimeStampMilliVector}. 
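- * The nullable flag selects between the wasNull-checking variant and the faster non-nullable one.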
- */ - public static JdbcConsumer createConsumer( - TimeStampMilliVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableTimestampConsumer(vector, index); - } else { - return new NonNullableTimestampConsumer(vector, index); - } - } - - /** - * Nullable consumer for timestamp. - */ - static class NullableTimestampConsumer extends BaseConsumer { - - /** - * Instantiate a TimestampConsumer. - */ - public NullableTimestampConsumer(TimeStampMilliVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, timestamp.getTime()); - } - currentIndex++; - } - } - - /** - * Non-nullable consumer for timestamp. - */ - static class NonNullableTimestampConsumer extends BaseConsumer { - - /** - * Instantiate a TimestampConsumer. - */ - public NonNullableTimestampConsumer(TimeStampMilliVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, timestamp.getTime()); - currentIndex++; - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimestampTZConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimestampTZConsumer.java deleted file mode 100644 index 3d405a5..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TimestampTZConsumer.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer; - -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.TimeStampMilliTZVector; - -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Timestamp; -import java.util.Calendar; - -/** - * Consumer which consume timestamp (with time zone) type values from {@link ResultSet}. - * Write the data to {@link TimeStampMilliTZVector}. - */ -public class TimestampTZConsumer { - /** - * Creates a consumer for {@link TimeStampMilliTZVector}. 
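- * Unlike the plain timestamp factory, a non-null calendar is required here (enforced via Preconditions below) so the value can be read in the target time zone.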
- */ - public static JdbcConsumer createConsumer( - TimeStampMilliTZVector vector, int index, boolean nullable, Calendar calendar) { - Preconditions.checkArgument(calendar != null, "Calendar cannot be null"); - if (nullable) { - return new NullableTimestampTZConsumer(vector, index, calendar); - } else { - return new NonNullableTimestampConsumer(vector, index, calendar); - } - } - - /** - * Nullable consumer for timestamp (with time zone). - */ - static class NullableTimestampTZConsumer extends BaseConsumer { - - protected final Calendar calendar; - - /** - * Instantiate a TimestampConsumer. - */ - public NullableTimestampTZConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) { - super(vector, index); - this.calendar = calendar; - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet, calendar); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, timestamp.getTime()); - } - currentIndex++; - } - } - - /** - * Non-nullable consumer for timestamp (with time zone). - */ - static class NonNullableTimestampConsumer extends BaseConsumer { - - protected final Calendar calendar; - - /** - * Instantiate a TimestampConsumer. - */ - public NonNullableTimestampConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) { - super(vector, index); - this.calendar = calendar; - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet, calendar); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, timestamp.getTime()); - currentIndex++; - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TinyIntConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TinyIntConsumer.java deleted file mode 100644 index 3a67697..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/TinyIntConsumer.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer; - -import org.apache.arrow.vector.TinyIntVector; - -import java.sql.ResultSet; -import java.sql.SQLException; - -/** - * Consumer which consume tinyInt type values from {@link ResultSet}. - * Write the data to {@link TinyIntVector}. 
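- * Values are fetched with ResultSet#getByte; since fixed-width vectors are pre-allocated, plain set() is used rather than setSafe().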
- */ -public abstract class TinyIntConsumer { - - /** - * Creates a consumer for {@link TinyIntVector}. - */ - public static JdbcConsumer createConsumer(TinyIntVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableTinyIntConsumer(vector, index); - } else { - return new NonNullableTinyIntConsumer(vector, index); - } - } - - /** - * Nullable consumer for tiny int. - */ - static class NullableTinyIntConsumer extends BaseConsumer { - - /** - * Instantiate a TinyIntConsumer. - */ - public NullableTinyIntConsumer(TinyIntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - byte value = resultSet.getByte(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - } - currentIndex++; - } - } - - /** - * Non-nullable consumer for tiny int. - */ - static class NonNullableTinyIntConsumer extends BaseConsumer { - - /** - * Instantiate a TinyIntConsumer. - */ - public NonNullableTinyIntConsumer(TinyIntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - byte value = resultSet.getByte(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - currentIndex++; - } - } -} diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/VarCharConsumer.java b/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/VarCharConsumer.java deleted file mode 100644 index 0801d36..0000000 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/rdbms/adaptor/consumer/VarCharConsumer.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.manager.connector.rdbms.adaptor.consumer; - -import org.apache.arrow.vector.VarCharVector; - -import java.nio.charset.StandardCharsets; -import java.sql.ResultSet; -import java.sql.SQLException; - -/** - * Consumer which consume varchar type values from {@link ResultSet}. - * Write the data to {@link VarCharVector}. - */ -public abstract class VarCharConsumer { - - /** - * Creates a consumer for {@link VarCharVector}. 
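- * Strings are written as UTF-8 bytes via setSafe(), since variable-width vectors may need to grow their buffers.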
- */ - public static JdbcConsumer<VarCharVector> createConsumer(VarCharVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableVarCharConsumer(vector, index); - } else { - return new NonNullableVarCharConsumer(vector, index); - } - } - - /** - * Nullable consumer for var char. - */ - static class NullableVarCharConsumer extends BaseConsumer<VarCharVector> { - - /** - * Instantiate a VarCharConsumer. - */ - public NullableVarCharConsumer(VarCharVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - String value = resultSet.getString(columnIndexInResultSet); - if (!resultSet.wasNull()) { - byte[] bytes = value.getBytes(StandardCharsets.UTF_8); - vector.setSafe(currentIndex, bytes); - } - currentIndex++; - } - } - - /** - * Non-nullable consumer for var char. - */ - static class NonNullableVarCharConsumer extends BaseConsumer<VarCharVector> { - - /** - * Instantiate a VarCharConsumer. - */ - public NonNullableVarCharConsumer(VarCharVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - String value = resultSet.getString(columnIndexInResultSet); - byte[] bytes = value.getBytes(StandardCharsets.UTF_8); - vector.setSafe(currentIndex, bytes); - currentIndex++; - } - } -} diff --git a/dataproxy-plugins/dataproxy-plugin-odps/pom.xml b/dataproxy-plugins/dataproxy-plugin-odps/pom.xml new file mode 100644 index 0000000..229e344 --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/pom.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.secretflow</groupId> + <artifactId>dataproxy-plugins</artifactId> + <version>0.0.1-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <artifactId>dataproxy-plugin-odps</artifactId> + <packaging>jar</packaging> + + <name>dataproxy-plugin-odps</name> + <url>http://maven.apache.org</url> + + <dependencies> + <dependency> + <groupId>com.aliyun.odps</groupId> + <artifactId>odps-sdk-core</artifactId> + </dependency> + <dependency> + <groupId>org.projectlombok</groupId> + <artifactId>lombok</artifactId> + <scope>compile</scope> + </dependency> + </dependencies> +</project> diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/DefaultOdpsFlightConfigLoader.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/DefaultOdpsFlightConfigLoader.java new file mode 100644 index 0000000..b2dc45e --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/DefaultOdpsFlightConfigLoader.java @@ -0,0 +1,46 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.plugin.odps.config; + +import org.secretflow.dataproxy.core.config.ConfigLoader; + +import java.util.Properties; + +/** + * @author yuexie + * @date 2024/11/28 15:16 + **/ +public class DefaultOdpsFlightConfigLoader implements ConfigLoader { + + /** + * load properties
+ * Reads configuration values and loads them into the given {@link Properties} instance.
+ * Loaders run in the order determined by {@link #getPriority()}.
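+ * For example, assuming ascending-priority ordering, a loader whose priority is greater than 1 runs after this one and may overwrite the defaults set here.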
+ * + * @param properties properties + */ + @Override + public void loadProperties(Properties properties) { + properties.put(OdpsConfigConstant.ConfigKey.MAX_FLIGHT_ENDPOINT, 1); + properties.put(OdpsConfigConstant.ConfigKey.FLIGHT_ENDPOINT_UPGRADE_TO_MULTI_BATCH_THRESHOLD, 1000_000L); + } + + @Override + public int getPriority() { + return 1; + } +} diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/EnvironmentOdpsFlightConfigLoader.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/EnvironmentOdpsFlightConfigLoader.java new file mode 100644 index 0000000..85a196b --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/EnvironmentOdpsFlightConfigLoader.java @@ -0,0 +1,68 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.plugin.odps.config; + +import lombok.extern.slf4j.Slf4j; +import org.secretflow.dataproxy.common.utils.EnvVarUtils; +import org.secretflow.dataproxy.core.config.ConfigLoader; + +import java.util.Optional; +import java.util.Properties; + +/** + * @author yuexie + * @date 2024/12/6 15:45 + **/ +@Slf4j +public class EnvironmentOdpsFlightConfigLoader implements ConfigLoader { + + /** + * load properties
+ * Reads configuration values and loads them into the given {@link Properties} instance.
+ * Loaders run in the order determined by {@link #getPriority()}.
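+ * Assuming ascending-priority ordering, this loader (priority 3) runs after {@link DefaultOdpsFlightConfigLoader} (priority 1), so values from the environment overwrite the built-in defaults.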
+ * + * @param properties properties + */ + @Override + public void loadProperties(Properties properties) { + + try { + log.info("Load odps flight config from system env."); + Optional<Integer> maxEndpoint = EnvVarUtils.getInt(OdpsConfigConstant.ConfigKey.MAX_FLIGHT_ENDPOINT); + if (maxEndpoint.isPresent()) { + log.debug("Load odps flight config `MAX_FLIGHT_ENDPOINT` from system env, limits range 1 to 5. key: {}, value: {}", OdpsConfigConstant.ConfigKey.MAX_FLIGHT_ENDPOINT, maxEndpoint.get()); + properties.put(OdpsConfigConstant.ConfigKey.MAX_FLIGHT_ENDPOINT, EnvVarUtils.getEffectiveValue(maxEndpoint.get(), 1, 5)); + } + + Optional<Long> batchThreshold = EnvVarUtils.getLong(OdpsConfigConstant.ConfigKey.FLIGHT_ENDPOINT_UPGRADE_TO_MULTI_BATCH_THRESHOLD); + if (batchThreshold.isPresent()) { + log.debug("Load odps flight config `FLIGHT_ENDPOINT_UPGRADE_TO_MULTI_BATCH_THRESHOLD` from system env, limits range 300,000 to 100,000,000. key: {}, value: {}", OdpsConfigConstant.ConfigKey.FLIGHT_ENDPOINT_UPGRADE_TO_MULTI_BATCH_THRESHOLD, batchThreshold.get()); + properties.put(OdpsConfigConstant.ConfigKey.FLIGHT_ENDPOINT_UPGRADE_TO_MULTI_BATCH_THRESHOLD, EnvVarUtils.getEffectiveValue(batchThreshold.get(), 300_000L, 100_000_000L)); + } + } catch (Exception e) { + log.error("Failed to load odps flight config from system env. This error will be ignored and some configurations will not take effect. error: {}", e.getMessage(), e); + } + + } + + @Override + public int getPriority() { + return 3; + } + + +} diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsCommandConfig.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsCommandConfig.java new file mode 100644 index 0000000..e467ba0 --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsCommandConfig.java @@ -0,0 +1,43 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.secretflow.dataproxy.plugin.odps.config; + +import lombok.Getter; +import org.apache.arrow.vector.types.pojo.Schema; +import org.secretflow.dataproxy.plugin.odps.constant.OdpsTypeEnum; + +/** + * @author yuexie + * @date 2024/11/6 16:51 + **/ +@Getter +public abstract class OdpsCommandConfig { + + protected final OdpsConnectConfig odpsConnectConfig; + protected final OdpsTypeEnum odpsTypeEnum; + protected final T commandConfig; + + public OdpsCommandConfig(OdpsConnectConfig odpsConnectConfig, OdpsTypeEnum odpsTypeEnum, T commandConfig) { + this.odpsConnectConfig = odpsConnectConfig; + this.odpsTypeEnum = odpsTypeEnum; + this.commandConfig = commandConfig; + } + + public abstract String taskRunSQL(); + + public abstract Schema getResultSchema(); +} diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsConfigConstant.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsConfigConstant.java new file mode 100644 index 0000000..c796428 --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsConfigConstant.java @@ -0,0 +1,38 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.plugin.odps.config; + +/** + * @author yuexie + * @date 2024/11/28 17:27 + **/ +public class OdpsConfigConstant { + + public static class ConfigKey { + + /** + * The maximum number of flight endpoints that can be split on the server + */ + public static final String MAX_FLIGHT_ENDPOINT = "FLIGHT_ENDPOINT_ODPS_MAX"; + + /** + * The threshold for upgrading to multi-batch mode + */ + public static final String FLIGHT_ENDPOINT_UPGRADE_TO_MULTI_BATCH_THRESHOLD = "FLIGHT_ENDPOINT_ODPS_UPGRADE_THRESHOLD"; + + } +} diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsConnectConfig.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsConnectConfig.java new file mode 100644 index 0000000..55fed8a --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsConnectConfig.java @@ -0,0 +1,34 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.secretflow.dataproxy.plugin.odps.config; + +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import org.secretflow.dataproxy.common.serializer.SensitiveDataSerializer; + +/** + * @param accessKeyId access key id + * @param accessKeySecret access key secret + * @param endpoint endpoint + * @param projectName project name + * @author yuexie + * @date 2024/10/30 17:38 + */ +public record OdpsConnectConfig(@JsonSerialize(using = SensitiveDataSerializer.class) String accessKeyId, + @JsonSerialize(using = SensitiveDataSerializer.class) String accessKeySecret, + String endpoint, String projectName) { + +} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/TableIndex.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsTableConfig.java similarity index 50% rename from dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/TableIndex.java rename to dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsTableConfig.java index 81d0529..1959e9d 100644 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/dataset/format/TableIndex.java +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsTableConfig.java @@ -1,5 +1,5 @@ /* - * Copyright 2023 Ant Group Co., Ltd. + * Copyright 2024 Ant Group Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,39 +14,22 @@ * limitations under the License. */ -package org.secretflow.dataproxy.common.model.dataset.format; +package org.secretflow.dataproxy.plugin.odps.config; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; +import com.fasterxml.jackson.annotation.JsonIgnore; +import org.secretflow.v1alpha1.common.Common; import java.util.List; /** - * Table index + * transform from Kuscia DomainData * - * @author yumu - * @date 2023/9/4 10:17 - */ -@Data -@NoArgsConstructor -@AllArgsConstructor -@Builder -public class TableIndex { - - /** - * Index name - */ - private String indexName; - - /** - * Index type - */ - private IndexType type; - - /** - * Index field name list - */ - private List field; + * @param tableName table name + * @param partition partition + * @param columns columns, For the time being, use the DataColumn in pb, and then strip Kuscia from the core part of the design + * + * @author yuexie + * @date 2024/11/7 16:43 + **/ +public record OdpsTableConfig(String tableName, String partition, @JsonIgnore List columns) { } diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsTableQueryConfig.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsTableQueryConfig.java new file mode 100644 index 0000000..10c83b9 --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsTableQueryConfig.java @@ -0,0 +1,55 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.plugin.odps.config; + +import lombok.Getter; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.secretflow.dataproxy.common.utils.ArrowUtil; +import org.secretflow.dataproxy.plugin.odps.constant.OdpsTypeEnum; + +import java.util.stream.Collectors; + +/** + * @author yuexie + * @date 2024/11/7 16:44 + **/ +@Getter +public class OdpsTableQueryConfig extends OdpsCommandConfig { + + public OdpsTableQueryConfig(OdpsConnectConfig odpsConnectConfig, OdpsTableConfig readConfig) { + super(odpsConnectConfig, OdpsTypeEnum.TABLE, readConfig); + } + + public OdpsTableQueryConfig(OdpsConnectConfig odpsConnectConfig, OdpsTypeEnum odpsTypeEnum, OdpsTableConfig readConfig) { + super(odpsConnectConfig, odpsTypeEnum, readConfig); + } + + @Override + public String taskRunSQL() { + return ""; + } + + @Override + public Schema getResultSchema() { + + return new Schema(commandConfig.columns().stream() + .map(column -> + Field.nullable(column.getName(), ArrowUtil.parseKusciaColumnType(column.getType()))) + .collect(Collectors.toList())); + } +} diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsWriteConfig.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsWriteConfig.java new file mode 100644 index 0000000..745016a --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/OdpsWriteConfig.java @@ -0,0 +1,52 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.secretflow.dataproxy.plugin.odps.config; + +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.secretflow.dataproxy.common.utils.ArrowUtil; +import org.secretflow.dataproxy.plugin.odps.constant.OdpsTypeEnum; + +import java.util.stream.Collectors; + +/** + * @author yuexie + * @date 2024/11/10 21:59 + **/ +public class OdpsWriteConfig extends OdpsCommandConfig { + + public OdpsWriteConfig(OdpsConnectConfig odpsConnectConfig, OdpsTableConfig readConfig) { + super(odpsConnectConfig, OdpsTypeEnum.TABLE, readConfig); + } + + public OdpsWriteConfig(OdpsConnectConfig odpsConnectConfig, OdpsTypeEnum typeEnum, OdpsTableConfig readConfig) { + super(odpsConnectConfig, typeEnum, readConfig); + } + + @Override + public String taskRunSQL() { + return ""; + } + + @Override + public Schema getResultSchema() { + return new Schema(commandConfig.columns().stream() + .map(column -> + Field.nullable(column.getName(), ArrowUtil.parseKusciaColumnType(column.getType()))) + .collect(Collectors.toList())); + } +} diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/ScqlCommandJobConfig.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/ScqlCommandJobConfig.java new file mode 100644 index 0000000..4f2ef33 --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/ScqlCommandJobConfig.java @@ -0,0 +1,42 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.plugin.odps.config; + +import org.apache.arrow.vector.types.pojo.Schema; +import org.secretflow.dataproxy.plugin.odps.constant.OdpsTypeEnum; + +/** + * @author yuexie + * @date 2024/10/30 17:47 + **/ +public class ScqlCommandJobConfig extends OdpsCommandConfig { + + public ScqlCommandJobConfig(OdpsConnectConfig odpsConnectConfig, String querySql) { + super(odpsConnectConfig, OdpsTypeEnum.SQL, querySql); + } + + @Override + public String taskRunSQL() { + return commandConfig; + } + + @Override + public Schema getResultSchema() { + // Initialize the query and use the table schema returned by the ODPS query + return null; + } +} diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/TaskConfig.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/TaskConfig.java new file mode 100644 index 0000000..b25817a --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/config/TaskConfig.java @@ -0,0 +1,66 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.plugin.odps.config; + +import lombok.Getter; +import lombok.Setter; +import lombok.ToString; +import org.secretflow.dataproxy.plugin.odps.reader.OdpsDoGetContext; + +/** + * @author yuexie + * @date 2024/10/30 17:46 + **/ +@Getter +@ToString +public class TaskConfig { + + private final OdpsDoGetContext context; + + /** + * Start index + */ + private final long startIndex; + + /** + * Count + */ + private final long count; + + /** + * Current index: Reserved field, endpoint resume + */ + @Setter + private long currentIndex; + + /** + * Whether to compress + */ + private final boolean compress; + + public TaskConfig(OdpsDoGetContext context, long startIndex, long count) { + this(context, startIndex, count, true); + } + + public TaskConfig(OdpsDoGetContext context, long startIndex, long count, boolean compress) { + this.context = context; + this.startIndex = startIndex; + this.count = count; + this.compress = compress; + this.currentIndex = startIndex; + } +} diff --git a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/MysqlConnConfig.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/constant/OdpsTypeEnum.java similarity index 61% rename from dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/MysqlConnConfig.java rename to dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/constant/OdpsTypeEnum.java index 6b83719..5657a0a 100644 --- a/dataproxy-common/src/main/java/org/secretflow/dataproxy/common/model/datasource/conn/MysqlConnConfig.java +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/constant/OdpsTypeEnum.java @@ -1,5 +1,5 @@ /* - * Copyright 2023 Ant Group Co., Ltd. + * Copyright 2024 Ant Group Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,23 +14,27 @@ * limitations under the License. 
*/ -package org.secretflow.dataproxy.common.model.datasource.conn; +package org.secretflow.dataproxy.plugin.odps.constant; -import lombok.AllArgsConstructor; import lombok.Getter; -import lombok.Setter; -import lombok.experimental.SuperBuilder; /** - * MySQL datasource connection config - * - * @author yumu - * @date 2023/8/30 16:36 - */ + * @author yuexie + * @date 2024/11/6 16:52 + **/ @Getter -@Setter -@SuperBuilder -@AllArgsConstructor -public class MysqlConnConfig extends JdbcBaseConnConfig { +public enum OdpsTypeEnum { + + TABLE("table"), + + SQL("sql"), + + FILE("resource"); + + private final String type; + + OdpsTypeEnum(String type) { + this.type = type; + } } diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/converter/OdpsParamConverter.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/converter/OdpsParamConverter.java new file mode 100644 index 0000000..dfc6ca2 --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/converter/OdpsParamConverter.java @@ -0,0 +1,80 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.plugin.odps.converter; + +import org.secretflow.dataproxy.core.converter.DataProxyParamConverter; +import org.secretflow.dataproxy.plugin.odps.config.OdpsConnectConfig; +import org.secretflow.dataproxy.plugin.odps.config.OdpsTableConfig; +import org.secretflow.dataproxy.plugin.odps.config.OdpsTableQueryConfig; +import org.secretflow.dataproxy.plugin.odps.config.OdpsWriteConfig; +import org.secretflow.dataproxy.plugin.odps.config.ScqlCommandJobConfig; +import org.secretflow.dataproxy.plugin.odps.constant.OdpsTypeEnum; +import org.secretflow.v1alpha1.kusciaapi.Domaindata; +import org.secretflow.v1alpha1.kusciaapi.Domaindatasource; +import org.secretflow.v1alpha1.kusciaapi.Flightdm; +import org.secretflow.v1alpha1.kusciaapi.Flightinner; + +/** + * @author yuexie + * @date 2024/10/31 19:40 + **/ +public class OdpsParamConverter implements DataProxyParamConverter { + + @Override + public ScqlCommandJobConfig convert(Flightinner.CommandDataMeshSqlQuery request) { + Domaindatasource.OdpsDataSourceInfo odps = request.getDatasource().getInfo().getOdps(); + return new ScqlCommandJobConfig(convert(odps), request.getQuery().getSql()); + } + + @Override + public OdpsTableQueryConfig convert(Flightinner.CommandDataMeshQuery request) { + + Domaindatasource.OdpsDataSourceInfo odps = request.getDatasource().getInfo().getOdps(); + + Domaindata.DomainData domaindata = request.getDomaindata(); + + String tableName = domaindata.getRelativeUri(); + String partitionSpec = request.getQuery().getPartitionSpec(); + OdpsTableConfig odpsTableConfig = new OdpsTableConfig(tableName, partitionSpec, domaindata.getColumnsList()); + + if (Flightdm.ContentType.RAW.equals(request.getQuery().getContentType())) { + return new OdpsTableQueryConfig(convert(odps), 
OdpsTypeEnum.FILE, odpsTableConfig); + } + + return new OdpsTableQueryConfig(convert(odps), odpsTableConfig); + } + + @Override + public OdpsWriteConfig convert(Flightinner.CommandDataMeshUpdate request) { + Domaindatasource.OdpsDataSourceInfo odps = request.getDatasource().getInfo().getOdps(); + Domaindata.DomainData domaindata = request.getDomaindata(); + + String tableName = domaindata.getRelativeUri(); + String partitionSpec = request.getUpdate().getPartitionSpec(); + OdpsTableConfig odpsTableConfig = new OdpsTableConfig(tableName, partitionSpec, domaindata.getColumnsList()); + + if (Flightdm.ContentType.RAW.equals(request.getUpdate().getContentType())) { + return new OdpsWriteConfig(convert(odps), OdpsTypeEnum.FILE, odpsTableConfig); + } + return new OdpsWriteConfig(convert(odps), odpsTableConfig); + } + + private static OdpsConnectConfig convert(Domaindatasource.OdpsDataSourceInfo odps) { + return new OdpsConnectConfig(odps.getAccessKeyId(), odps.getAccessKeySecret(), odps.getEndpoint(), odps.getProject()); + } + +} diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/io/DynamicSequenceInputStream.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/io/DynamicSequenceInputStream.java new file mode 100644 index 0000000..dee851b --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/io/DynamicSequenceInputStream.java @@ -0,0 +1,150 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.plugin.odps.io; + +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.io.InputStream; +import java.util.LinkedList; +import java.util.Queue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +/** + * @author yuexie + * @date 2024/12/16 11:08 + **/ +@Slf4j +public class DynamicSequenceInputStream extends InputStream { + + private final Queue streamQueue = new LinkedList<>(); + private volatile InputStream currentStream = null; + + private final AtomicBoolean isCompleted = new AtomicBoolean(false); + + private final Lock lock = new ReentrantLock(true); + + public void appendStream(InputStream stream) { + + try { + lock.lock(); + + if (isCompleted.get()) { + throw new IllegalStateException("Stream is completed"); + } + + if (currentStream == null) { + currentStream = stream; + } else { + if (!streamQueue.add(stream)) { + throw new IllegalStateException("Stream queue is full"); + } + } + } finally { + lock.unlock(); + } + } + + public void setCompleted() { + try { + lock.lock(); + isCompleted.set(true); + } finally { + lock.unlock(); + } + } + + + + /** + * Reads the next byte of data from the input stream. The value byte is + * returned as an {@code int} in the range {@code 0} to + * {@code 255}. 
If no byte is available because the end of the stream + * has been reached, the value {@code -1} is returned. This method + * blocks until input data is available, the end of the stream is detected, + * or an exception is thrown. + * + *

A subclass must provide an implementation of this method. + * + * @return the next byte of data, or {@code -1} if the end of the + * stream is reached. + * @throws IOException if an I/O error occurs. + */ + @Override + public int read() throws IOException { + + if (currentStream == null) { + return this.readNextStream(); + } + + int data = currentStream.read(); + + if (data == -1) { + currentStream.close(); + currentStream = null; + return this.readNextStream(); + } + return data; + } + + @Override + public void close() throws IOException { + super.close(); + setCompleted(); + if (currentStream != null) { + currentStream.close(); + } + for (InputStream stream : streamQueue) { + stream.close(); + } + } + + private int readNextStream() throws IOException { + + // Promised: currentStream is null + try { + lock.lock(); + + currentStream = streamQueue.poll(); + + if (currentStream == null) { + + if (isCompleted.get()) { + return -1; + } + + lock.unlock(); + TimeUnit.MILLISECONDS.sleep(100); + return readNextStream(); + } + + return currentStream.read(); + + } catch (InterruptedException e) { + log.warn("Thread interrupted while waiting for next stream to read.", e); + Thread.currentThread().interrupt(); + } finally { + lock.unlock(); + } + + return -1; + } + +} diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/producer/OdpsFlightProducer.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/producer/OdpsFlightProducer.java new file mode 100644 index 0000000..38b2d3f --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/producer/OdpsFlightProducer.java @@ -0,0 +1,247 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.secretflow.dataproxy.plugin.odps.producer; + +import com.google.protobuf.Any; +import com.google.protobuf.InvalidProtocolBufferException; +import lombok.extern.slf4j.Slf4j; +import org.apache.arrow.flight.CallStatus; +import org.apache.arrow.flight.FlightDescriptor; +import org.apache.arrow.flight.FlightEndpoint; +import org.apache.arrow.flight.FlightInfo; +import org.apache.arrow.flight.FlightStream; +import org.apache.arrow.flight.NoOpFlightProducer; +import org.apache.arrow.flight.PutResult; +import org.apache.arrow.flight.Ticket; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.ipc.message.IpcOption; +import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; +import org.secretflow.dataproxy.common.exceptions.DataproxyException; +import org.secretflow.dataproxy.common.utils.GrpcUtils; +import org.secretflow.dataproxy.common.utils.JsonUtils; +import org.secretflow.dataproxy.core.config.FlightServerContext; +import org.secretflow.dataproxy.core.param.ParamWrapper; +import org.secretflow.dataproxy.core.service.TicketService; +import org.secretflow.dataproxy.core.service.impl.CacheTicketService; +import org.secretflow.dataproxy.core.spi.producer.DataProxyFlightProducer; +import org.secretflow.dataproxy.core.writer.Writer; +import org.secretflow.dataproxy.plugin.odps.config.OdpsCommandConfig; +import org.secretflow.dataproxy.plugin.odps.config.OdpsTableConfig; +import org.secretflow.dataproxy.plugin.odps.config.OdpsWriteConfig; +import org.secretflow.dataproxy.plugin.odps.config.TaskConfig; +import org.secretflow.dataproxy.plugin.odps.constant.OdpsTypeEnum; +import org.secretflow.dataproxy.plugin.odps.converter.OdpsParamConverter; +import org.secretflow.dataproxy.plugin.odps.reader.OdpsDoGetContext; +import org.secretflow.dataproxy.plugin.odps.reader.OdpsReader; +import org.secretflow.dataproxy.plugin.odps.reader.OdpsResourceReader; +import org.secretflow.dataproxy.plugin.odps.writer.OdpsRecordWriter; +import org.secretflow.dataproxy.plugin.odps.writer.OdpsResourceWriter; +import org.secretflow.v1alpha1.kusciaapi.Flightdm; +import org.secretflow.v1alpha1.kusciaapi.Flightinner; + +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.List; + +/** + * @author yuexie + * @date 2024/11/7 16:15 + **/ +@Slf4j +public class OdpsFlightProducer extends NoOpFlightProducer implements DataProxyFlightProducer { + + private final TicketService ticketService = CacheTicketService.getInstance(); + /** + * Obtain the data type used for registration name and identification processing. 
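+ * Presumably the plugin registry keys producers by this name and routes requests whose datasource type matches it.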
+ * + * @return producer name + */ + @Override + public String getProducerName() { + return "odps"; + } + + @Override + public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { + + final Any any = GrpcUtils.parseOrThrow(descriptor.getCommand()); + + try { + boolean isPut = false; + OdpsCommandConfig commandConfig = switch (any.getTypeUrl()) { + case "type.googleapis.com/kuscia.proto.api.v1alpha1.datamesh.CommandDataMeshSqlQuery" -> + new OdpsParamConverter().convert(any.unpack(Flightinner.CommandDataMeshSqlQuery.class)); + case "type.googleapis.com/kuscia.proto.api.v1alpha1.datamesh.CommandDataMeshQuery" -> + new OdpsParamConverter().convert(any.unpack(Flightinner.CommandDataMeshQuery.class)); + case "type.googleapis.com/kuscia.proto.api.v1alpha1.datamesh.CommandDataMeshUpdate" -> { + isPut = true; + yield new OdpsParamConverter().convert(any.unpack(Flightinner.CommandDataMeshUpdate.class)); + } + default -> throw CallStatus.INVALID_ARGUMENT + .withDescription("Unknown command type") + .toRuntimeException(); + }; + + log.info("OdpsFlightProducer#getFlightInfo, commandConfig: {}", JsonUtils.toString(commandConfig)); + + byte[] bytes; + List endpointList; + if (isPut) { + bytes = ticketService.generateTicket(ParamWrapper.of(getProducerName(), commandConfig)); + Flightdm.TicketDomainDataQuery ticketDomainDataQuery = Flightdm.TicketDomainDataQuery.newBuilder().setDomaindataHandle(new String(bytes)).build(); + bytes = Any.pack(ticketDomainDataQuery).toByteArray(); + endpointList = Collections.singletonList( + new FlightEndpoint(new Ticket(bytes), FlightServerContext.getInstance().getFlightServerConfig().getLocation()) + ); + } else if (OdpsTypeEnum.FILE.equals(commandConfig.getOdpsTypeEnum())) { + bytes = ticketService.generateTicket(ParamWrapper.of(getProducerName(), commandConfig)); + endpointList = Collections.singletonList( + new FlightEndpoint(new Ticket(bytes), FlightServerContext.getInstance().getFlightServerConfig().getLocation()) + ); + } else { + OdpsDoGetContext odpsDoGetContext = new OdpsDoGetContext(commandConfig); + endpointList = odpsDoGetContext.getFlightEndpoints(getProducerName()); + } + + // Only the protocol is used, and the concrete schema is not returned here. 
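+ // The tickets embedded in the endpoints above resolve back to the cached ParamWrapper (or TaskConfig) when the client calls doGet, and the real schema is taken from the reader in getStream.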
+ return new FlightInfo(DataProxyFlightProducer.DEFACT_SCHEMA, descriptor, endpointList, 0, 0,true, IpcOption.DEFAULT); + } catch (InvalidProtocolBufferException e) { + throw CallStatus.INVALID_ARGUMENT + .withCause(e) + .withDescription(e.getMessage()) + .toRuntimeException(); + } catch (Exception e) { + log.error("getFlightInfo error", e); + throw CallStatus.INTERNAL.withDescription(e.getMessage()).toRuntimeException(); + } + } + + @Override + public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { + + ParamWrapper paramWrapper = ticketService.getParamWrapper(ticket.getBytes()); + + ArrowReader odpsReader = null; + try { + if (paramWrapper.param() instanceof OdpsCommandConfig odpsCommandConfig) { + if (OdpsTypeEnum.FILE.equals(odpsCommandConfig.getOdpsTypeEnum())) { + Object commandConfig = odpsCommandConfig.getCommandConfig(); + if (commandConfig instanceof OdpsTableConfig odpsTableConfig) { + odpsReader = new OdpsResourceReader(new RootAllocator(), odpsCommandConfig.getOdpsConnectConfig(), odpsTableConfig); + } else { + throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "The odps read parameter is invalid, type url: " + commandConfig.getClass()); + } + } else { + throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "The odps read parameter is invalid, type url: " + paramWrapper.param().getClass()); + } + } else if (paramWrapper.param() instanceof TaskConfig taskConfig) { + odpsReader = new OdpsReader(new RootAllocator(), taskConfig); + } else { + throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "The odps read parameter is invalid, type url: " + paramWrapper.param().getClass()); + } + + listener.start(odpsReader.getVectorSchemaRoot()); + while (true) { + if (context.isCancelled()) { + log.warn("reader is cancelled"); + break; + } + + if (odpsReader.loadNextBatch()) { + listener.putNext(); + } else { + break; + } + } + log.info("doGet is completed"); + listener.completed(); + } catch (Exception e) { + log.error("doGet error", e); + throw CallStatus.UNKNOWN + .withCause(e) + .withDescription(e.getMessage()) + .toRuntimeException(); + } finally { + try { + if (odpsReader != null) { + odpsReader.close(); + } + } catch (Exception e) { + log.error("close odps reader error", e); + } + } + } + + @Override + public Runnable acceptPut( + CallContext context, FlightStream flightStream, StreamListener ackStream) { + + final Any any = GrpcUtils.parseOrThrow(flightStream.getDescriptor().getCommand()); + + if (!"type.googleapis.com/kuscia.proto.api.v1alpha1.datamesh.TicketDomainDataQuery".equals(any.getTypeUrl())) { + throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "The odps write parameter is invalid, type url: " + any.getTypeUrl()); + } + + return () -> { + try { + Flightdm.TicketDomainDataQuery unpack = any.unpack(Flightdm.TicketDomainDataQuery.class); + OdpsWriteConfig writeConfig = ticketService.getParamWrapper(unpack.getDomaindataHandle().getBytes()).unwrap(OdpsWriteConfig.class); + + Writer writer; + if (OdpsTypeEnum.FILE.equals(writeConfig.getOdpsTypeEnum())) { + writer = new OdpsResourceWriter(writeConfig.getOdpsConnectConfig(), writeConfig.getCommandConfig()); + } else { + writer = new OdpsRecordWriter(writeConfig); + } + + VectorSchemaRoot vectorSchemaRoot; + String askMsg; + int count = 0; + while (flightStream.next()) { + vectorSchemaRoot = flightStream.getRoot(); + int rowCount = vectorSchemaRoot.getRowCount(); + askMsg = "row count: " + rowCount; + writer.write(vectorSchemaRoot); + + 
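+ // Note: askMsg is computed but its bytes are never written into the ArrowBuf allocated below, so the ack metadata is effectively empty; presumably buffer.writeBytes(askMsg.getBytes(StandardCharsets.UTF_8)) was intended before onNext.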
+ try (BufferAllocator ba = new RootAllocator(1024);
+ final ArrowBuf buffer = ba.buffer(ackMsg.getBytes(StandardCharsets.UTF_8).length)) {
+ buffer.writeBytes(ackMsg.getBytes(StandardCharsets.UTF_8));
+ ackStream.onNext(PutResult.metadata(buffer));
+ }
+ count += rowCount;
+ }
+ writer.flush();
+ ackStream.onCompleted();
+ log.info("put data finished, total count: {}", count);
+ } catch (InvalidProtocolBufferException e) {
+ throw CallStatus.INVALID_ARGUMENT
+ .withCause(e)
+ .withDescription(e.getMessage())
+ .toRuntimeException();
+ } catch (Exception e) {
+ log.error("unknown error", e);
+ throw CallStatus.INTERNAL
+ .withCause(e)
+ .withDescription(e.getMessage())
+ .toRuntimeException();
+ }
+ };
+ }
+}
diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsDoGetContext.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsDoGetContext.java
new file mode 100644
index 0000000..2150b6a
--- /dev/null
+++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsDoGetContext.java
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.plugin.odps.reader;
+
+import com.aliyun.odps.Instance;
+import com.aliyun.odps.Odps;
+import com.aliyun.odps.OdpsException;
+import com.aliyun.odps.PartitionSpec;
+import com.aliyun.odps.task.SQLTask;
+import com.aliyun.odps.tunnel.InstanceTunnel;
+import lombok.Getter;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.arrow.flight.FlightEndpoint;
+import org.apache.arrow.flight.Ticket;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode;
+import org.secretflow.dataproxy.common.exceptions.DataproxyException;
+import org.secretflow.dataproxy.core.config.FlightServerContext;
+import org.secretflow.dataproxy.core.param.ParamWrapper;
+import org.secretflow.dataproxy.core.service.TicketService;
+import org.secretflow.dataproxy.core.service.impl.CacheTicketService;
+import org.secretflow.dataproxy.plugin.odps.config.OdpsCommandConfig;
+import org.secretflow.dataproxy.plugin.odps.config.OdpsConfigConstant;
+import org.secretflow.dataproxy.plugin.odps.config.OdpsConnectConfig;
+import org.secretflow.dataproxy.plugin.odps.config.OdpsTableConfig;
+import org.secretflow.dataproxy.plugin.odps.config.OdpsTableQueryConfig;
+import org.secretflow.dataproxy.plugin.odps.config.ScqlCommandJobConfig;
+import org.secretflow.dataproxy.plugin.odps.config.TaskConfig;
+import org.secretflow.dataproxy.plugin.odps.constant.OdpsTypeEnum;
+import org.secretflow.dataproxy.plugin.odps.utils.OdpsUtil;
+import org.secretflow.v1alpha1.common.Common;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * @author yuexie
+ * @date 2024/11/26 13:48
+ **/
+@Slf4j
+public class OdpsDoGetContext {
+
+ private final OdpsCommandConfig<?> odpsCommandConfig;
+
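+ // The context runs the SQL task once, then exposes the tunnel download session, the
+ // total record count, and the Arrow schema; getTaskConfigs() later slices [0, count)
+ // into per-endpoint ranges that all read through this shared session.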
+ @Getter
+ private InstanceTunnel.DownloadSession downloadSession;
+
+ @Getter
+ private long count;
+
+ @Getter
+ private Schema schema;
+
+ public OdpsDoGetContext(OdpsCommandConfig<?> config) {
+ this.odpsCommandConfig = config;
+ prepare();
+ }
+
+ private void prepare() {
+
+ // 1. run the SQL task and init the tunnel download session
+ try {
+ OdpsConnectConfig odpsConnectConfig = odpsCommandConfig.getOdpsConnectConfig();
+ Odps odps = OdpsUtil.initOdps(odpsConnectConfig);
+ String querySql;
+
+ if (odpsCommandConfig instanceof ScqlCommandJobConfig scqlReadJobConfig) {
+ querySql = scqlReadJobConfig.getCommandConfig();
+ } else if (odpsCommandConfig instanceof OdpsTableQueryConfig odpsTableQueryConfig) {
+ OdpsTableConfig tableConfig = odpsTableQueryConfig.getCommandConfig();
+ // For a table (DomainData) query, the SQL is built from the DomainData definition:
+ // only the columns declared by the DomainData are selected and returned.
+ querySql = this.buildSql(odps, tableConfig.tableName(), tableConfig.columns().stream().map(Common.DataColumn::getName).toList(), tableConfig.partition());
+ this.schema = odpsCommandConfig.getResultSchema();
+
+ } else {
+ throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Unsupported read parameter type: " + odpsCommandConfig.getClass());
+ }
+
+ Instance runInstance = SQLTask.run(odps, odpsConnectConfig.projectName(), querySql, OdpsUtil.getSqlFlag(), null);
+
+ runInstance.waitForSuccess();
+ log.debug("SQL Task run success, sql: {}", querySql);
+
+ if (runInstance.isSuccessful()) {
+ downloadSession = new InstanceTunnel(odps).createDownloadSession(odpsConnectConfig.projectName(), runInstance.getId(), false);
+ this.count = downloadSession.getRecordCount();
+ } else {
+ log.error("SQL Task run result is not successful, sql: {}", querySql);
+ throw DataproxyException.of(DataproxyErrorCode.ODPS_ERROR, "SQL Task run result is not successful, sql: " + querySql);
+ }
+
+ // 2. init schema
+ // For a raw SQL query the Arrow schema is derived here from the columns of the SQL result;
+ // for a table query the schema was already taken from the DomainData fields above.
+ if (odpsCommandConfig.getOdpsTypeEnum() == OdpsTypeEnum.SQL) {
+ this.initArrowSchemaFromColumns();
+ }
+
+ } catch (OdpsException e) {
+ log.error("SQL Task run error", e);
+ throw DataproxyException.of(DataproxyErrorCode.ODPS_ERROR, "SQL Task run error", e);
+ }
+ }
+
+ public List<TaskConfig> getTaskConfigs() {
+ long upgradeThreshold = FlightServerContext.getOrDefault(
+ OdpsConfigConstant.ConfigKey.FLIGHT_ENDPOINT_UPGRADE_TO_MULTI_BATCH_THRESHOLD,
+ Long.class,
+ 1_000_000L);
+ int numberOfParts = FlightServerContext.getOrDefault(
+ OdpsConfigConstant.ConfigKey.MAX_FLIGHT_ENDPOINT,
+ Integer.class,
+ 3);
+ if (this.count > upgradeThreshold) {
+
+ // The result exceeds the configured threshold, so the read is split into several tasks.
+ log.info("SQL result count is greater than {}, split into {} tasks", upgradeThreshold, numberOfParts);
+
+ long itemsPerBatch = count / numberOfParts;
+ long remainder = count % numberOfParts;
+
+ ArrayList<TaskConfig> taskConfigs = new ArrayList<>();
+ for (int i = 0; i < numberOfParts; i++) {
+ taskConfigs.add(
+ new TaskConfig(this,
+ i * itemsPerBatch + Math.min(i, remainder),
+ itemsPerBatch + (i < remainder ? 1 : 0)));
+ }
+
+ return taskConfigs;
+ }
+
+ return Collections.singletonList(new TaskConfig(this, 0, count));
+ }
+
+ public List<FlightEndpoint> getFlightEndpoints(String type) {
+ final TicketService ticketService = CacheTicketService.getInstance();
+ byte[] bytes;
+ List<TaskConfig> taskConfigs = getTaskConfigs();
+ List<FlightEndpoint> endpointList = new ArrayList<>(taskConfigs.size());
+ for (TaskConfig taskConfig : taskConfigs) {
+ log.info("taskConfig: {}", taskConfig);
+ bytes = ticketService.generateTicket(ParamWrapper.of(type, taskConfig));
+
+ endpointList.add(new FlightEndpoint(new Ticket(bytes), FlightServerContext.getInstance().getFlightServerConfig().getLocation()));
+ }
+
+ return endpointList;
+ }
+
+ private void initArrowSchemaFromColumns() {
+ schema = new Schema(downloadSession.getSchema().getAllColumns().stream().map(OdpsUtil::convertOdpsColumnToArrowField).toList());
+ }
+
+ private String buildSql(Odps odps, String tableName, List<String> fields, String whereClause) {
+
+ final Pattern columnOrValuePattern = Pattern.compile("^[\\u00b7A-Za-z0-9\\u4e00-\\u9fa5\\-_,.]*$");
+
+ if (!columnOrValuePattern.matcher(tableName).matches()) {
+ throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Invalid tableName:" + tableName);
+ }
+
+ boolean partitioned = odps.tables().get(tableName).isPartitioned();
+ // Non-partitioned tables ignore the partition clause entirely.
+ if (!partitioned) {
+ whereClause = "";
+ }
+
+ if (!whereClause.isEmpty()) {
+ String[] groups = whereClause.split("[,/]");
+ if (groups.length > 1) {
+ final PartitionSpec partitionSpec = new PartitionSpec(whereClause);
+
+ for (String key : partitionSpec.keys()) {
+ if (!columnOrValuePattern.matcher(key).matches()) {
+ throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Invalid partition key:" + key);
+ }
+ if (!columnOrValuePattern.matcher(partitionSpec.get(key)).matches()) {
+ throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Invalid partition value:" + partitionSpec.get(key));
+ }
+ }
+
+ List<String> list = partitionSpec.keys().stream().map(k -> k + "='" + partitionSpec.get(k) + "'").toList();
+ whereClause = String.join(" and ", list);
+ }
+ }
+
+ log.debug("whereClause: {}", whereClause);
+
+ return "select " + String.join(",", fields) + " from " + tableName + (whereClause.isEmpty() ? "" : " where " + whereClause) + ";";
+ }
+
+}
diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsDoGetTaskContext.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsDoGetTaskContext.java
new file mode 100644
index 0000000..383f394
--- /dev/null
+++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsDoGetTaskContext.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.plugin.odps.reader;
+
+import com.aliyun.odps.data.Record;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.secretflow.dataproxy.core.reader.Reader;
+import org.secretflow.dataproxy.core.reader.Sender;
+import org.secretflow.dataproxy.plugin.odps.config.TaskConfig;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * @author yuexie
+ * @date 2024/12/5 15:20
+ **/
+@Slf4j
+public class OdpsDoGetTaskContext implements AutoCloseable {
+
+ private final Reader reader;
+ private final Sender<Record> sender;
+
+ private final VectorSchemaRoot root;
+
+ private final ExecutorService executorService = Executors.newFixedThreadPool(1);
+
+ private Future<?> readFuture;
+
+ private final AtomicBoolean hasNext = new AtomicBoolean(true);
+
+ public OdpsDoGetTaskContext(TaskConfig taskConfig, VectorSchemaRoot root) {
+ this.root = root;
+ this.sender = getSender();
+ this.reader = new OdpsTunnelRecordReader(taskConfig, this.sender, taskConfig.getContext().getDownloadSession());
+ }
+
+ public void start() {
+ readFuture = executorService.submit(() -> {
+ try {
+ reader.read();
+ sender.putOver();
+ hasNext.set(false);
+ log.info("read finished...");
+ } catch (InterruptedException e) {
+ log.error("read interrupted", e);
+ Thread.currentThread().interrupt();
+ }
+ });
+ }
+
+ public void cancel() {
+ if (readFuture != null && !readFuture.isDone()) {
+ log.info("cancel read task...");
+ readFuture.cancel(true);
+ }
+ }
+
+ public boolean hasNext() {
+ return hasNext.get();
+ }
+
+ public void putNextBatchData() {
+ if ((!hasNext() && readFuture.isDone()) || readFuture.isCancelled()) {
+ return;
+ }
+ sender.send();
+ }
+
+ @Override
+ public void close() throws Exception {
+ this.cancel();
+ executorService.shutdown();
+ }
+
+ private Sender<Record> getSender() {
+ int estimatedRecordCount = 1_000;
+ return new OdpsRecordSender(
+ estimatedRecordCount,
+ new LinkedBlockingQueue<>(estimatedRecordCount),
+ root);
+ }
+}
diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsReader.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsReader.java
new file mode 100644
index 0000000..641583a
--- /dev/null
+++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsReader.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.plugin.odps.reader;
+
+import lombok.Setter;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.secretflow.dataproxy.plugin.odps.config.TaskConfig;
+
+import java.io.IOException;
+
+/**
+ * @author yuexie
+ * @date 2024/10/30 17:56
+ **/
+@Setter
+@Slf4j
+public class OdpsReader extends ArrowReader {
+
+ private OdpsDoGetTaskContext odpsDoGetTaskContext = null;
+
+ private final TaskConfig taskConfig;
+
+ public OdpsReader(BufferAllocator allocator, TaskConfig taskConfig) {
+ super(allocator);
+
+ this.taskConfig = taskConfig;
+ }
+
+ /**
+ * Load the next ArrowRecordBatch to the vector schema root if available.
+ *
+ * @return true if a batch was read, false on EOS
+ * @throws IOException on error
+ */
+ @Override
+ public boolean loadNextBatch() throws IOException {
+
+ if (odpsDoGetTaskContext == null) {
+ prepare();
+ }
+
+ if (odpsDoGetTaskContext.hasNext()) {
+ odpsDoGetTaskContext.putNextBatchData();
+ return true;
+ }
+ return false;
+ }
+
+ private void prepare() throws IOException {
+ odpsDoGetTaskContext = new OdpsDoGetTaskContext(taskConfig, this.getVectorSchemaRoot());
+ odpsDoGetTaskContext.start();
+ }
+
+ /**
+ * Return the number of bytes read from the ReadChannel.
+ *
+ * @return number of bytes read
+ */
+ @Override
+ public long bytesRead() {
+ // Byte accounting is not tracked for tunnel-based reads.
+ return 0;
+ }
+
+ /**
+ * Close the underlying read source.
+ */
+ @Override
+ protected void closeReadSource() {
+ try {
+ odpsDoGetTaskContext.close();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Read the Schema from the source, will be invoked at the beginning of the initialization.
+ *
+ * @return the read Schema
+ */
+ @Override
+ protected Schema readSchema() {
+ return taskConfig.getContext().getSchema();
+ }
+}
diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsRecordSender.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsRecordSender.java
new file mode 100644
index 0000000..0c84edc
--- /dev/null
+++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsRecordSender.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.plugin.odps.reader;
+
+import com.aliyun.odps.Column;
+import com.aliyun.odps.data.ArrayRecord;
+import com.aliyun.odps.data.Record;
+import com.aliyun.odps.utils.StringUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.secretflow.dataproxy.core.converter.BigIntVectorConverter;
+import org.secretflow.dataproxy.core.converter.BitVectorConverter;
+import org.secretflow.dataproxy.core.converter.DateDayVectorConverter;
+import org.secretflow.dataproxy.core.converter.DateMilliVectorConverter;
+import org.secretflow.dataproxy.core.converter.Float4VectorConverter;
+import org.secretflow.dataproxy.core.converter.Float8VectorConverter;
+import org.secretflow.dataproxy.core.converter.IntVectorConverter;
+import org.secretflow.dataproxy.core.converter.SmallIntVectorConverter;
+import org.secretflow.dataproxy.core.converter.TimeMilliVectorConvertor;
+import org.secretflow.dataproxy.core.converter.TimeStampNanoVectorConverter;
+import org.secretflow.dataproxy.core.converter.TinyIntVectorConverter;
+import org.secretflow.dataproxy.core.converter.ValueConversionStrategy;
+import org.secretflow.dataproxy.core.converter.VarCharVectorConverter;
+import org.secretflow.dataproxy.core.reader.AbstractSender;
+import org.secretflow.dataproxy.core.visitor.BooleanValueVisitor;
+import org.secretflow.dataproxy.core.visitor.ByteArrayValueVisitor;
+import org.secretflow.dataproxy.core.visitor.ByteValueVisitor;
+import org.secretflow.dataproxy.core.visitor.DoubleValueVisitor;
+import org.secretflow.dataproxy.core.visitor.FloatValueVisitor;
+import org.secretflow.dataproxy.core.visitor.IntegerValueVisitor;
+import org.secretflow.dataproxy.core.visitor.LongValueVisitor;
+import org.secretflow.dataproxy.core.visitor.ShortValueVisitor;
+
+import javax.annotation.Nonnull;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.concurrent.LinkedBlockingQueue;
+
+/**
+ * @author yuexie
+ * @date 2024/11/1 14:14
+ **/
+@Slf4j
+public class OdpsRecordSender extends AbstractSender<Record> {
+
+ private static final Map<ArrowType.ArrowTypeID, ValueConversionStrategy> ARROW_TYPE_ID_FIELD_CONSUMER_MAP = new HashMap<>();
+
+ private final Map<String, FieldVector> fieldVectorMap = new HashMap<>();
+
+ private boolean isInit = false;
+
+ static {
+ SmallIntVectorConverter smallIntVectorConverter = new SmallIntVectorConverter(new ShortValueVisitor(), null);
+ TinyIntVectorConverter tinyIntVectorConverter = new TinyIntVectorConverter(new ByteValueVisitor(), smallIntVectorConverter);
+ BigIntVectorConverter bigIntVectorConverter = new BigIntVectorConverter(new LongValueVisitor(), tinyIntVectorConverter);
+ IntVectorConverter intVectorConverter = new IntVectorConverter(new IntegerValueVisitor(), bigIntVectorConverter);
+
+ Float4VectorConverter float4VectorConverter = new Float4VectorConverter(new FloatValueVisitor(), null);
+ Float8VectorConverter float8VectorConverter = new Float8VectorConverter(new DoubleValueVisitor(), float4VectorConverter);
+
+ DateMilliVectorConverter dateMilliVectorConverter = new DateMilliVectorConverter(new LongValueVisitor(), null);
+
+ ARROW_TYPE_ID_FIELD_CONSUMER_MAP.put(ArrowType.ArrowTypeID.Int, intVectorConverter);
+ ARROW_TYPE_ID_FIELD_CONSUMER_MAP.put(ArrowType.ArrowTypeID.Utf8, new VarCharVectorConverter(new ByteArrayValueVisitor()));
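+ // The integer converters above are chained via their second constructor argument
+ // (Int -> BigInt -> TinyInt -> SmallInt), so the single ArrowTypeID.Int entry can
+ // cover every integer vector width: each converter handles its own vector type and
+ // falls back to the next one in the chain otherwise. The same pattern is used for
+ // FloatingPoint (Float8 -> Float4) and Date (DateDay -> DateMilli) below.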
+ ARROW_TYPE_ID_FIELD_CONSUMER_MAP.put(ArrowType.ArrowTypeID.FloatingPoint, float8VectorConverter);
+ ARROW_TYPE_ID_FIELD_CONSUMER_MAP.put(ArrowType.ArrowTypeID.Bool, new BitVectorConverter(new BooleanValueVisitor()));
+ ARROW_TYPE_ID_FIELD_CONSUMER_MAP.put(ArrowType.ArrowTypeID.Date, new DateDayVectorConverter(new IntegerValueVisitor(), dateMilliVectorConverter));
+ ARROW_TYPE_ID_FIELD_CONSUMER_MAP.put(ArrowType.ArrowTypeID.Time, new TimeMilliVectorConvertor(new IntegerValueVisitor(), null));
+ ARROW_TYPE_ID_FIELD_CONSUMER_MAP.put(ArrowType.ArrowTypeID.Timestamp, new TimeStampNanoVectorConverter(new LongValueVisitor()));
+ }
+
+ /**
+ * Constructor
+ *
+ * @param estimatedRecordCount estimated record count
+ * @param recordQueue record queue
+ * @param root vector schema root
+ */
+ public OdpsRecordSender(int estimatedRecordCount, LinkedBlockingQueue<Record> recordQueue, VectorSchemaRoot root) {
+ super(estimatedRecordCount, recordQueue, root);
+ }
+
+ @Override
+ protected void toArrowVector(Record record, @Nonnull VectorSchemaRoot root, int takeRecordCount) {
+
+ log.debug("record: {}, takeRecordCount: {}", record, takeRecordCount);
+
+ this.initRecordColumn2FieldMap(record);
+
+ Optional<FieldVector> fieldVectorOpt;
+ FieldVector vector;
+ String columnName;
+ ArrowType.ArrowTypeID arrowTypeID;
+ Column[] recordColumns = record.getColumns();
+ Object recordColumnValue;
+
+ for (Column recordColumn : recordColumns) {
+ columnName = recordColumn.getName();
+
+ fieldVectorOpt = Optional.ofNullable(this.fieldVectorMap.get(columnName));
+
+ if (fieldVectorOpt.isPresent()) {
+ vector = fieldVectorOpt.get();
+ recordColumnValue = record.get(columnName);
+
+ if (Objects.isNull(recordColumnValue)) {
+ vector.setNull(takeRecordCount);
+ continue;
+ }
+
+ arrowTypeID = vector.getField().getType().getTypeID();
+ ArrowType.ArrowTypeID finalArrowTypeID = arrowTypeID;
+
+ ValueConversionStrategy fieldConsumer =
+ Optional.ofNullable(ARROW_TYPE_ID_FIELD_CONSUMER_MAP.get(arrowTypeID))
+ .orElseThrow(() -> new RuntimeException("Unsupported arrow type id: " + finalArrowTypeID));
+
+ fieldConsumer.convertAndSet(vector, takeRecordCount, recordColumnValue);
+
+ } else {
+ log.debug("columnName: {} not in needColumns", columnName);
+ }
+ }
+
+ }
+
+ @Override
+ protected boolean isOver(Record record) {
+ return record instanceof ArrayRecord && record.getColumns().length == 0;
+ }
+
+ @Override
+ public void putOver() throws InterruptedException {
+ this.put(new ArrayRecord(new Column[0]));
+ log.debug("putOver");
+ }
+
+ private synchronized void initRecordColumn2FieldMap(Record record) {
+
+ if (isInit) {
+ return;
+ }
+
+ VectorSchemaRoot root = getRoot();
+
+ if (Objects.isNull(root)) {
+ return;
+ }
+ List<FieldVector> fieldVectors = root.getFieldVectors();
+ Column[] columns = record.getColumns();
+ Optional<FieldVector> first;
+ for (Column column : columns) {
+ String name = column.getName();
+ first = fieldVectors.stream()
+ .filter(fieldVector -> StringUtils.equalsIgnoreCase(fieldVector.getName(), name))
+ .findFirst();
+
+ if (first.isPresent()) {
+ fieldVectorMap.put(name, first.get());
+ } else {
+ log.debug("columnName: {} not in fieldVectors", name);
+ }
+ }
+ isInit = true;
+ }
+}
diff --git a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceSplitReader.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsResourceReader.java
similarity index 58%
rename from
dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceSplitReader.java rename to dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsResourceReader.java index 56439c6..3aac567 100644 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsResourceSplitReader.java +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsResourceReader.java @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.secretflow.dataproxy.manager.connector.odps; + +package org.secretflow.dataproxy.plugin.odps.reader; import com.aliyun.odps.Odps; import com.aliyun.odps.OdpsException; -import lombok.extern.slf4j.Slf4j; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.BitVectorHelper; @@ -29,9 +29,9 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.datasource.conn.OdpsConnConfig; -import org.secretflow.dataproxy.common.model.datasource.location.OdpsTableInfo; -import org.secretflow.dataproxy.manager.SplitReader; +import org.secretflow.dataproxy.plugin.odps.config.OdpsConnectConfig; +import org.secretflow.dataproxy.plugin.odps.config.OdpsTableConfig; +import org.secretflow.dataproxy.plugin.odps.utils.OdpsUtil; import java.io.IOException; import java.io.InputStream; @@ -39,67 +39,53 @@ import java.util.Objects; /** - * odps Resource Split Reader - * * @author yuexie - * @date 2024-06-01 17:08:45 - */ -@Slf4j -public class OdpsResourceSplitReader extends ArrowReader implements SplitReader { + * @date 2024/11/11 16:00 + **/ +public class OdpsResourceReader extends ArrowReader { private static final String FIELD_NAME = "binary_data"; - private static final int BATCH_SIZE = 3 * 1024 * 1024; + private static final int BATCH_SIZE = 1024 * 1024; + private static final int BYTES_READ = 1024; + private static final int BYTES_THRESHOLD = BATCH_SIZE - BYTES_READ; - private final OdpsConnConfig odpsConnConfig; + private final OdpsConnectConfig connectConfig; - private final OdpsTableInfo tableInfo; + private final OdpsTableConfig tableInfo; private InputStream inputStream; - - - private int readIndex = 0; - - protected OdpsResourceSplitReader(BufferAllocator allocator, OdpsConnConfig odpsConnConfig, OdpsTableInfo tableInfo) { + public OdpsResourceReader(BufferAllocator allocator, OdpsConnectConfig connectConfig, OdpsTableConfig tableInfo) { super(allocator); - this.odpsConnConfig = odpsConnConfig; + this.connectConfig = connectConfig; this.tableInfo = tableInfo; - } - - @Override - public ArrowReader startRead() { - - Odps odps = OdpsUtil.buildOdps(odpsConnConfig); - try { - inputStream = odps.resources().getResourceAsStream(tableInfo.tableName()); - } catch (OdpsException e) { - throw new RuntimeException(e); - } - return this; + this.prepare(); } + /** + * Load the next ArrowRecordBatch to the vector schema root if available. 
+ *
+ * @return true if a batch was read, false on EOS
+ * @throws IOException on error
+ */
 @Override
 public boolean loadNextBatch() throws IOException {
 VectorSchemaRoot root = getVectorSchemaRoot();
- root.clear();
 VarBinaryVector vector = (VarBinaryVector) root.getVector(FIELD_NAME);
 vector.allocateNew(1);
- // reserve enough buffer space
- while (vector.getDataBuffer().capacity() < BATCH_SIZE) {
- vector.reallocDataBuffer();
+ if (vector.getDataBuffer().capacity() < BATCH_SIZE) {
+ vector.reallocDataBuffer(BATCH_SIZE);
 }
 ArrowBuf dataBuffer = vector.getDataBuffer();
- int l = readRangeToBuffer(dataBuffer, 0);
+ int l = readRangeToBuffer(dataBuffer);
 if (l == 0) {
 return false;
 }
- readIndex += l;
-
 vector.getOffsetBuffer().setInt(VarBinaryVector.OFFSET_WIDTH, l);
 BitVectorHelper.setBit(vector.getValidityBuffer(), 0);
 vector.setLastSet(0);
@@ -109,9 +95,13 @@ public boolean loadNextBatch() throws IOException {
 return true;
 }
+ /**
+ * Return the number of bytes read from the ReadChannel.
+ *
+ * @return number of bytes read
+ */
 @Override
 public long bytesRead() {
- try {
 if (inputStream != null) {
 return inputStream.available();
@@ -122,36 +112,57 @@ public long bytesRead() {
 }
 }
+ /**
+ * Close the underlying read source.
+ *
+ * @throws IOException on error
+ */
 @Override
 protected void closeReadSource() throws IOException {
- try {
- if (Objects.nonNull(inputStream)) {
- inputStream.close();
- }
- } catch (IOException ignored) {
+ if (Objects.nonNull(inputStream)) {
+ inputStream.close();
 }
 }
+ /**
+ * Read the Schema from the source, will be invoked at the beginning of the initialization.
+ *
+ * @return the read Schema
+ * @throws IOException on error
+ */
 @Override
 protected Schema readSchema() throws IOException {
 return new Schema(List.of(Field.notNullable(FIELD_NAME, new ArrowType.Binary())));
 }
- private int readRangeToBuffer(ArrowBuf valueBuffer, int startIndex) {
+ private void prepare() {
+ Odps odps = OdpsUtil.initOdps(connectConfig);
+ try {
+ inputStream = odps.resources().getResourceAsStream(tableInfo.tableName());
+ } catch (OdpsException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private int readRangeToBuffer(ArrowBuf valueBuffer) {
 if (inputStream == null) {
 return 0;
 }
 try {
- if (inputStream.available() == 0) {
- return 0;
- }
- byte[] bytes = new byte[1024];
- int length = inputStream.read(bytes);
- valueBuffer.writeBytes(bytes, startIndex, length);
- return length;
+ byte[] bytes = new byte[BYTES_READ];
+ int length;
+ int totalBytesRead = 0;
+ while ((length = inputStream.read(bytes)) != -1) {
+ valueBuffer.writeBytes(bytes, 0, length);
+ totalBytesRead += length;
+ if (totalBytesRead >= BYTES_THRESHOLD) {
+ break;
+ }
+ }
+ return totalBytesRead;
 } catch (IOException e) {
 throw new RuntimeException(e);
 }
diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsTunnelRecordReader.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsTunnelRecordReader.java
new file mode 100644
index 0000000..f221969
--- /dev/null
+++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/reader/OdpsTunnelRecordReader.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.plugin.odps.reader;
+
+import com.aliyun.odps.data.Record;
+import com.aliyun.odps.tunnel.InstanceTunnel;
+import com.aliyun.odps.tunnel.io.TunnelRecordReader;
+import lombok.extern.slf4j.Slf4j;
+import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode;
+import org.secretflow.dataproxy.common.exceptions.DataproxyException;
+import org.secretflow.dataproxy.core.reader.AbstractReader;
+import org.secretflow.dataproxy.core.reader.Sender;
+import org.secretflow.dataproxy.plugin.odps.config.TaskConfig;
+
+import java.time.Instant;
+
+/**
+ * @author yuexie
+ * @date 2024/10/31 20:16
+ **/
+@Slf4j
+public class OdpsTunnelRecordReader extends AbstractReader<TaskConfig, Record> {
+
+ private final InstanceTunnel.DownloadSession downloadSession;
+
+ public OdpsTunnelRecordReader(TaskConfig param, Sender<Record> sender, InstanceTunnel.DownloadSession downloadSession) {
+ super(param, sender);
+
+ this.downloadSession = downloadSession;
+ }
+
+ @Override
+ protected void read(TaskConfig param) {
+ log.info("Start reading odps tunnel records. download session: {} start: {}, count: {}", downloadSession.getId(), param.getStartIndex(), param.getCount());
+ try (TunnelRecordReader records =
+ downloadSession.openRecordReader(param.getStartIndex(), param.getCount(), param.isCompress())) {
+
+ int recordCount = 0;
+ Instant startInstant = Instant.now();
+ Instant tempInstant;
+ for (Record record : records) {
+ this.put(record);
+ recordCount++;
+
+ if (recordCount % 10000 == 0) {
+ if (Thread.currentThread().isInterrupted()) {
+ log.info("OdpsTunnelRecordReader read interrupted. recordCount: {}", recordCount);
+ throw new InterruptedException("OdpsTunnelRecordReader read interrupted.");
+ }
+ tempInstant = Instant.now();
+ if (tempInstant.getEpochSecond() - startInstant.getEpochSecond() > 10) {
+ log.info("OdpsTunnelRecordReader read: download sessionID: {} recordCount: {}", downloadSession.getId(), recordCount);
+ startInstant = tempInstant;
+ }
+
+ }
+ }
+ log.info("Finished reading odps tunnel records. recordCount: {}", recordCount);
+ } catch (Exception e) {
+ throw DataproxyException.of(DataproxyErrorCode.ODPS_ERROR, "ODPS read error", e);
+ }
+
+ }
+}
diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/utils/OdpsUtil.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/utils/OdpsUtil.java
new file mode 100644
index 0000000..e46f30c
--- /dev/null
+++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/utils/OdpsUtil.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.secretflow.dataproxy.plugin.odps.utils;
+
+import com.aliyun.odps.Column;
+import com.aliyun.odps.Odps;
+import com.aliyun.odps.OdpsType;
+import com.aliyun.odps.account.Account;
+import com.aliyun.odps.account.AliyunAccount;
+import com.aliyun.odps.type.DecimalTypeInfo;
+import com.aliyun.odps.type.TypeInfo;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.secretflow.dataproxy.plugin.odps.config.OdpsConnectConfig;
+
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * @author yuexie
+ * @date 2024/11/3 22:30
+ **/
+public class OdpsUtil {
+
+ public static final boolean OVER_WRITE = true;
+
+ public static Odps initOdps(OdpsConnectConfig config) {
+
+ Account account = new AliyunAccount(config.accessKeyId(), config.accessKeySecret());
+ Odps odps = new Odps(account);
+ odps.setEndpoint(config.endpoint());
+ odps.setDefaultProject(config.projectName());
+
+ return odps;
+ }
+
+ public static Map<String, String> getSqlFlag() {
+ HashMap<String, String> hints = new LinkedHashMap<>();
+ hints.put("odps.sql.type.system.odps2", "true");
+ return hints;
+ }
+
+ public static Field convertOdpsColumnToArrowField(Column column) {
+ return Field.nullable(column.getName(), parseOdpsColumnType(column.getTypeInfo()));
+ }
+
+ /**
+ * @see com.aliyun.odps.commons.util.ArrowUtils#getArrowType(com.aliyun.odps.type.TypeInfo)
+ * @param type {@link TypeInfo}
+ * @return {@link ArrowType}
+ */
+ private static ArrowType parseOdpsColumnType(TypeInfo type) {
+
+ OdpsType odpsType = type.getOdpsType();
+ return switch (odpsType) {
+ case JSON, CHAR, VARCHAR, STRING -> Types.MinorType.VARCHAR.getType();
+ case BINARY -> Types.MinorType.VARBINARY.getType();
+ case TINYINT -> Types.MinorType.TINYINT.getType();
+ case SMALLINT -> Types.MinorType.SMALLINT.getType();
+ case INT -> Types.MinorType.INT.getType();
+ case BIGINT -> Types.MinorType.BIGINT.getType();
+ case BOOLEAN -> Types.MinorType.BIT.getType();
+ case FLOAT -> Types.MinorType.FLOAT4.getType();
+ case DOUBLE -> Types.MinorType.FLOAT8.getType();
+ case DATE -> Types.MinorType.DATEDAY.getType();
+ case DATETIME -> Types.MinorType.DATEMILLI.getType();
+ //TODO: 8 bytes => 12 bytes
+// case TIMESTAMP, TIMESTAMP_NTZ -> Types.MinorType.TIMESTAMPNANO.getType();
+ case TIMESTAMP, TIMESTAMP_NTZ -> Types.MinorType.TIMESTAMPMILLI.getType();
+ case ARRAY -> Types.MinorType.LIST.getType();
+ case INTERVAL_DAY_TIME -> Types.MinorType.INTERVALDAY.getType();
+ case INTERVAL_YEAR_MONTH -> Types.MinorType.INTERVALYEAR.getType();
+ case STRUCT -> Types.MinorType.STRUCT.getType();
+ case MAP -> Types.MinorType.MAP.getType();
+ case DECIMAL -> {
+ if (type instanceof DecimalTypeInfo decimalTypeInfo) {
+ yield new ArrowType.Decimal(decimalTypeInfo.getPrecision(), decimalTypeInfo.getScale(), 128);
+ } else {
+ throw new UnsupportedOperationException("Unsupported type: " + type);
+ }
+ }
+ default ->
+ throw new UnsupportedOperationException("Unsupported type: " + type);
+ };
+ }
+}
diff --git
a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsDataWriter.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/writer/OdpsRecordWriter.java similarity index 83% rename from dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsDataWriter.java rename to dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/writer/OdpsRecordWriter.java index 2e7d312..1465001 100644 --- a/dataproxy-manager/src/main/java/org/secretflow/dataproxy/manager/connector/odps/OdpsDataWriter.java +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/writer/OdpsRecordWriter.java @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.secretflow.dataproxy.manager.connector.odps; +package org.secretflow.dataproxy.plugin.odps.writer; import com.aliyun.odps.Column; import com.aliyun.odps.Odps; @@ -44,10 +44,13 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.datasource.conn.OdpsConnConfig; -import org.secretflow.dataproxy.common.model.datasource.location.OdpsTableInfo; import org.secretflow.dataproxy.common.utils.JsonUtils; -import org.secretflow.dataproxy.manager.DataWriter; +import org.secretflow.dataproxy.core.writer.Writer; +import org.secretflow.dataproxy.plugin.odps.config.OdpsCommandConfig; +import org.secretflow.dataproxy.plugin.odps.config.OdpsConnectConfig; +import org.secretflow.dataproxy.plugin.odps.config.OdpsTableConfig; +import org.secretflow.dataproxy.plugin.odps.config.OdpsWriteConfig; +import org.secretflow.dataproxy.plugin.odps.utils.OdpsUtil; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -55,39 +58,30 @@ import java.util.List; /** - * odps Table Writer - * * @author yuexie - * @date 2024-06-01 17:08:45 - */ + * @date 2024/11/10 21:50 + **/ @Slf4j -public class OdpsDataWriter implements DataWriter { - - - private final OdpsConnConfig connConfig; - - private final OdpsTableInfo tableInfo; +public class OdpsRecordWriter implements Writer { - private final Schema schema; + private final OdpsCommandConfig commandConfig; + private final OdpsConnectConfig odpsConnectConfig; + private final OdpsTableConfig odpsTableConfig; - private final boolean overwrite = true; - - private boolean isPartitioned = false; - - private TableSchema odpsTableSchema = null; private TableTunnel.UploadSession uploadSession = null; + private TableSchema odpsTableSchema = null; private RecordWriter recordWriter = null; + private boolean isPartitioned = false; - public OdpsDataWriter(OdpsConnConfig connConfig, OdpsTableInfo tableInfo, Schema schema) throws OdpsException, IOException { - this.connConfig = connConfig; - this.tableInfo = tableInfo; - this.schema = schema; - initOdps(); + public OdpsRecordWriter(OdpsWriteConfig commandConfig) { + this.commandConfig = commandConfig; + this.odpsConnectConfig = commandConfig.getOdpsConnectConfig(); + this.odpsTableConfig = commandConfig.getCommandConfig(); + this.prepare(); } @Override - public void write(VectorSchemaRoot root) throws IOException { - + public void write(VectorSchemaRoot root) { final int batchSize = root.getRowCount(); log.info("odps writer batchSize: {}", batchSize); int columnCount = 
root.getFieldVectors().size(); @@ -108,18 +102,21 @@ record = uploadSession.newRecord(); if (tableSchema.containsColumn(columnName)) { this.setRecordValue(record, tableSchema.getColumnIndex(columnName), this.getValue(root.getFieldVectors().get(columnIndex), rowIndex)); } else { - log.warn("column: `{}` not exists in table: {}", columnName, tableInfo.tableName()); + log.warn("column: `{}` not exists in table: {}", columnName, odpsTableConfig.tableName()); } } - recordWriter.write(record); + try { + recordWriter.write(record); + } catch (IOException e) { + throw new RuntimeException(e); + } log.debug("record: {}", record); } - } @Override - public void flush() throws IOException { + public void flush() { try { if (recordWriter != null) { recordWriter.close(); @@ -132,50 +129,42 @@ public void flush() throws IOException { } } - @Override - public void destroy() throws IOException { - - } - - @Override - public void close() throws Exception { - // odps no close function - } - - private Odps initOdpsClient(OdpsConnConfig odpsConnConfig) { - - if (odpsConnConfig == null) { + private Odps initOdpsClient(OdpsConnectConfig odpsConnectConfig) { + if (odpsConnectConfig == null) { throw new IllegalArgumentException("connConfig is null"); } - - return OdpsUtil.buildOdps(odpsConnConfig); + return OdpsUtil.initOdps(odpsConnectConfig); } - private void initOdps() throws OdpsException, IOException { - // init odps client - Odps odps = initOdpsClient(this.connConfig); - // Pre-processing - PartitionSpec convertPartitionSpec = this.convertToPartitionSpec(tableInfo.partitionSpec()); - preProcessing(odps, connConfig.getProjectName(), tableInfo.tableName(), convertPartitionSpec); - // init upload session - TableTunnel tunnel = new TableTunnel(odps); - - if (isPartitioned) { - if (tableInfo.partitionSpec() == null || tableInfo.partitionSpec().isEmpty()) { - throw DataproxyException.of(DataproxyErrorCode.INVALID_PARTITION_SPEC, "partitionSpec is empty"); - } - assert this.odpsTableSchema != null; - List partitionColumns = this.odpsTableSchema.getPartitionColumns(); - PartitionSpec partitionSpec = new PartitionSpec(); - for (Column partitionColumn : partitionColumns) { - partitionSpec.set(partitionColumn.getName(), convertPartitionSpec.get(partitionColumn.getName())); + private void prepare() { + try { + // init odps client + Odps odps = initOdpsClient(odpsConnectConfig); + // Pre-processing + PartitionSpec convertPartitionSpec = this.convertToPartitionSpec(odpsTableConfig.partition()); + preProcessing(odps, odpsConnectConfig.projectName(), odpsTableConfig.tableName(), convertPartitionSpec); + // init upload session + TableTunnel tunnel = new TableTunnel(odps); + + if (isPartitioned) { + if (odpsTableConfig.partition() == null || odpsTableConfig.partition().isEmpty()) { + throw DataproxyException.of(DataproxyErrorCode.INVALID_PARTITION_SPEC, "partitionSpec is empty"); + } + assert this.odpsTableSchema != null; + List partitionColumns = this.odpsTableSchema.getPartitionColumns(); + PartitionSpec partitionSpec = new PartitionSpec(); + for (Column partitionColumn : partitionColumns) { + partitionSpec.set(partitionColumn.getName(), convertPartitionSpec.get(partitionColumn.getName())); + } + uploadSession = tunnel.createUploadSession(odpsConnectConfig.projectName(), odpsTableConfig.tableName(), partitionSpec, OdpsUtil.OVER_WRITE); + } else { + uploadSession = tunnel.createUploadSession(odpsConnectConfig.projectName(), odpsTableConfig.tableName(), OdpsUtil.OVER_WRITE); } - uploadSession = 
tunnel.createUploadSession(connConfig.getProjectName(), tableInfo.tableName(), partitionSpec, overwrite); - } else { - uploadSession = tunnel.createUploadSession(connConfig.getProjectName(), tableInfo.tableName(), overwrite); - } - recordWriter = uploadSession.openRecordWriter(0, true); + recordWriter = uploadSession.openRecordWriter(0, true); + } catch (OdpsException | IOException e) { + throw DataproxyException.of(DataproxyErrorCode.ODPS_ERROR, "An exception occurred while writer initializing", e); + } } /** @@ -190,7 +179,7 @@ private void initOdps() throws OdpsException, IOException { private void setRecordValue(Record record, int columnIndex, Object value) { if (value == null) { record.set(columnIndex, null); - log.warn("table name: {} record set null value. index: {}", tableInfo.tableName(), columnIndex); + log.warn("table name: {} record set null value. index: {}", odpsTableConfig.tableName(), columnIndex); return; } @@ -272,7 +261,7 @@ private Object getValue(FieldVector fieldVector, int index) { private void preProcessing(Odps odps, String projectName, String tableName, PartitionSpec partitionSpec) throws OdpsException { if (!isExistsTable(odps, projectName, tableName)) { - boolean odpsTable = createOdpsTable(odps, projectName, tableName, schema, partitionSpec); + boolean odpsTable = createOdpsTable(odps, projectName, tableName, commandConfig.getResultSchema(), partitionSpec); if (!odpsTable) { throw DataproxyException.of(DataproxyErrorCode.ODPS_CREATE_TABLE_FAILED); } diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/writer/OdpsResourceWriter.java b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/writer/OdpsResourceWriter.java new file mode 100644 index 0000000..fb402b2 --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/java/org/secretflow/dataproxy/plugin/odps/writer/OdpsResourceWriter.java @@ -0,0 +1,152 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.secretflow.dataproxy.plugin.odps.writer;
+
+import com.aliyun.odps.FileResource;
+import com.aliyun.odps.NoSuchObjectException;
+import com.aliyun.odps.Odps;
+import com.aliyun.odps.OdpsException;
+import com.aliyun.odps.Resource;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode;
+import org.secretflow.dataproxy.common.exceptions.DataproxyException;
+import org.secretflow.dataproxy.core.writer.Writer;
+import org.secretflow.dataproxy.plugin.odps.config.OdpsConnectConfig;
+import org.secretflow.dataproxy.plugin.odps.config.OdpsTableConfig;
+import org.secretflow.dataproxy.plugin.odps.io.DynamicSequenceInputStream;
+import org.secretflow.dataproxy.plugin.odps.utils.OdpsUtil;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * @author yuexie
+ * @date 2024/11/11 14:50
+ **/
+@Slf4j
+public class OdpsResourceWriter implements Writer {
+
+ private final OdpsConnectConfig odpsConnectConfig;
+
+ private final OdpsTableConfig odpsTableConfig;
+
+ private Odps odps;
+
+ private static final String FIELD_NAME = "binary_data";
+
+ private final DynamicSequenceInputStream dynamicSequenceInputStream = new DynamicSequenceInputStream();
+
+ private final AtomicBoolean initedFileResource = new AtomicBoolean(false);
+
+ private final ExecutorService executorService = Executors.newSingleThreadExecutor();
+
+ private Future<?> future = null;
+
+ public OdpsResourceWriter(OdpsConnectConfig odpsConnectConfig, OdpsTableConfig odpsTableConfig) {
+ this.odpsConnectConfig = odpsConnectConfig;
+ this.odpsTableConfig = odpsTableConfig;
+ initOdps();
+ }
+
+ @Override
+ public void write(VectorSchemaRoot root) {
+
+ if (future != null && future.isDone()) {
+ throw new RuntimeException("Odps resource writer is closed");
+ }
+
+ FieldVector vector = root.getVector(FIELD_NAME);
+
+ if (vector instanceof VarBinaryVector varBinaryVector) {
+
+ int rowCount = root.getRowCount();
+ for (int row = 0; row < rowCount; row++) {
+ byte[] bytes = varBinaryVector.get(row);
+
+ dynamicSequenceInputStream.appendStream(new ByteArrayInputStream(bytes));
+
+ if (!initedFileResource.get()) {
+ future = executorService.submit(() -> createOrUpdateResource(odps, odpsTableConfig.tableName(), dynamicSequenceInputStream));
+ }
+
+ }
+ } else {
+ throw DataproxyException.of(DataproxyErrorCode.UNSUPPORTED_FIELD_TYPE, "Only support VarBinaryVector type");
+ }
+ }
+
+ @Override
+ public void flush() {
+ // Mark the appended stream as complete, then wait for the background upload to finish.
+ dynamicSequenceInputStream.setCompleted();
+ try {
+ if (future != null) {
+ future.get();
+ }
+ dynamicSequenceInputStream.close();
+ executorService.shutdown();
+ } catch (IOException | ExecutionException e) {
+ throw new RuntimeException(e);
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ }
+ }
+
+ private void initOdps() {
+ odps = OdpsUtil.initOdps(odpsConnectConfig);
+ }
+
+ private static boolean resourceExists(Odps odps, String resourceName) throws OdpsException {
+ try {
+ Resource resource = odps.resources().get(resourceName);
+ resource.reload();
+ return true;
+ } catch (NoSuchObjectException e) {
+ return false;
+ }
+ }
+
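+ // createOrUpdateResource (below) consumes dynamicSequenceInputStream on the single
+ // background thread: write() keeps appending per-row byte streams while the upload is
+ // in flight, and flush() completes the stream and joins the future.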
+ private void createOrUpdateResource(Odps odps, String resourceName, InputStream inputStream) { + try { + + if (initedFileResource.get()) { + return; + } + initedFileResource.set(true); + + FileResource resource = new FileResource(); + resource.setName(resourceName); + + if (resourceExists(odps, resourceName)) { + odps.resources().update(resource, inputStream); + } else { + odps.resources().create(resource, inputStream); + } + + } catch (OdpsException e) { + throw new RuntimeException(e); + } + } +} diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.config.ConfigLoader b/dataproxy-plugins/dataproxy-plugin-odps/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.config.ConfigLoader new file mode 100644 index 0000000..ff3180a --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.config.ConfigLoader @@ -0,0 +1,2 @@ +org.secretflow.dataproxy.plugin.odps.config.DefaultOdpsFlightConfigLoader +org.secretflow.dataproxy.plugin.odps.config.EnvironmentOdpsFlightConfigLoader \ No newline at end of file diff --git a/dataproxy-plugins/dataproxy-plugin-odps/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.spi.producer.DataProxyFlightProducer b/dataproxy-plugins/dataproxy-plugin-odps/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.spi.producer.DataProxyFlightProducer new file mode 100644 index 0000000..434319c --- /dev/null +++ b/dataproxy-plugins/dataproxy-plugin-odps/src/main/resources/META-INF/services/org.secretflow.dataproxy.core.spi.producer.DataProxyFlightProducer @@ -0,0 +1 @@ +org.secretflow.dataproxy.plugin.odps.producer.OdpsFlightProducer \ No newline at end of file diff --git a/dataproxy-service/pom.xml b/dataproxy-plugins/pom.xml similarity index 51% rename from dataproxy-service/pom.xml rename to dataproxy-plugins/pom.xml index bb2f2cd..f46b3ee 100644 --- a/dataproxy-service/pom.xml +++ b/dataproxy-plugins/pom.xml @@ -1,6 +1,4 @@ - - 4.0.0 @@ -9,7 +7,14 @@ 0.0.1-SNAPSHOT - dataproxy-service + dataproxy-plugins + pom + + + dataproxy-plugin-odps + + + dataproxy-plugins @@ -18,18 +23,7 @@ org.secretflow - dataproxy-manager + dataproxy-core - - com.github.ben-manes.caffeine - caffeine - - - - junit - junit - - - - \ No newline at end of file + diff --git a/dataproxy-server/pom.xml b/dataproxy-server/pom.xml index 696e4a4..a4e4347 100644 --- a/dataproxy-server/pom.xml +++ b/dataproxy-server/pom.xml @@ -7,71 +7,68 @@ org.secretflow dataproxy 0.0.1-SNAPSHOT + ../pom.xml dataproxy-server + jar + org.secretflow - dataproxy-manager + dataproxy-core org.secretflow - dataproxy-service + dataproxy-plugin-odps - + + - - org.aspectj - aspectjweaver + org.slf4j + slf4j-api + - org.aspectj - aspectjrt + ch.qos.logback + logback-core - - junit - junit - test + ch.qos.logback + logback-classic - + - org.springframework.boot - spring-boot-maven-plugin + org.apache.maven.plugins + maven-jar-plugin + ${maven-jar-plugin.version} - ../target - dataproxy + + + true + libs/ + + org.secretflow.dataproxy.server.DataProxyServerApplication + + + - - - ../config - ${project.basedir}/config - - - ../scripts/test - ${project.basedir}/config - - + \ No newline at end of file diff --git a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyApplication.java b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyApplication.java deleted file mode 100644 index ec94dc7..0000000 --- 
a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyApplication.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.server; - -import org.springframework.boot.WebApplicationType; -import org.springframework.boot.autoconfigure.SpringBootApplication; -import org.springframework.boot.builder.SpringApplicationBuilder; -import org.springframework.context.annotation.EnableAspectJAutoProxy; -import org.springframework.scheduling.annotation.EnableAsync; -import org.springframework.scheduling.annotation.EnableScheduling; - -/** - * @author muhong - * @date 2023-08-08 7:43 PM - */ -@EnableAsync -@EnableScheduling -@EnableAspectJAutoProxy -@SpringBootApplication(scanBasePackages = "org.secretflow.dataproxy") -public class DataProxyApplication { - - public static void main(String[] args) { - new SpringApplicationBuilder(DataProxyApplication.class) - .web(WebApplicationType.NONE) - .run(args); - } -} diff --git a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyFlightServer.java b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyFlightServer.java new file mode 100644 index 0000000..927768e --- /dev/null +++ b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyFlightServer.java @@ -0,0 +1,87 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.server; + +import lombok.extern.slf4j.Slf4j; +import org.apache.arrow.flight.FlightServer; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.secretflow.dataproxy.core.config.FlightServerConfig; +import org.secretflow.dataproxy.core.spi.producer.DataProxyFlightProducer; +import org.secretflow.dataproxy.server.flight.CompositeFlightProducer; +import org.secretflow.dataproxy.server.flight.ProducerRegistry; + +import java.io.IOException; +import java.util.ServiceLoader; + +/** + * Data Proxy server main class. 
+ *
+ * @author yuexie
+ * @date 2024/10/30 16:03
+ **/
+@Slf4j
+public class DataProxyFlightServer implements AutoCloseable {
+
+ private final FlightServer server;
+
+ public DataProxyFlightServer(FlightServerConfig config) {
+ server = init(config);
+ }
+
+ public void start() throws IOException {
+ server.start();
+ log.info("Flight server bound to host: {}", server.getLocation().getUri().getHost());
+ }
+
+ public void awaitTermination() throws InterruptedException {
+ server.awaitTermination();
+ }
+
+ @Override
+ public void close() throws Exception {
+ server.close();
+ }
+
+ private FlightServer init(FlightServerConfig config) {
+
+ BufferAllocator allocator = new RootAllocator();
+
+ return FlightServer.builder()
+// .useTls(null, null)
+// .useMTlsClientVerification(null)
+ .middleware(FlightServerTraceMiddleware.getKey(), new FlightServerTraceMiddleware.FlightServerTraceMiddlewareFactory())
+ .allocator(allocator)
+ .location(config.getLocation())
+ .producer(initProducer())
+ .build();
+ }
+
+ private CompositeFlightProducer initProducer() {
+
+ ServiceLoader<DataProxyFlightProducer> serviceLoader = ServiceLoader.load(DataProxyFlightProducer.class, DataProxyFlightProducer.class.getClassLoader());
+
+ ProducerRegistry registry = ProducerRegistry.getInstance();
+ for (DataProxyFlightProducer producer : serviceLoader) {
+ registry.register(producer.getProducerName(), producer);
+ log.info("ProducerRegistry register: {}", producer.getProducerName());
+ }
+
+ return new CompositeFlightProducer(registry);
+ }
+
+}
diff --git a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyServerApplication.java b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyServerApplication.java
new file mode 100644
index 0000000..5241f44
--- /dev/null
+++ b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataProxyServerApplication.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2024 Ant Group Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.secretflow.dataproxy.server; + +import lombok.extern.slf4j.Slf4j; +import org.secretflow.dataproxy.core.config.FlightServerConfig; +import org.secretflow.dataproxy.core.config.FlightServerContext; + +/** + * @author yuexie + * @date 2024/11/6 16:27 + **/ +@Slf4j +public class DataProxyServerApplication { + + public static void main(String[] args) { + log.info("Starting DataProxyFlightServer"); + + FlightServerConfig flightServerConfig = FlightServerContext.getInstance().getFlightServerConfig(); + + try (DataProxyFlightServer dataProxyFlightServer = new DataProxyFlightServer(flightServerConfig)) { + dataProxyFlightServer.start(); + log.info("Data proxy flight server started at {}:{}", flightServerConfig.getLocation().getUri().getHost(), flightServerConfig.port()); + dataProxyFlightServer.awaitTermination(); + } catch (Exception e) { + log.error("DataProxyFlightServer start failed", e); + throw new RuntimeException(e); + } finally { + log.warn("DataProxyFlightServer stopped"); + } + } +} diff --git a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataproxyLauncher.java b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataproxyLauncher.java deleted file mode 100644 index 1bb78be..0000000 --- a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/DataproxyLauncher.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -package org.secretflow.dataproxy.server; - -import lombok.extern.slf4j.Slf4j; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.memory.BufferAllocator; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.CommandLineRunner; -import org.springframework.stereotype.Service; - -/** - * Arrow Flight server launcher - * - * @author muhong - * @date 2023-08-07 10:26 AM - */ -@Slf4j -@Service -public class DataproxyLauncher implements CommandLineRunner { - - private FlightServer flightServer; - - @Autowired - private BufferAllocator bufferAllocator; - - @Autowired - private FlightProducer flightProducer; - - @Autowired - private Location location; - - /** - * Starts the gRPC (Arrow Flight) server - */ - private void grpcStart() { - FlightServer.Builder flightServerBuilder = FlightServer.builder() - .allocator(bufferAllocator) - .middleware(FlightServerTraceMiddleware.getKey(), new FlightServerTraceMiddleware.FlightServerTraceMiddlewareFactory()) - .location(location); - - try (FlightServer server = flightServerBuilder.producer(flightProducer).build()) { - flightServer = server; - flightServer.start(); - log.info("Fastds server launch success, listening on port {}, ip:{}", flightServer.getPort(), location.getUri().getHost()); - flightServer.awaitTermination(); - Runtime.getRuntime().addShutdownHook(new Thread(this::grpcStop)); - } catch (Exception e) { - log.error("fastds launch failed", e); - throw new RuntimeException("Failed to start Flight Server", e); - } - } - - /** - * Stops the gRPC (Arrow Flight) server - */ - private void grpcStop() { - if (flightServer != null) { - try { - flightServer.close(); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - } - - @Override - public void run(String...
args) throws Exception { - grpcStart(); - } -} diff --git a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/FlightServerTraceMiddleware.java b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/FlightServerTraceMiddleware.java index f83a8f0..d5f62bd 100644 --- a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/FlightServerTraceMiddleware.java +++ b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/FlightServerTraceMiddleware.java @@ -16,14 +16,18 @@ package org.secretflow.dataproxy.server; +import org.apache.arrow.flight.CallHeaders; +import org.apache.arrow.flight.CallInfo; +import org.apache.arrow.flight.CallStatus; +import org.apache.arrow.flight.FlightServerMiddleware; +import org.apache.arrow.flight.RequestContext; import org.secretflow.dataproxy.common.utils.IdUtils; - -import org.apache.arrow.flight.*; -import org.apache.commons.lang3.StringUtils; import org.slf4j.MDC; +import java.util.Objects; + /** - * trace_id中间件 + * Trace ID middleware * * @author muhong * @date 2023-09-25 14:39 @@ -55,10 +59,10 @@ public void onCallErrored(Throwable err) { public static class FlightServerTraceMiddlewareFactory implements Factory<FlightServerTraceMiddleware> { @Override public FlightServerTraceMiddleware onCallStarted(CallInfo info, CallHeaders incomingHeaders, RequestContext context) { - // 设置调用链路 Trace ID + // Set the trace ID for the call chain String traceId = incomingHeaders.get(TRACE_ID_KEY); - // 如果未传入 trace id 则生成一个 - if (StringUtils.isEmpty(traceId)) { + // Generate a trace ID if none was provided + if (Objects.isNull(traceId) || traceId.isEmpty()) { traceId = GENERATE_TRACE_ID_PREFIX + "-" + IdUtils.createRandString(32); } MDC.put("TraceId", traceId); diff --git a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/ProtoObjConvertor.java b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/ProtoObjConvertor.java index ad62cdf..b6cbd91 100644 --- a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/ProtoObjConvertor.java +++ b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/ProtoObjConvertor.java @@ -16,39 +16,6 @@ package org.secretflow.dataproxy.server; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.commons.collections4.CollectionUtils; -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.FlightContentFormatConfig; -import org.secretflow.dataproxy.common.model.FlightContentFormatTypeEnum; -import org.secretflow.dataproxy.common.model.dataset.Dataset; -import org.secretflow.dataproxy.common.model.dataset.DatasetFormatConfig; -import org.secretflow.dataproxy.common.model.dataset.DatasetSchema; -import org.secretflow.dataproxy.common.model.dataset.format.CSVFormatConfig; -import org.secretflow.dataproxy.common.model.dataset.format.DatasetFormatTypeEnum; -import org.secretflow.dataproxy.common.model.dataset.schema.DatasetSchemaTypeEnum; -import org.secretflow.dataproxy.common.model.datasource.DatasetLocationConfig; -import org.secretflow.dataproxy.common.model.datasource.Datasource; -import org.secretflow.dataproxy.common.model.datasource.DatasourceConnConfig; -import org.secretflow.dataproxy.common.model.datasource.DatasourceTypeEnum; -import
org.secretflow.dataproxy.common.model.datasource.conn.LocalFileSystemConnConfig; -import org.secretflow.dataproxy.common.model.datasource.conn.MysqlConnConfig; -import org.secretflow.dataproxy.common.model.datasource.conn.ObjectFileSystemConnConfig; -import org.secretflow.dataproxy.common.model.datasource.conn.OdpsConnConfig; -import org.secretflow.dataproxy.common.model.datasource.location.FileSystemLocationConfig; -import org.secretflow.dataproxy.common.model.datasource.location.MysqlLocationConfig; -import org.secretflow.dataproxy.common.model.datasource.location.OdpsTableInfo; -import org.secretflow.v1alpha1.common.Common; -import org.secretflow.v1alpha1.kusciaapi.Domaindata; -import org.secretflow.v1alpha1.kusciaapi.Domaindatasource; -import org.secretflow.v1alpha1.kusciaapi.Flightdm; - -import java.util.stream.Collectors; - /** * Kuscia接口转换器 * @@ -60,211 +27,191 @@ public class ProtoObjConvertor { /** * 将 Kuscia gRPC 的数据源元信息转化为数据源元信息 */ - public static Datasource fromProto(Domaindatasource.DomainDataSource domainDataSource) { - - return Datasource.builder() - .datasourceId(domainDataSource.getDatasourceId()) - .name(domainDataSource.getName()) - .connConfig(fromProto(domainDataSource.getType(), domainDataSource.getInfo())) - .writable(true) - .build(); - } - - public static DatasourceConnConfig fromProto(String domainDataSourceType, Domaindatasource.DataSourceInfo dataSourceInfo) { - switch (domainDataSourceType) { - case "localfs": { - LocalFileSystemConnConfig connConfig = LocalFileSystemConnConfig.builder().build(); - if (dataSourceInfo.hasLocalfs()) { - connConfig.setPath(dataSourceInfo.getLocalfs().getPath()); - } - - return DatasourceConnConfig.builder() - .type(DatasourceTypeEnum.LOCAL_HOST) - .connConfig(connConfig) - .build(); - } - case "oss": { - if (!dataSourceInfo.hasOss()) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_NOT_EXIST_ERROR, "OSS连接信息缺失"); - } - - DatasourceTypeEnum type = null; - switch (dataSourceInfo.getOss().getStorageType()) { - case "oss": - type = DatasourceTypeEnum.OSS; - break; - case "minio": - type = DatasourceTypeEnum.MINIO; - break; - default: - type = DatasourceTypeEnum.OSS; - } - - ObjectFileSystemConnConfig connConfig = ObjectFileSystemConnConfig.builder() - .endpoint(dataSourceInfo.getOss().getEndpoint()) - .bucket(dataSourceInfo.getOss().getBucket()) - .objectKeyPrefix(dataSourceInfo.getOss().getPrefix()) - .accessKey(dataSourceInfo.getOss().getAccessKeyId()) - .accessSecret(dataSourceInfo.getOss().getAccessKeySecret()) - .build(); - return DatasourceConnConfig.builder() - .type(type) - .connConfig(connConfig) - .build(); - } - case "mysql": { - if (!dataSourceInfo.hasDatabase()) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_NOT_EXIST_ERROR, "数据库连接信息缺失"); - } - - MysqlConnConfig connConfig = MysqlConnConfig.builder() - .host(dataSourceInfo.getDatabase().getEndpoint()) - .userName(dataSourceInfo.getDatabase().getUser()) - .password(dataSourceInfo.getDatabase().getPassword()) - .database(dataSourceInfo.getDatabase().getDatabase()) - .build(); - return DatasourceConnConfig.builder() - .type(DatasourceTypeEnum.MYSQL) - .connConfig(connConfig) - .build(); - } - case "odps": { - if (!dataSourceInfo.hasOdps()) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_NOT_EXIST_ERROR, "数据库连接信息缺失"); - } - - OdpsConnConfig config = - OdpsConnConfig.builder() - .accessKeyId(dataSourceInfo.getOdps().getAccessKeyId()) - .accessKeySecret(dataSourceInfo.getOdps().getAccessKeySecret()) - 
.projectName(dataSourceInfo.getOdps().getProject()) - .endpoint(dataSourceInfo.getOdps().getEndpoint()) - .build(); - - return DatasourceConnConfig.builder() - .type(DatasourceTypeEnum.ODPS) - .connConfig(config) - .build(); - } - default: - throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "不支持的数据源类型 " + domainDataSourceType); - } - } - - public static Dataset fromProto(Domaindata.DomainData domainData, Datasource datasource) { - DatasetFormatConfig formatConfig = DatasetFormatConfig.builder().build(); - - // 数据集位置信息映射 - DatasetLocationConfig locationConfig = DatasetLocationConfig.builder() - .datasourceId(domainData.getDatasourceId()) - .build(); - switch (datasource.getConnConfig().getType()) { - case LOCAL_HOST: - case OSS: - case MINIO: - case OBS: - locationConfig.setLocationConfig(FileSystemLocationConfig.builder() - .relativePath(domainData.getRelativeUri()) - .build()); - - if (domainData.getFileFormat() == Common.FileFormat.CSV) { - formatConfig.setType(DatasetFormatTypeEnum.CSV); - formatConfig.setFormatConfig(CSVFormatConfig.builder().build()); - } else { - formatConfig.setType(DatasetFormatTypeEnum.BINARY_FILE); - } - break; - case MYSQL: { - locationConfig.setLocationConfig(MysqlLocationConfig.builder() - .table(domainData.getRelativeUri()) - .build()); - formatConfig.setType(DatasetFormatTypeEnum.TABLE); - break; - } - case ODPS: - locationConfig.setLocationConfig(OdpsTableInfo.fromKusciaData(domainData)); - if (domainData.getFileFormat() == Common.FileFormat.CSV ) { - formatConfig.setType(DatasetFormatTypeEnum.TABLE); - } else { - formatConfig.setType(DatasetFormatTypeEnum.BINARY_FILE); - } - break; - default: - throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "不支持的数据源类型 " + datasource.getConnConfig().getType()); - } - - DatasetSchema datasetSchema = DatasetSchema.builder().build(); - switch (domainData.getType()) { - case "table": { - datasetSchema.setType(DatasetSchemaTypeEnum.STRUCTURED_DATA); - if (CollectionUtils.isNotEmpty(domainData.getColumnsList())) { - - Schema schema = new Schema(domainData.getColumnsList().stream() - .map(column -> - Field.nullable(column.getName(), parseArrowTypeFrom(column.getType()))) - .collect(Collectors.toList())); - datasetSchema.setArrowSchema(schema); - } - break; - } - case "model", "report": { - datasetSchema.setType(DatasetSchemaTypeEnum.BINARY); - break; - } - default: - datasetSchema.setType(DatasetSchemaTypeEnum.BINARY); - break; - } - - return Dataset.builder() - .datasetId(domainData.getDomaindataId()) - .name(domainData.getName()) - .locationConfig(locationConfig) - .schema(datasetSchema) - .formatConfig(formatConfig) - .ownerId(domainData.getVendor()) - .build(); - } - - public static ArrowType parseArrowTypeFrom(String type) { - // string integer float datetime timestamp - return switch (type) { - case "int8" -> Types.MinorType.TINYINT.getType(); - case "int16" -> Types.MinorType.SMALLINT.getType(); - case "int32" -> Types.MinorType.INT.getType(); - case "int64", "int" -> Types.MinorType.BIGINT.getType(); - case "unit8" -> Types.MinorType.UINT1.getType(); - case "uint16" -> Types.MinorType.UINT2.getType(); - case "uint32" -> Types.MinorType.UINT4.getType(); - case "uint64" -> Types.MinorType.UINT8.getType(); - case "float32" -> Types.MinorType.FLOAT4.getType(); - case "float64", "float" -> Types.MinorType.FLOAT8.getType(); - case "date32" -> Types.MinorType.DATEDAY.getType(); - case "date64" -> Types.MinorType.DATEMILLI.getType(); - case "bool" -> Types.MinorType.BIT.getType(); - case 
"string", "str" -> Types.MinorType.VARCHAR.getType(); - case "binary" -> Types.MinorType.VARBINARY.getType(); - default -> throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "不支持的字段类型 " + type); - }; - } - - public static FlightContentFormatConfig fromProto(Flightdm.ContentType contentType) { - FlightContentFormatConfig formatConfig = FlightContentFormatConfig.builder().build(); - - switch (contentType) { - case CSV: - formatConfig.setFormatType(FlightContentFormatTypeEnum.CSV); - formatConfig.setFormatConfig(CSVFormatConfig.builder().build()); - break; - case RAW: - formatConfig.setFormatType(FlightContentFormatTypeEnum.BINARY_FILE); - break; - case Table: - default: - formatConfig.setFormatType(FlightContentFormatTypeEnum.STRUCTURED_DATA); - break; - } - - return formatConfig; - } +// public static Datasource fromProto(Domaindatasource.DomainDataSource domainDataSource) { +// +// return Datasource.builder() +// .datasourceId(domainDataSource.getDatasourceId()) +// .name(domainDataSource.getName()) +// .connConfig(fromProto(domainDataSource.getType(), domainDataSource.getInfo())) +// .writable(true) +// .build(); +// } +// +// public static DatasourceConnConfig fromProto(String domainDataSourceType, Domaindatasource.DataSourceInfo dataSourceInfo) { +// switch (domainDataSourceType) { +// case "localfs": { +// LocalFileSystemConnConfig connConfig = LocalFileSystemConnConfig.builder().build(); +// if (dataSourceInfo.hasLocalfs()) { +// connConfig.setPath(dataSourceInfo.getLocalfs().getPath()); +// } +// +// return DatasourceConnConfig.builder() +// .type(DatasourceTypeEnum.LOCAL_HOST) +// .connConfig(connConfig) +// .build(); +// } +// case "oss": { +// if (!dataSourceInfo.hasOss()) { +// throw DataproxyException.of(DataproxyErrorCode.PARAMS_NOT_EXIST_ERROR, "OSS连接信息缺失"); +// } +// +// DatasourceTypeEnum type = null; +// switch (dataSourceInfo.getOss().getStorageType()) { +// case "oss": +// type = DatasourceTypeEnum.OSS; +// break; +// case "minio": +// type = DatasourceTypeEnum.MINIO; +// break; +// default: +// type = DatasourceTypeEnum.OSS; +// } +// +// ObjectFileSystemConnConfig connConfig = ObjectFileSystemConnConfig.builder() +// .endpoint(dataSourceInfo.getOss().getEndpoint()) +// .bucket(dataSourceInfo.getOss().getBucket()) +// .objectKeyPrefix(dataSourceInfo.getOss().getPrefix()) +// .accessKey(dataSourceInfo.getOss().getAccessKeyId()) +// .accessSecret(dataSourceInfo.getOss().getAccessKeySecret()) +// .build(); +// return DatasourceConnConfig.builder() +// .type(type) +// .connConfig(connConfig) +// .build(); +// } +// case "mysql": { +// if (!dataSourceInfo.hasDatabase()) { +// throw DataproxyException.of(DataproxyErrorCode.PARAMS_NOT_EXIST_ERROR, "数据库连接信息缺失"); +// } +// +// MysqlConnConfig connConfig = MysqlConnConfig.builder() +// .host(dataSourceInfo.getDatabase().getEndpoint()) +// .userName(dataSourceInfo.getDatabase().getUser()) +// .password(dataSourceInfo.getDatabase().getPassword()) +// .database(dataSourceInfo.getDatabase().getDatabase()) +// .build(); +// return DatasourceConnConfig.builder() +// .type(DatasourceTypeEnum.MYSQL) +// .connConfig(connConfig) +// .build(); +// } +// case "odps": { +// if (!dataSourceInfo.hasOdps()) { +// throw DataproxyException.of(DataproxyErrorCode.PARAMS_NOT_EXIST_ERROR, "数据库连接信息缺失"); +// } +// +// OdpsConnConfig config = +// OdpsConnConfig.builder() +// .accessKeyId(dataSourceInfo.getOdps().getAccessKeyId()) +// .accessKeySecret(dataSourceInfo.getOdps().getAccessKeySecret()) +// 
.projectName(dataSourceInfo.getOdps().getProject()) +// .endpoint(dataSourceInfo.getOdps().getEndpoint()) +// .build(); +// +// return DatasourceConnConfig.builder() +// .type(DatasourceTypeEnum.ODPS) +// .connConfig(config) +// .build(); +// } +// default: +// throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "不支持的数据源类型 " + domainDataSourceType); +// } +// } +// +// public static Dataset fromProto(Domaindata.DomainData domainData, Datasource datasource) { +// DatasetFormatConfig formatConfig = DatasetFormatConfig.builder().build(); +// +// // 数据集位置信息映射 +// DatasetLocationConfig locationConfig = DatasetLocationConfig.builder() +// .datasourceId(domainData.getDatasourceId()) +// .build(); +// switch (datasource.getConnConfig().getType()) { +// case LOCAL_HOST: +// case OSS: +// case MINIO: +// case OBS: +// locationConfig.setLocationConfig(FileSystemLocationConfig.builder() +// .relativePath(domainData.getRelativeUri()) +// .build()); +// +// if (domainData.getFileFormat() == Common.FileFormat.CSV) { +// formatConfig.setType(DatasetFormatTypeEnum.CSV); +// formatConfig.setFormatConfig(CSVFormatConfig.builder().build()); +// } else { +// formatConfig.setType(DatasetFormatTypeEnum.BINARY_FILE); +// } +// break; +// case MYSQL: { +// locationConfig.setLocationConfig(MysqlLocationConfig.builder() +// .table(domainData.getRelativeUri()) +// .build()); +// formatConfig.setType(DatasetFormatTypeEnum.TABLE); +// break; +// } +// case ODPS: +// locationConfig.setLocationConfig(OdpsTableInfo.fromKusciaData(domainData)); +// if (domainData.getFileFormat() == Common.FileFormat.CSV ) { +// formatConfig.setType(DatasetFormatTypeEnum.TABLE); +// } else { +// formatConfig.setType(DatasetFormatTypeEnum.BINARY_FILE); +// } +// break; +// default: +// throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "不支持的数据源类型 " + datasource.getConnConfig().getType()); +// } +// +// DatasetSchema datasetSchema = DatasetSchema.builder().build(); +// switch (domainData.getType()) { +// case "table": { +// datasetSchema.setType(DatasetSchemaTypeEnum.STRUCTURED_DATA); +// if (CollectionUtils.isNotEmpty(domainData.getColumnsList())) { +// +// Schema schema = new Schema(domainData.getColumnsList().stream() +// .map(column -> +// Field.nullable(column.getName(), parseArrowTypeFrom(column.getType()))) +// .collect(Collectors.toList())); +// datasetSchema.setArrowSchema(schema); +// } +// break; +// } +// case "model", "report": { +// datasetSchema.setType(DatasetSchemaTypeEnum.BINARY); +// break; +// } +// default: +// datasetSchema.setType(DatasetSchemaTypeEnum.BINARY); +// break; +// } +// +// return Dataset.builder() +// .datasetId(domainData.getDomaindataId()) +// .name(domainData.getName()) +// .locationConfig(locationConfig) +// .schema(datasetSchema) +// .formatConfig(formatConfig) +// .ownerId(domainData.getVendor()) +// .build(); +// } +// +// +// +// public static FlightContentFormatConfig fromProto(Flightdm.ContentType contentType) { +// FlightContentFormatConfig formatConfig = FlightContentFormatConfig.builder().build(); +// +// switch (contentType) { +// case CSV: +// formatConfig.setFormatType(FlightContentFormatTypeEnum.CSV); +// formatConfig.setFormatConfig(CSVFormatConfig.builder().build()); +// break; +// case RAW: +// formatConfig.setFormatType(FlightContentFormatTypeEnum.BINARY_FILE); +// break; +// case Table: +// default: +// formatConfig.setFormatType(FlightContentFormatTypeEnum.STRUCTURED_DATA); +// break; +// } +// +// return formatConfig; +// } } diff --git 
a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/config/ArrowConfig.java b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/config/ArrowConfig.java deleted file mode 100644 index c613ee0..0000000 --- a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/config/ArrowConfig.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.server.config; - -import lombok.Data; -import lombok.extern.slf4j.Slf4j; -import org.apache.arrow.flight.Location; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.commons.lang3.StringUtils; -import org.secretflow.dataproxy.common.utils.JsonUtils; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; - -import java.io.IOException; -import java.net.Inet4Address; -import java.net.InetAddress; -import java.net.NetworkInterface; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.Enumeration; -import java.util.List; - -/** - * @author muhong - * @date 2023-08-07 10:43 AM - */ -@Slf4j -@Configuration -public class ArrowConfig { - - @Value("${dataproxy.flight.port}") - private int defaultPort; - - @Bean - public BufferAllocator bufferAllocator() { - return new RootAllocator(); - } - - @Bean - public Location location() { - try { - String localMachineHost = ""; - final Enumeration interfaces = NetworkInterface.getNetworkInterfaces(); - while (interfaces.hasMoreElements()) { - NetworkInterface networkInterface = interfaces.nextElement(); - if (networkInterface.isLoopback() || !networkInterface.isUp()) { - continue; - } - final Enumeration addresses = networkInterface.getInetAddresses(); - while (addresses.hasMoreElements()) { - InetAddress inetAddress = addresses.nextElement(); - if (!inetAddress.isLoopbackAddress() && inetAddress instanceof Inet4Address) { - localMachineHost = inetAddress.getHostAddress(); - } - } - } - - int port = parsePort(); - return Location.forGrpcInsecure(localMachineHost, port); - } catch (Exception e) { - log.error("config location error", e); - throw new RuntimeException(e); - } - } - - private int parsePort() { - String dpConfigFile = System.getenv("DP_CONFIG_FILE"); - if (StringUtils.isEmpty(dpConfigFile)) { - log.info("dp config file env not found, use default port"); - return defaultPort; - } - - String dpConfigJson = null; - try { - dpConfigJson = Files.readString(Paths.get(dpConfigFile), Charset.defaultCharset()); - } catch (IOException e) { - throw new RuntimeException("dp config file read error", e); - } - - DPConfig dpConfig = JsonUtils.toJavaObject(dpConfigJson, DPConfig.class); - AllocatedPorts allocatedPorts = JsonUtils.toJavaObject(dpConfig.getAllocated_ports(), AllocatedPorts.class); - for (AllocatedPort arrowFlightPort : 
allocatedPorts.getPorts()) { - if (arrowFlightPort.getName().equals("dp")) { - return arrowFlightPort.getPort(); - } - } - throw new RuntimeException("dp port config not found in " + dpConfigFile); - } - - @Data - private static class DPConfig { - private String allocated_ports; - } - - @Data - private static class AllocatedPorts { - private List ports; - } - - @Data - private static class AllocatedPort { - private String name; - private Integer port; - } -} diff --git a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/CompositeFlightProducer.java b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/CompositeFlightProducer.java new file mode 100644 index 0000000..3975ad5 --- /dev/null +++ b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/CompositeFlightProducer.java @@ -0,0 +1,251 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.server.flight; + +import com.google.protobuf.Any; +import com.google.protobuf.InvalidProtocolBufferException; +import lombok.extern.slf4j.Slf4j; +import org.apache.arrow.flight.Action; +import org.apache.arrow.flight.ActionType; +import org.apache.arrow.flight.CallStatus; +import org.apache.arrow.flight.Criteria; +import org.apache.arrow.flight.FlightDescriptor; +import org.apache.arrow.flight.FlightInfo; +import org.apache.arrow.flight.FlightProducer; +import org.apache.arrow.flight.FlightStream; +import org.apache.arrow.flight.PollInfo; +import org.apache.arrow.flight.PutResult; +import org.apache.arrow.flight.Result; +import org.apache.arrow.flight.SchemaResult; +import org.apache.arrow.flight.Ticket; +import org.secretflow.dataproxy.common.exceptions.DataproxyException; +import org.secretflow.dataproxy.common.utils.GrpcUtils; +import org.secretflow.dataproxy.core.param.ParamWrapper; +import org.secretflow.dataproxy.core.service.TicketService; +import org.secretflow.dataproxy.core.service.impl.CacheTicketService; +import org.secretflow.v1alpha1.kusciaapi.Flightdm; +import org.secretflow.v1alpha1.kusciaapi.Flightinner; + +/** + * Composite flight producer. + * + * @author yuexie + * @date 2024/10/30 16:33 + **/ +@Slf4j +public class CompositeFlightProducer implements FlightProducer { + + private final ProducerRegistry registry; + + public CompositeFlightProducer(ProducerRegistry registry) { + this.registry = registry; + } + + /** + * Return data for a stream. + * + * @param context Per-call context. + * @param ticket The application-defined ticket identifying this stream. + * @param listener An interface for sending data back to the client. 
+ */ + @Override + public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { + + try { + this.getProducer(ticket).getStream(context, ticket, listener); + } catch (Exception e) { + log.error("doGet encountered an error", e); + throw CallStatus.INTERNAL + .withCause(e) + .withDescription(e.getMessage()) + .toRuntimeException(); + } + } + + /** + * List available data streams on this service. + * + * @param context Per-call context. + * @param criteria Application-defined criteria for filtering streams. + * @param listener An interface for sending data back to the client. + */ + @Override + public void listFlights(CallContext context, Criteria criteria, StreamListener<FlightInfo> listener) { + this.getProducer(criteria).listFlights(context, criteria, listener); + } + + /** + * Get information about a particular data stream. + * + * @param context Per-call context. + * @param descriptor The descriptor identifying the data stream. + * @return Metadata about the stream. + */ + @Override + public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { + + log.debug("getFlightInfo, descriptor:{}", descriptor); + return this.getProducer(descriptor).getFlightInfo(context, descriptor); + } + + + /** + * Begin or get an update on execution of a long-running query. + * + * <p>If the descriptor would begin a query, the server should return a response immediately to not + * block the client. Otherwise, the server should not return an update until progress is made to + * not spam the client with inactionable updates. + * + * @param context Per-call context. + * @param descriptor The descriptor identifying the data stream. + * @return Metadata about execution. + */ + @Override + public PollInfo pollFlightInfo(CallContext context, FlightDescriptor descriptor) { + return this.getProducer(descriptor).pollFlightInfo(context, descriptor); + } + + /** + * Get schema for a particular data stream. + * + * @param context Per-call context. + * @param descriptor The descriptor identifying the data stream. + * @return Schema for the stream. + */ + @Override + public SchemaResult getSchema(CallContext context, FlightDescriptor descriptor) { + try { + return this.getProducer(descriptor).getSchema(context, descriptor); + } catch (DataproxyException e) { + log.error("[getSchema] unknown DataproxyException", e); + throw CallStatus.INVALID_ARGUMENT + .withCause(e) + .withDescription(e.getDescription()) + .toRuntimeException(); + } catch (Exception e) { + log.error("[getSchema] unknown exception", e); + throw CallStatus.INTERNAL + .withCause(e) + .withDescription("Unknown exception") + .toRuntimeException(); + } + } + + /** + * Accept uploaded data for a particular stream. + * + * @param context Per-call context. + * @param flightStream The data stream being uploaded. + * @param ackStream A stream for sending acknowledgement messages. + * @return A Runnable that will be called when the upload is complete. + */ + @Override + public Runnable acceptPut(CallContext context, FlightStream flightStream, StreamListener<PutResult> ackStream) { + try { + return this.getProducer(flightStream.getDescriptor()).acceptPut(context, flightStream, ackStream); + } catch (Exception e) { + log.error("Unknown exception", e); + throw CallStatus.INTERNAL + .withCause(e) + .withDescription("Unknown exception: " + e.getMessage()) + .toRuntimeException(); + } + } + + @Override + public void doExchange(CallContext context, FlightStream reader, ServerStreamListener writer) { + this.getProducer(reader.getDescriptor()).doExchange(context, reader, writer); + } + + /** + * Generic handler for application-defined RPCs. + * + * @param context Per-call context. + * @param action Client-supplied parameters. + * @param listener A stream of responses. + */ + @Override + public void doAction(CallContext context, Action action, StreamListener<Result> listener) { + this.getProducer("").doAction(context, action, listener); + } + + /** + * List available application-defined RPCs. + * + * @param context Per-call context. + * @param listener An interface for sending data back to the client.
+ */ + @Override + public void listActions(CallContext context, StreamListener<ActionType> listener) { + this.getProducer("").listActions(context, listener); + } + + private FlightProducer getProducer(String serviceName) { + return registry.getOrDefaultNoOp(serviceName); + } + + private FlightProducer getProducer(Ticket ticket) { + + log.debug("getProducer ticket: {}", ticket.getBytes()); + TicketService ticketService = CacheTicketService.getInstance(); + ParamWrapper paramWrapper = ticketService.getParamWrapper(ticket.getBytes()); + log.info("getProducer paramWrapper: {}", paramWrapper); + + if (paramWrapper != null) { + return registry.getOrDefaultNoOp(paramWrapper.producerKey()); + } else { + FlightProducer other = registry.getOrDefaultNoOp("other"); + log.info("getProducer other: {}", other); + return other; + } + } + + private FlightProducer getProducer(Criteria criteria) { + // no impl + return registry.getOrDefaultNoOp(criteria.toString()); + } + + private FlightProducer getProducer(FlightDescriptor descriptor) { + + byte[] command = descriptor.getCommand(); + + Any any = GrpcUtils.parseOrThrow(command); + + try { + String dataSourceType = switch (any.getTypeUrl()) { + case "type.googleapis.com/kuscia.proto.api.v1alpha1.datamesh.CommandDataMeshSqlQuery" -> + any.unpack(Flightinner.CommandDataMeshSqlQuery.class).getDatasource().getType(); + case "type.googleapis.com/kuscia.proto.api.v1alpha1.datamesh.CommandDataMeshQuery" -> + any.unpack(Flightinner.CommandDataMeshQuery.class).getDatasource().getType(); + case "type.googleapis.com/kuscia.proto.api.v1alpha1.datamesh.CommandDataMeshUpdate" -> + any.unpack(Flightinner.CommandDataMeshUpdate.class).getDatasource().getType(); + case "type.googleapis.com/kuscia.proto.api.v1alpha1.datamesh.TicketDomainDataQuery" -> { + String domaindataHandle = any.unpack(Flightdm.TicketDomainDataQuery.class).getDomaindataHandle(); + yield CacheTicketService.getInstance().getParamWrapper(domaindataHandle.getBytes()).producerKey(); + } + + default -> throw CallStatus.INVALID_ARGUMENT + .withDescription("Unknown command type") + .toRuntimeException(); + }; + log.info("datasource type is {}", dataSourceType); + return registry.getOrDefaultNoOp(dataSourceType); + } catch (InvalidProtocolBufferException e) { + throw new RuntimeException(e); + } + } +} diff --git a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/DataproxyProducerImpl.java b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/DataproxyProducerImpl.java deleted file mode 100644 index adac904..0000000 --- a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/DataproxyProducerImpl.java +++ /dev/null @@ -1,366 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -package org.secretflow.dataproxy.server.flight; - -import com.google.protobuf.Any; -import lombok.extern.slf4j.Slf4j; -import org.apache.arrow.flight.Action; -import org.apache.arrow.flight.ActionType; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.Criteria; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.PutResult; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.types.pojo.Schema; -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.command.Command; -import org.secretflow.dataproxy.common.model.command.CommandTypeEnum; -import org.secretflow.dataproxy.common.model.command.DatasetReadCommand; -import org.secretflow.dataproxy.common.model.command.DatasetWriteCommand; -import org.secretflow.dataproxy.common.model.dataset.Dataset; -import org.secretflow.dataproxy.common.model.datasource.DatasetLocationConfig; -import org.secretflow.dataproxy.common.model.datasource.Datasource; -import org.secretflow.dataproxy.common.model.datasource.location.OdpsTableInfo; -import org.secretflow.dataproxy.common.utils.GrpcUtils; -import org.secretflow.dataproxy.common.utils.JsonUtils; -import org.secretflow.dataproxy.common.utils.ProtoBufJsonUtils; -import org.secretflow.dataproxy.server.ProtoObjConvertor; -import org.secretflow.dataproxy.service.DataProxyService; -import org.secretflow.dataproxy.service.TicketService; -import org.secretflow.v1alpha1.kusciaapi.Flightdm; -import org.secretflow.v1alpha1.kusciaapi.Flightinner; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; - -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * @author muhong - * @date 2023-09-13 16:08 - */ -@Slf4j -@Service -public class DataproxyProducerImpl implements DataproxyProducer { - - private static final Schema DEFAULT_SCHEMA = new Schema(new ArrayList<>()); - @Autowired - private TicketService ticketService; - @Autowired - private DataProxyService dataProxyService; - @Autowired - private Location location; - @Autowired - private BufferAllocator rootAllocator; - - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - final Any command = GrpcUtils.parseOrThrow(descriptor.getCommand()); - - try { - if (command.is(Flightinner.CommandDataMeshQuery.class)) { - return getFlightInfoQuery(GrpcUtils.unpackOrThrow(command, Flightinner.CommandDataMeshQuery.class), context, descriptor); - } else if (command.is(Flightinner.CommandDataMeshUpdate.class)) { - return getFlightInfoUpdate(GrpcUtils.unpackOrThrow(command, Flightinner.CommandDataMeshUpdate.class), context, descriptor); - } - } catch (DataproxyException e) { - throw CallStatus.INVALID_ARGUMENT - .withCause(e) - .withDescription(e.getDescription()) - .toRuntimeException(); - } catch (Exception e) { - throw CallStatus.INTERNAL - .withCause(e) - .withDescription("Unknown exception") - .toRuntimeException(); - } - - 
log.error("[getFlightInfo] unrecognized request, type:{}", command.getTypeUrl()); - throw CallStatus.INVALID_ARGUMENT - .withDescription("Unrecognized request: " + command.getTypeUrl()) - .toRuntimeException(); - } - - @Override - public void doAction(CallContext context, Action action, StreamListener listener) { - final Any actionBody = GrpcUtils.parseOrThrow(action.getBody()); - - Result result = null; - try { - - } catch (DataproxyException e) { - throw CallStatus.INVALID_ARGUMENT - .withCause(e) - .withDescription(e.getDescription()) - .toRuntimeException(); - } catch (Exception e) { - throw CallStatus.INTERNAL - .withCause(e) - .withDescription("Unknown exception") - .toRuntimeException(); - } - - if (result != null) { - listener.onNext(result); - listener.onCompleted(); - return; - } - - log.error("[doAction] unrecognized request"); - throw CallStatus.INVALID_ARGUMENT - .withDescription("Unrecognized request: " + actionBody.getTypeUrl()) - .toRuntimeException(); - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - try { - getStreamReadData(context, ticket, listener); - } catch (DataproxyException e) { - throw CallStatus.INVALID_ARGUMENT - .withCause(e) - .withDescription(e.getDescription()) - .toRuntimeException(); - } catch (Exception e) { - log.error("[getStream] unknown exception"); - throw CallStatus.INTERNAL - .withCause(e) - .withDescription("Unknown exception") - .toRuntimeException(); - } - } - - @Override - public void listFlights(CallContext context, Criteria criteria, StreamListener listener) { - - } - - @Override - public Runnable acceptPut(CallContext context, FlightStream flightStream, StreamListener ackStream) { - try { - return acceptPutDataUpdate(context, flightStream, ackStream); - } catch (DataproxyException e) { - throw CallStatus.INVALID_ARGUMENT - .withCause(e) - .withDescription(e.getDescription()) - .toRuntimeException(); - } catch (Exception e) { - log.error("[acceptPut] unknown exception"); - throw CallStatus.INTERNAL - .withCause(e) - .withDescription("Unknown exception") - .toRuntimeException(); - } - } - - @Override - public void listActions(CallContext context, StreamListener listener) { - // no implements - } - - @Override - public void close() throws Exception { - - } - - @Override - public FlightInfo getFlightInfoQuery(Flightinner.CommandDataMeshQuery command, CallContext context, FlightDescriptor descriptor) { - log.info("[getFlightInfoQuery] get flight info query start"); - - try { - Datasource datasource = ProtoObjConvertor.fromProto(command.getDatasource()); - Dataset dataset = ProtoObjConvertor.fromProto(command.getDomaindata(), datasource); - - // TODO: 不合理入参 - DatasetLocationConfig locationConfig = dataset.getLocationConfig(); - if (locationConfig.getLocationConfig() instanceof OdpsTableInfo odpsTableInfo) { - String partitionSpec = command.getQuery().getPartitionSpec(); - locationConfig.setLocationConfig(new OdpsTableInfo(odpsTableInfo.tableName(), partitionSpec, odpsTableInfo.fields())); - } - - Command readCommand = Command.builder() - .type(CommandTypeEnum.READ) - .commandInfo(DatasetReadCommand.builder() - .connConfig(datasource.getConnConfig()) - .locationConfig(locationConfig) - .formatConfig(dataset.getFormatConfig()) - .schema(dataset.getSchema().getArrowSchema()) - .fieldList(command.getQuery().getColumnsList()) - .outputFormatConfig(ProtoObjConvertor.fromProto(command.getQuery().getContentType())) - .build()) - .build(); - - log.info("[getFlightInfoQuery] get flight info 
query, command:{}", JsonUtils.toJSONString(readCommand)); - - byte[] ticketBytes = ticketService.generateTicket(readCommand); - - // 数据端,当前只支持1 - List endpointList = Collections.singletonList( - new FlightEndpoint(new Ticket(ticketBytes), location)); - - log.info("[getFlightInfoQuery] get flight info query completed"); - return new FlightInfo(DEFAULT_SCHEMA, descriptor, endpointList, 0, 0); - } catch (DataproxyException e) { - log.error("[getFlightInfoQuery] get flight info query error", e); - throw e; - } catch (Exception e) { - log.error("[getFlightInfoQuery] get flight info query unknown exception", e); - throw DataproxyException.of(DataproxyErrorCode.KUSCIA_GET_FLIGHT_INFO_QUERY_ERROR, e); - } - } - - @Override - public FlightInfo getFlightInfoUpdate(Flightinner.CommandDataMeshUpdate command, CallContext context, FlightDescriptor descriptor) { - log.info("[getFlightInfoUpdate] get flight info update start"); - - try { - Datasource datasource = ProtoObjConvertor.fromProto(command.getDatasource()); - Dataset dataset = ProtoObjConvertor.fromProto(command.getDomaindata(), datasource); - - // TODO: 不合理入参 - DatasetLocationConfig locationConfig = dataset.getLocationConfig(); - if (locationConfig.getLocationConfig() instanceof OdpsTableInfo odpsTableInfo) { - String partitionSpec = command.getUpdate().getPartitionSpec(); - locationConfig.setLocationConfig(new OdpsTableInfo(odpsTableInfo.tableName(), partitionSpec, odpsTableInfo.fields())); - } - - Command writeCommand = Command.builder() - .type(CommandTypeEnum.WRITE) - .commandInfo(DatasetWriteCommand.builder() - .connConfig(datasource.getConnConfig()) - .locationConfig(locationConfig) - .formatConfig(dataset.getFormatConfig()) - .schema(dataset.getSchema().getArrowSchema()) - .inputFormatConfig(ProtoObjConvertor.fromProto(command.getUpdate().getContentType())) - .extraOptions(command.getUpdate().getExtraOptionsMap()) - .build()) - .build(); - - log.info("[getFlightInfoUpdate] get flight info update, command:{}", JsonUtils.toJSONString(writeCommand)); - - byte[] ticketBytes = ticketService.generateTicket(writeCommand); - Flightdm.TicketDomainDataQuery commandTicketWrite = Flightdm.TicketDomainDataQuery.newBuilder() - .setDomaindataHandle(new String(ticketBytes)) - .build(); - - // 数据端,当前只支持1 - List endpointList = Collections.singletonList( - new FlightEndpoint(new Ticket(Any.pack(commandTicketWrite).toByteArray()), location)); - - log.info("[getFlightInfoUpdate] get flight info update completed"); - return new FlightInfo(DEFAULT_SCHEMA, descriptor, endpointList, 0, 0); - } catch (DataproxyException e) { - log.error("[getFlightInfoUpdate] get flight info update error", e); - throw e; - } catch (Exception e) { - log.error("[getFlightInfoUpdate] get flight info update unknown exception", e); - throw DataproxyException.of(DataproxyErrorCode.KUSCIA_GET_FLIGHT_INFO_UPDATE_ERROR, e); - } - } - - public void getStreamReadData(CallContext context, Ticket ticket, ServerStreamListener listener) { - log.info("[getStreamReadData] get stream start, ticket:{}", new String(ticket.getBytes())); - - try { - // 根据ticket获取预先缓存的查询命令 - Command command = ticketService.getCommandByTicket(ticket.getBytes()); - if (command.getType() != CommandTypeEnum.READ) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "操作指令类型与接口不匹配"); - } - - log.info("[getStreamReadData] parse command from ticket success, command:{}", JsonUtils.toJSONString(command)); - try (ArrowReader arrowReader = dataProxyService.generateArrowReader(rootAllocator, (DatasetReadCommand) 
command.getCommandInfo())) { - listener.start(arrowReader.getVectorSchemaRoot()); - - while (true) { - if (context.isCancelled()) { - log.warn("[getStreamReadData] get stream cancelled"); - break; - } - if (arrowReader.loadNextBatch()) { - listener.putNext(); - } else { - break; - } - } - listener.completed(); - log.info("[getStreamReadData] get stream completed"); - } - } catch (DataproxyException e) { - log.error("[getStreamReadData] get stream error", e); - throw e; - } catch (Exception e) { - log.error("[getStreamReadData] get stream unknown exception", e); - throw DataproxyException.of(DataproxyErrorCode.KUSCIA_GET_STREAM_ERROR, e); - } - } - - public Runnable acceptPutDataUpdate(CallContext context, FlightStream flightStream, StreamListener ackStream) { - log.info("[acceptPutDataUpdate] accept put data (update) start"); - - try { - final Any acceptPutCommand = GrpcUtils.parseOrThrow(flightStream.getDescriptor().getCommand()); - if (!acceptPutCommand.is(Flightdm.TicketDomainDataQuery.class)) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "操作指令类型与接口不匹配"); - } - - Flightdm.TicketDomainDataQuery ticketDomainDataQuery = GrpcUtils.unpackOrThrow(acceptPutCommand, Flightdm.TicketDomainDataQuery.class); - log.info("[acceptPutDataUpdate] parse ticketDomainDataQuery success, ticketDomainDataQuery:{}", ProtoBufJsonUtils.toJSONString(ticketDomainDataQuery)); - - Command command = ticketService.getCommandByTicket(ticketDomainDataQuery.getDomaindataHandle().getBytes()); - log.info("[acceptPutDataUpdate] parse command from ticket success, command:{}", JsonUtils.toJSONString(command)); - if (command.getType() != CommandTypeEnum.WRITE) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "操作指令类型与接口不匹配"); - } - - return () -> { - try { - dataProxyService.datasetWrite((DatasetWriteCommand) command.getCommandInfo(), flightStream, - root -> { - String msg = "row count: " + root.getRowCount(); - try (final ArrowBuf buffer = rootAllocator.buffer(msg.getBytes(StandardCharsets.UTF_8).length)) { - buffer.writeBytes(msg.getBytes(StandardCharsets.UTF_8)); - ackStream.onNext(PutResult.metadata(buffer)); - } - }); - } catch (DataproxyException e) { - throw CallStatus.INTERNAL - .withCause(e) - .withDescription(e.getDescription()) - .toRuntimeException(); - } - }; - } catch (DataproxyException e) { - log.error("[acceptPutDataUpdate] accept put data (update) error", e); - throw e; - } catch (Exception e) { - log.error("[acceptPutDataUpdate] accept put data (update) unknown exception", e); - throw DataproxyException.of(DataproxyErrorCode.KUSCIA_ACCEPT_PUT_ERROR, e); - } - } -} diff --git a/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/ProducerRegistry.java b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/ProducerRegistry.java new file mode 100644 index 0000000..ec17c7f --- /dev/null +++ b/dataproxy-server/src/main/java/org/secretflow/dataproxy/server/flight/ProducerRegistry.java @@ -0,0 +1,56 @@ +/* + * Copyright 2024 Ant Group Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.secretflow.dataproxy.server.flight; + +import org.apache.arrow.flight.FlightProducer; +import org.apache.arrow.flight.NoOpFlightProducer; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Aggregation FlightProducer registry. + * + * @author yuexie + * @date 2024/10/30 16:35 + **/ +public class ProducerRegistry { + + private static final ProducerRegistry INSTANCE = new ProducerRegistry(); + + private final Map<String, FlightProducer> producers = new ConcurrentHashMap<>(8); + + private final NoOpFlightProducer noOpFlightProducer = new NoOpFlightProducer(); + + private ProducerRegistry() { + // Private constructor to prevent instantiation from outside + } + + public static ProducerRegistry getInstance() { + return INSTANCE; + } + + public void register(String key, FlightProducer service) { + // Implementation to register a service + producers.put(key, service); + } + + public FlightProducer getOrDefaultNoOp(String key) { + // Implementation to retrieve a service by name + return producers.getOrDefault(key, noOpFlightProducer); + } +} diff --git a/dataproxy-server/src/main/resources/application.yaml b/dataproxy-server/src/main/resources/application.yaml deleted file mode 100644 index 614120e..0000000 --- a/dataproxy-server/src/main/resources/application.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2024 Ant Group Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -spring: - # profiles: - # active: local - autoconfigure: - exclude: org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration - application: - name: dataproxy - servlet: - multipart: - max-file-size: -1 - max-request-size: -1 - file-size-threshold: -1 - -logging: - level: - root: info - file: - path: "./logs" - -dataproxy: - flight: - host: 127.0.0.1 # endpoint IP returned by getFlightInfo - port: 8023 - ticket: - timeout: 300 # expiration time, in seconds - onlyOnce: true # single-use flag; true: a ticket is used once, false: multiple calls allowed, destroyed on timeout \ No newline at end of file diff --git a/dataproxy-server/src/main/resources/logback.xml b/dataproxy-server/src/main/resources/logback.xml new file mode 100644 index 0000000..9f1897d --- /dev/null +++ b/dataproxy-server/src/main/resources/logback.xml @@ -0,0 +1,29 @@ +<?xml version="1.0" encoding="UTF-8"?> +<configuration> + <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender"> + <file>log/dataproxy.log</file> + <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> + <fileNamePattern>log/dataproxy.%d{yyyy-MM-dd}.log</fileNamePattern> + <maxHistory>30</maxHistory> + </rollingPolicy> + <encoder> + <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} %line - %msg%n</pattern> + </encoder> + </appender> + <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender"> + <encoder> + <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> + </encoder> + </appender> + <root level="info"> + <appender-ref ref="FILE"/> + <appender-ref ref="STDOUT"/> + </root> +</configuration> \ No newline at end of file diff --git a/dataproxy-service/src/main/java/org/secretflow/dataproxy/service/DataProxyService.java b/dataproxy-service/src/main/java/org/secretflow/dataproxy/service/DataProxyService.java deleted file mode 100644 index 42a0e0e..0000000 --- a/dataproxy-service/src/main/java/org/secretflow/dataproxy/service/DataProxyService.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd.
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.service; - -import org.secretflow.dataproxy.common.model.InferSchemaResult; -import org.secretflow.dataproxy.common.model.command.DatasetReadCommand; -import org.secretflow.dataproxy.common.model.command.DatasetWriteCommand; -import org.secretflow.dataproxy.common.model.dataset.DatasetFormatConfig; -import org.secretflow.dataproxy.common.model.datasource.DatasetLocationConfig; -import org.secretflow.dataproxy.common.model.datasource.DatasourceConnConfig; -import org.secretflow.dataproxy.manager.Connector; - -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowReader; - -/** - * @author muhong - * @date 2023-09-01 11:01 - */ -public interface DataProxyService { - - /** - * Build a datasource connector - * - * @param connConfig datasource connection config - * @return datasource connector - */ - Connector buildConnector(DatasourceConnConfig connConfig); - - /** - * Validate datasource connection parameters - * - * @param connConfig datasource connection config - */ - - void validateConn(DatasourceConnConfig connConfig); - - /** - * Infer the dataset schema - * - * @param allocator memory allocator - * @param connConfig datasource connection config - * @param locationConfig dataset location config - * @param formatConfig dataset format config - * @return inferred schema together with the detailed format config - */ - InferSchemaResult inferSchema(BufferAllocator allocator, DatasourceConnConfig connConfig, DatasetLocationConfig locationConfig, DatasetFormatConfig formatConfig); - - /** - * Read data - * - * @param allocator memory allocator - * @param readCommand dataset read command - * @return Arrow streaming reader - */ - ArrowReader generateArrowReader(BufferAllocator allocator, DatasetReadCommand readCommand); - - /** - * Write data - * - * @param writeCommand dataset write command - * @param flightStream Arrow data stream to be stored - * @param writeCallback callback invoked after each batch is stored - */ - void datasetWrite(DatasetWriteCommand writeCommand, FlightStream flightStream, WriteCallback writeCallback); - - /** - * Callback invoked when a single batch has been stored - */ - interface WriteCallback { - void ack(VectorSchemaRoot root); - } -} \ No newline at end of file diff --git a/dataproxy-service/src/main/java/org/secretflow/dataproxy/service/impl/DataProxyServiceDirectImpl.java b/dataproxy-service/src/main/java/org/secretflow/dataproxy/service/impl/DataProxyServiceDirectImpl.java deleted file mode 100644 index b35c553..0000000 --- a/dataproxy-service/src/main/java/org/secretflow/dataproxy/service/impl/DataProxyServiceDirectImpl.java +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright 2023 Ant Group Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.secretflow.dataproxy.service.impl; - -import com.github.benmanes.caffeine.cache.Cache; -import com.github.benmanes.caffeine.cache.Caffeine; -import lombok.extern.slf4j.Slf4j; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.commons.collections4.CollectionUtils; -import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode; -import org.secretflow.dataproxy.common.exceptions.DataproxyException; -import org.secretflow.dataproxy.common.model.InferSchemaResult; -import org.secretflow.dataproxy.common.model.command.DatasetReadCommand; -import org.secretflow.dataproxy.common.model.command.DatasetWriteCommand; -import org.secretflow.dataproxy.common.model.dataset.DatasetFormatConfig; -import org.secretflow.dataproxy.common.model.datasource.DatasetLocationConfig; -import org.secretflow.dataproxy.common.model.datasource.DatasourceConnConfig; -import org.secretflow.dataproxy.common.model.datasource.conn.JdbcBaseConnConfig; -import org.secretflow.dataproxy.common.utils.JsonUtils; -import org.secretflow.dataproxy.manager.Connector; -import org.secretflow.dataproxy.manager.DataReader; -import org.secretflow.dataproxy.manager.DataWriter; -import org.secretflow.dataproxy.manager.connector.filesystem.FileSystemConnector; -import org.secretflow.dataproxy.manager.connector.odps.OdpsConnector; -import org.secretflow.dataproxy.manager.connector.rdbms.JdbcConnector; -import org.secretflow.dataproxy.service.DataProxyService; -import org.springframework.stereotype.Service; - -import javax.annotation.PostConstruct; - -/** - * 简单数据处理中心实现(数据直传) - * - * @author muhong - * @date 2023-09-01 17:12 - */ -@Slf4j -@Service -public class DataProxyServiceDirectImpl implements DataProxyService { - - protected Cache connectorCache; - - @PostConstruct - private void init() { - connectorCache = Caffeine.newBuilder() - .maximumSize(100) - .removalListener((key, connector, cause) -> { - if (connector != null) { - try { - ((Connector) connector).close(); - log.info("[DataProxyServiceDirectImpl] remove item from connector cache success, cause:{}, key:{}", cause, key); - } catch (Exception e) { - log.error("[DataProxyServiceDirectImpl] remove item from connector cache failed, because connector close failed, conn config: {}", - key, e); - } - } - }) - .build(); - } - - /** - * 构建数据源连接器 - * - * @param connConfig 数据源连接信息 - * @return 数据源连接器 - */ - @Override - public synchronized Connector buildConnector(DatasourceConnConfig connConfig) { - String key = connConfig.generateUniqueId(); - - Connector connector = connectorCache.getIfPresent(key); - if (connector != null) { - if (connector.isAvailable()) { - return connector; - } else { - connectorCache.invalidate(key); - } - } - - if (connConfig.getType() == null) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_NOT_EXIST_ERROR, "数据源类型字段缺失"); - } - - switch (connConfig.getType()) { - case MYSQL: { - // 连接信息缺失校验 - if (connConfig.getConnConfig() == null) { - throw DataproxyException.of(DataproxyErrorCode.PARAMS_NOT_EXIST_ERROR, "数据源连接信息字段缺失"); - } - connector = new JdbcConnector(connConfig.getType(), (JdbcBaseConnConfig) connConfig.getConnConfig()); - break; - } - case MINIO: - case OSS: - case OBS: - case LOCAL_HOST: - connector = new FileSystemConnector(connConfig.getType(), 
-                    connConfig.getConnConfig());
-                break;
-            case ODPS:
-                connector = new OdpsConnector(connConfig.getConnConfig());
-                break;
-            default:
-                throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "unsupported datasource type " + connConfig.getType());
-        }
-        connectorCache.put(key, connector);
-        return connector;
-    }
-
-    @Override
-    public void validateConn(DatasourceConnConfig connConfig) {
-        // if a connector can be built, the connection is healthy
-        buildConnector(connConfig);
-    }
-
-    @Override
-    public InferSchemaResult inferSchema(BufferAllocator allocator, DatasourceConnConfig connConfig, DatasetLocationConfig locationConfig, DatasetFormatConfig formatConfig) {
-        Connector connector = buildConnector(connConfig);
-        return connector.inferSchema(allocator, locationConfig.getLocationConfig(), formatConfig);
-    }
-
-    @Override
-    public ArrowReader generateArrowReader(BufferAllocator allocator, DatasetReadCommand readCommand) {
-        Connector connector = buildConnector(readCommand.getConnConfig());
-
-        // fill in parameters missing from formatConfig
-        InferSchemaResult inferSchemaResult = inferSchema(allocator, readCommand.getConnConfig(),
-            readCommand.getLocationConfig(), readCommand.getFormatConfig());
-        readCommand.setFormatConfig(inferSchemaResult.getDatasetFormatConfig());
-        // infer the schema when it is absent
-        if (readCommand.getSchema() == null) {
-            readCommand.setSchema(inferSchemaResult.getSchema());
-        }
-
-        DataReader dataReader = connector.buildReader(allocator, readCommand);
-        return dataReader.createSplitReader(1).get(0).startRead();
-    }
-
-    @Override
-    public void datasetWrite(DatasetWriteCommand writeCommand, FlightStream flightStream, WriteCallback writeCallback) {
-
-        try (Connector connector = buildConnector(writeCommand.getConnConfig())) {
-            VectorSchemaRoot batch = flightStream.getRoot();
-
-            if (writeCommand.getSchema() == null || CollectionUtils.isEmpty(writeCommand.getSchema().getFields())) {
-                writeCommand.setSchema(batch.getSchema());
-            }
-
-            int batchSize = 0;
-
-            try (DataWriter dataWriter = connector.buildWriter(writeCommand)) {
-                while (flightStream.next()) {
-                    dataWriter.write(batch);
-                    // invoke the write callback
-                    writeCallback.ack(batch);
-                    log.info("[datasetWrite] dataset batch write is successful");
-                    batchSize += batch.getRowCount();
-                }
-                dataWriter.flush();
-                log.info("[datasetWrite] dataset write over, total size: {}", batchSize);
-            }
-        } catch (DataproxyException e) {
-            log.error("[datasetWrite] dataset write error, cmd: {}", JsonUtils.toJSONString(writeCommand), e);
-            throw e;
-        } catch (Exception e) {
-            log.error("[datasetWrite] dataset write unknown exception, cmd: {}", JsonUtils.toJSONString(writeCommand), e);
-            throw DataproxyException.of(DataproxyErrorCode.DATASET_WRITE_ERROR, e);
-        }
-    }
-
-}
\ No newline at end of file
diff --git a/dataproxy-service/src/main/java/org/secretflow/dataproxy/service/impl/TicketServiceImpl.java b/dataproxy-service/src/main/java/org/secretflow/dataproxy/service/impl/TicketServiceImpl.java
deleted file mode 100644
index 8c27b1d..0000000
--- a/dataproxy-service/src/main/java/org/secretflow/dataproxy/service/impl/TicketServiceImpl.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright 2023 Ant Group Co., Ltd.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.secretflow.dataproxy.service.impl;
-
-import org.secretflow.dataproxy.common.exceptions.DataproxyErrorCode;
-import org.secretflow.dataproxy.common.exceptions.DataproxyException;
-import org.secretflow.dataproxy.common.model.command.Command;
-import org.secretflow.dataproxy.common.utils.IdUtils;
-import org.secretflow.dataproxy.service.TicketService;
-
-import com.github.benmanes.caffeine.cache.Cache;
-import com.github.benmanes.caffeine.cache.Caffeine;
-import lombok.extern.slf4j.Slf4j;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.stereotype.Service;
-
-import javax.annotation.PostConstruct;
-import java.nio.charset.StandardCharsets;
-import java.util.concurrent.TimeUnit;
-
-/**
- * Ticket service implementation
- *
- * @author muhong
- * @date 2023-08-31 11:50
- */
-@Slf4j
-@Service
-public class TicketServiceImpl implements TicketService {
-
-    /**
-     * Timeout in seconds
-     */
-    @Value("${dataproxy.ticket.timeout}")
-    private int timeout = 300;
-
-    /**
-     * Whether a ticket is single-use
-     */
-    @Value("${dataproxy.ticket.onlyOnce}")
-    private boolean onlyOnce;
-
-    private Cache<String, Command> ticketCache;
-
-    @PostConstruct
-    private void init() {
-        // tickets are backed by a local cache for now
-        ticketCache = Caffeine.newBuilder()
-            .initialCapacity(5)
-            .maximumSize(10)
-            // entries expire after the timeout, five minutes by default
-            .expireAfterWrite(timeout, TimeUnit.SECONDS)
-            .build();
-    }
-
-    @Override
-    public byte[] generateTicket(Command command) {
-        String ticket = IdUtils.randomUUID();
-        ticketCache.put(ticket, command);
-        return ticket.getBytes(StandardCharsets.UTF_8);
-    }
-
-    @Override
-    public synchronized Command getCommandByTicket(byte[] ticket) {
-        String ticketStr = new String(ticket);
-
-        Command command = ticketCache.getIfPresent(ticketStr);
-        if (command == null) {
-            throw DataproxyException.of(DataproxyErrorCode.TICKET_UNAVAILABLE);
-        }
-
-        if (onlyOnce) {
-            // a ticket may only be consumed once
-            ticketCache.invalidate(ticketStr);
-        }
-        return command;
-    }
-}
diff --git a/dataproxy_sdk/bazel/repositories.bzl b/dataproxy_sdk/bazel/repositories.bzl
index 39c9e3b..2a246a0 100644
--- a/dataproxy_sdk/bazel/repositories.bzl
+++ b/dataproxy_sdk/bazel/repositories.bzl
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
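The TicketServiceImpl removed above reduces to a small, reusable pattern: a TTL cache keyed by a random UUID, with optional consume-once semantics. A minimal Python sketch of the same behavior, using only the standard library (the class and method names here are illustrative, not part of DataProxy):

    import time
    import uuid

    class OneShotTicketCache:
        """TTL cache of commands keyed by a random ticket (illustrative sketch)."""

        def __init__(self, timeout_s: float = 300.0, only_once: bool = True):
            self._timeout_s = timeout_s  # mirrors dataproxy.ticket.timeout
            self._only_once = only_once  # mirrors dataproxy.ticket.onlyOnce
            self._entries = {}           # ticket -> (expiry, command)

        def generate_ticket(self, command) -> bytes:
            ticket = str(uuid.uuid4())
            self._entries[ticket] = (time.monotonic() + self._timeout_s, command)
            return ticket.encode("utf-8")

        def get_command(self, ticket: bytes):
            key = ticket.decode("utf-8")
            expiry, command = self._entries.get(key, (0.0, None))
            if command is None or time.monotonic() > expiry:
                self._entries.pop(key, None)
                raise KeyError("ticket unavailable")  # cf. TICKET_UNAVAILABLE
            if self._only_once:
                del self._entries[key]  # a ticket may only be consumed once
            return command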
+load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe")
 
@@ -51,14 +52,22 @@ def _yacl():
     )
 
 def _kuscia():
+    # maybe(
+    #     http_archive,
+    #     name = "kuscia",
+    #     urls = [
+    #         "https://github.com/secretflow/kuscia/archive/refs/tags/v0.11.0b0.tar.gz",
+    #     ],
+    #     strip_prefix = "kuscia-0.11.0b0",
+    #     sha256 = "c8de425a5f442ba3fa30a9b5943f9fd056efd9ab610ddc2168d5ffcf71224974",
+    # )
+
+    # TODO: replace with the GitHub address before release
     maybe(
-        http_archive,
+        git_repository,
         name = "kuscia",
-        urls = [
-            "https://github.com/secretflow/kuscia/archive/refs/tags/v0.11.0b0.tar.gz",
-        ],
-        strip_prefix = "kuscia-0.11.0b0",
-        sha256 = "c8de425a5f442ba3fa30a9b5943f9fd056efd9ab610ddc2168d5ffcf71224974",
+        commit = "04b5f468a397a0a9e54b34a461fcd6e81b2aad9a",
+        remote = "git@code.alipay.com:secretflow/kuscia.git",
     )
 
 def _bazel_rules_pkg():
diff --git a/dataproxy_sdk/cc/BUILD.bazel b/dataproxy_sdk/cc/BUILD.bazel
index a848258..2679d8d 100644
--- a/dataproxy_sdk/cc/BUILD.bazel
+++ b/dataproxy_sdk/cc/BUILD.bazel
@@ -23,6 +23,7 @@ dataproxy_cc_library(
     ],
     deps = [
         ":data_proxy_file",
+        ":data_proxy_stream",
     ],
 )
 
@@ -92,6 +93,19 @@ dataproxy_cc_library(
     ],
 )
 
+dataproxy_cc_library(
+    name = "data_proxy_stream",
+    srcs = ["data_proxy_stream.cc"],
+    hdrs = ["data_proxy_stream.h"],
+    deps = [
+        ":data_proxy_conn",
+        ":exception",
+        ":proto",
+        ":utils",
+        "@org_apache_arrow//:arrow_flight",
+    ],
+)
+
 dataproxy_cc_test(
     name = "file_help_test",
     srcs = ["file_help_test.cc"],
@@ -130,3 +144,13 @@ dataproxy_cc_test(
         ":proto",
     ],
 )
+
+dataproxy_cc_test(
+    name = "data_proxy_stream_test",
+    srcs = ["data_proxy_stream_test.cc"],
+    deps = [
+        ":data_proxy_stream",
+        "//dataproxy_sdk/test:data_mesh_mock",
+        "//dataproxy_sdk/test:random",
+    ],
+)
diff --git a/dataproxy_sdk/cc/api.h b/dataproxy_sdk/cc/api.h
index 8de3c3b..e8a61aa 100644
--- a/dataproxy_sdk/cc/api.h
+++ b/dataproxy_sdk/cc/api.h
@@ -14,4 +14,5 @@
 
 #pragma once
 
-#include "dataproxy_sdk/cc/data_proxy_file.h"
\ No newline at end of file
+#include "dataproxy_sdk/cc/data_proxy_file.h"
+#include "dataproxy_sdk/cc/data_proxy_stream.h"
\ No newline at end of file
diff --git a/dataproxy_sdk/cc/data_proxy_file.cc b/dataproxy_sdk/cc/data_proxy_file.cc
index b09aa38..31fbe0b 100644
--- a/dataproxy_sdk/cc/data_proxy_file.cc
+++ b/dataproxy_sdk/cc/data_proxy_file.cc
@@ -31,7 +31,7 @@ namespace dataproxy_sdk {
 
 class DataProxyFile::Impl {
  public:
-  void Init(const proto::DataProxyConfig &config) {
+  void Init(const proto::DataProxyConfig& config) {
     arrow::flight::FlightClientOptions options =
         arrow::flight::FlightClientOptions::Defaults();
     if (config.has_tls_config()) {
@@ -47,7 +47,7 @@ class DataProxyFile::Impl {
                                      config.has_tls_config(), options);
   }
 
-  FileHelpWrite::Options BuildWriteOptions(const proto::DownloadInfo &info) {
+  FileHelpWrite::Options BuildWriteOptions(const proto::DownloadInfo& info) {
     FileHelpWrite::Options options = FileHelpWrite::Options::Defaults();
     if (info.has_orc_info()) {
       options.compression =
@@ -58,8 +58,8 @@ class DataProxyFile::Impl {
     return options;
   }
 
-  void DownloadFile(const proto::DownloadInfo &info,
-                    const std::string &file_path,
+  void DownloadFile(const proto::DownloadInfo& info,
+                    const std::string& file_path,
                     proto::FileFormat file_format) {
     // 1. fetch DataProxy info from DataMesh
     auto any = BuildDownloadAny(info, file_format);
@@ -91,15 +91,15 @@ class DataProxyFile::Impl {
     file_write->DoClose();
   }
 
-  FileHelpRead::Options BuildReadOptions(const proto::UploadInfo &info) {
+  FileHelpRead::Options BuildReadOptions(const proto::UploadInfo& info) {
     FileHelpRead::Options options = FileHelpRead::Options::Defaults();
-    for (auto &column : info.columns()) {
+    for (auto& column : info.columns()) {
       options.column_types.emplace(column.name(), GetDataType(column.type()));
     }
     return options;
   }
 
-  void DoUpload(const proto::UploadInfo &info, const std::string &file_path,
+  void DoUpload(const proto::UploadInfo& info, const std::string& file_path,
                 proto::FileFormat file_format) {
     // 2. connect to DataProxy using the info returned by DataMesh
     auto any = BuildUploadAny(info, file_format);
@@ -148,7 +148,7 @@ class DataProxyFile::Impl {
     file_read->DoClose();
   }
 
-  void CreateDomainData(proto::UploadInfo &info,
+  void CreateDomainData(proto::UploadInfo& info,
                         proto::FileFormat file_format) {
     auto action_msg = BuildActionCreateDomainDataRequest(info, file_format);
     arrow::flight::Action action{
@@ -163,13 +163,14 @@ class DataProxyFile::Impl {
     CHECK_RESP_OR_THROW(response);
     if (info.domaindata_id().empty()) {
       info.set_domaindata_id(response.data().domaindata_id());
+      SPDLOG_INFO("DP create domaindata id:{}", info.domaindata_id());
     } else if (response.data().domaindata_id() != info.domaindata_id()) {
       DATAPROXY_THROW("domaindata id error, request:{}, response:{}",
                       info.domaindata_id(), response.data().domaindata_id());
     }
   }
 
-  void DeleteDomainData(const proto::UploadInfo &info) {
+  void DeleteDomainData(const proto::UploadInfo& info) {
     auto action_request = BuildActionDeleteDomainDataRequest(info);
     arrow::flight::Action action{
         "ActionDeleteDomainDataRequest",
@@ -177,16 +178,18 @@ class DataProxyFile::Impl {
     auto result = dp_conn_->DoAction(action);
   }
 
-  void UploadFile(proto::UploadInfo &info, const std::string &file_path,
+  void UploadFile(proto::UploadInfo& info, const std::string& file_path,
                   proto::FileFormat file_format) {
-    dataproxy_sdk::CheckUploadInfo(info);
+    CheckUploadInfo(info);
     CreateDomainData(info, file_format);
     try {
       DoUpload(info, file_path, file_format);
     } catch (...) {
      try {
         DeleteDomainData(info);
+        SPDLOG_WARN("file upload error. upload_info:{}",
+                    info.SerializeAsString());
-      } catch (const std::exception &e) {
+      } catch (const std::exception& e) {
         SPDLOG_WARN("DeleteDomainData error.
msg:{}", e.what()); } throw; @@ -200,7 +203,7 @@ class DataProxyFile::Impl { }; std::unique_ptr DataProxyFile::Make( - const proto::DataProxyConfig &config) { + const proto::DataProxyConfig& config) { proto::DataProxyConfig dp_config; dp_config.CopyFrom(config); GetDPConfigValueFromEnv(&dp_config); @@ -221,14 +224,14 @@ DataProxyFile::DataProxyFile() { DataProxyFile::~DataProxyFile() = default; -void DataProxyFile::DownloadFile(const proto::DownloadInfo &info, - const std::string &file_path, +void DataProxyFile::DownloadFile(const proto::DownloadInfo& info, + const std::string& file_path, proto::FileFormat file_format) { impl_->DownloadFile(info, file_path, file_format); } -void DataProxyFile::UploadFile(proto::UploadInfo &info, - const std::string &file_path, +void DataProxyFile::UploadFile(proto::UploadInfo& info, + const std::string& file_path, proto::FileFormat file_format) { impl_->UploadFile(info, file_path, file_format); } diff --git a/dataproxy_sdk/cc/data_proxy_pb.cc b/dataproxy_sdk/cc/data_proxy_pb.cc index 5d2a786..24716cf 100644 --- a/dataproxy_sdk/cc/data_proxy_pb.cc +++ b/dataproxy_sdk/cc/data_proxy_pb.cc @@ -46,29 +46,49 @@ inline kuscia_proto::FileFormat ChangeToKusciaFileFormat( } google::protobuf::Any BuildDownloadAny(const proto::DownloadInfo& info, - proto::FileFormat file_format) { - google::protobuf::Any any; + proto::ContentType content_type) { proto::CommandDomainDataQuery msg; msg.set_domaindata_id(info.domaindata_id()); msg.set_partition_spec(info.partition_spec()); - msg.set_content_type(FormatToContentType(file_format)); + msg.set_content_type(content_type); + google::protobuf::Any any; any.PackFrom(msg); return any; } +google::protobuf::Any BuildDownloadAny(const proto::DownloadInfo& info, + proto::FileFormat file_format) { + return BuildDownloadAny(info, FormatToContentType(file_format)); +} + google::protobuf::Any BuildUploadAny(const proto::UploadInfo& info, - proto::FileFormat file_format) { - google::protobuf::Any any; + proto::ContentType content_type) { proto::CommandDomainDataUpdate msg; msg.set_domaindata_id(info.domaindata_id()); - msg.set_content_type(FormatToContentType(file_format)); - if (file_format != proto::FileFormat::BINARY) { + msg.set_content_type(content_type); + if (content_type != proto::ContentType::RAW) { msg.mutable_file_write_options() ->mutable_csv_options() ->set_field_delimiter(","); } + google::protobuf::Any any; + any.PackFrom(msg); + return any; +} + +google::protobuf::Any BuildUploadAny(const proto::UploadInfo& info, + proto::FileFormat file_format) { + return BuildUploadAny(info, FormatToContentType(file_format)); +} + +google::protobuf::Any BuildSQLAny(const proto::SQLInfo& info) { + proto::CommandDataSourceSqlQuery msg; + msg.set_datasource_id(info.datasource_id()); + msg.set_sql(info.sql()); + + google::protobuf::Any any; any.PackFrom(msg); return any; } diff --git a/dataproxy_sdk/cc/data_proxy_pb.h b/dataproxy_sdk/cc/data_proxy_pb.h index df23ab9..9fe4736 100644 --- a/dataproxy_sdk/cc/data_proxy_pb.h +++ b/dataproxy_sdk/cc/data_proxy_pb.h @@ -27,12 +27,20 @@ using namespace kuscia::proto::api::v1alpha1::datamesh; namespace dm_proto = kuscia::proto::api::v1alpha1::datamesh; namespace kuscia_proto = kuscia::proto::api::v1alpha1; +google::protobuf::Any BuildDownloadAny(const proto::DownloadInfo& info, + proto::ContentType content_type); + google::protobuf::Any BuildDownloadAny(const proto::DownloadInfo& info, proto::FileFormat file_format); +google::protobuf::Any BuildUploadAny(const proto::UploadInfo& info, + 
proto::ContentType content_type); + google::protobuf::Any BuildUploadAny(const proto::UploadInfo& info, proto::FileFormat file_format); +google::protobuf::Any BuildSQLAny(const proto::SQLInfo& info); + proto::CreateDomainDataRequest BuildActionCreateDomainDataRequest( const proto::UploadInfo& info, proto::FileFormat file_format); diff --git a/dataproxy_sdk/cc/data_proxy_stream.cc b/dataproxy_sdk/cc/data_proxy_stream.cc new file mode 100644 index 0000000..07f197f --- /dev/null +++ b/dataproxy_sdk/cc/data_proxy_stream.cc @@ -0,0 +1,248 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "dataproxy_sdk/cc/data_proxy_stream.h" + +#include "arrow/buffer.h" +#include "arrow/flight/api.h" +#include "spdlog/spdlog.h" + +#include "dataproxy_sdk/cc/data_proxy_conn.h" +#include "dataproxy_sdk/cc/exception.h" +#include "dataproxy_sdk/cc/utils.h" + +namespace dataproxy_sdk { + +class SimpleStreamReader : public DataProxyStreamReader { + public: + explicit SimpleStreamReader( + std::unique_ptr wrapper) + : DataProxyStreamReader(), wrapper_(std::move(wrapper)) {} + + public: + void Get(std::shared_ptr* batch) { + *batch = wrapper_->ReadRecordBatch(); + } + + std::shared_ptr Schema() { return wrapper_->GetSchema(); } + + private: + std::unique_ptr wrapper_; +}; + +class SimpleStreamWriter : public DataProxyStreamWriter { + public: + SimpleStreamWriter(std::unique_ptr wrapper, + const proto::UploadInfo& upload_info) + : wrapper_(std::move(wrapper)), + upload_info_(upload_info), + closed_(false) {} + virtual ~SimpleStreamWriter() { Close(); }; + + void SetStream(std::shared_ptr stream) { + stream_ = std::move(stream); + } + + public: + void Put(const std::shared_ptr& batch) { + try { + wrapper_->WriteRecordBatch(*batch); + } catch (...) { + try { + if (stream_) { + stream_->DeleteDomainData(upload_info_); + } + } catch (const std::exception& e) { + SPDLOG_WARN("DeleteDomainData error. 
msg:{}", e.what());
+      }
+      throw;
+    }
+  }
+
+  void Close() {
+    if (!closed_) {
+      wrapper_->Close();
+      closed_ = true;
+    }
+  }
+
+ private:
+  std::unique_ptr wrapper_;
+  std::shared_ptr<DataProxyStream> stream_;
+  const proto::UploadInfo upload_info_;
+  bool closed_;
+};
+
+class DataProxyStream::Impl {
+ public:
+  void Init(const proto::DataProxyConfig& config) {
+    arrow::flight::FlightClientOptions options =
+        arrow::flight::FlightClientOptions::Defaults();
+    if (config.has_tls_config()) {
+      options.private_key =
+          ReadFileContent(config.tls_config().private_key_path());
+      options.cert_chain =
+          ReadFileContent(config.tls_config().certificate_path());
+      options.tls_root_certs =
+          ReadFileContent(config.tls_config().ca_file_path());
+    }
+
+    dp_conn_ = DataProxyConn::Connect(config.data_proxy_addr(),
+                                      config.has_tls_config(), options);
+  }
+
+  std::unique_ptr<DataProxyStreamReader> GetReader(
+      const google::protobuf::Any& any) {
+    auto descriptor =
+        arrow::flight::FlightDescriptor::Command(any.SerializeAsString());
+    auto stream_reader = dp_conn_->DoGet(descriptor);
+
+    return std::make_unique<SimpleStreamReader>(std::move(stream_reader));
+  }
+
+  std::shared_ptr<arrow::Schema> BuildWriterSchema(
+      const proto::UploadInfo& info) {
+    arrow::SchemaBuilder schema_builder;
+    for (auto& column : info.columns()) {
+      CHECK_ARROW_OR_THROW(schema_builder.AddField(arrow::field(
+          column.name(), GetDataType(column.type()), !column.not_nullable())));
+    }
+    ASSIGN_DP_OR_THROW(auto ret, schema_builder.Finish())
+    return ret;
+  }
+
+  void CreateDomainData(proto::UploadInfo& info,
+                        proto::FileFormat file_format) {
+    auto action_msg = BuildActionCreateDomainDataRequest(info, file_format);
+    arrow::flight::Action action{
+        "ActionCreateDomainDataRequest",
+        arrow::Buffer::FromString(action_msg.SerializeAsString())};
+    auto result_stream = dp_conn_->DoAction(action);
+
+    std::unique_ptr<arrow::flight::Result> result;
+    ASSIGN_ARROW_OR_THROW(result, result_stream->Next());
+
+    auto response = GetActionCreateDomainDataResponse(result->body->ToString());
+    CHECK_RESP_OR_THROW(response);
+    if (info.domaindata_id().empty()) {
+      info.set_domaindata_id(response.data().domaindata_id());
+      SPDLOG_INFO("DP create domaindata id:{}", info.domaindata_id());
+    } else if (response.data().domaindata_id() != info.domaindata_id()) {
+      DATAPROXY_THROW("domaindata id error, request:{}, response:{}",
+                      info.domaindata_id(), response.data().domaindata_id());
+    }
+  }
+
+  void DeleteDomainData(const proto::UploadInfo& info) {
+    auto action_request = BuildActionDeleteDomainDataRequest(info);
+    arrow::flight::Action action{
+        "ActionDeleteDomainDataRequest",
+        arrow::Buffer::FromString(action_request.SerializeAsString())};
+    auto result = dp_conn_->DoAction(action);
+  }
+
+  std::unique_ptr<SimpleStreamWriter> GetWriter(proto::UploadInfo& info) {
+    auto content_type = proto::ContentType::Table;
+    auto file_format = proto::FileFormat::CSV;
+    if (info.type() != "table") {
+      content_type = proto::ContentType::RAW;
+      file_format = proto::FileFormat::BINARY;
+    }
+
+    CheckUploadInfo(info);
+    CreateDomainData(info, file_format);
+
+    try {
+      // 1. fetch DataProxy info from DataMesh
+      auto any = BuildUploadAny(info, content_type);
+
+      // 2. connect to DataProxy using the info returned by DataMesh
+      auto descriptor =
+          arrow::flight::FlightDescriptor::Command(any.SerializeAsString());
+
+      auto schema = BuildWriterSchema(info);
+
+      auto put_result = dp_conn_->DoPut(descriptor, schema);
+
+      return std::make_unique<SimpleStreamWriter>(std::move(put_result), info);
+    } catch (...) {
+      try {
+        DeleteDomainData(info);
+      } catch (const std::exception& e) {
+        SPDLOG_WARN("DeleteDomainData error.
msg:{}", e.what());
+      }
+      throw;
+    }
+  }
+
+  void Close() { dp_conn_->Close(); }
+
+ private:
+  std::unique_ptr<DataProxyConn> dp_conn_;
+};
+
+std::shared_ptr<DataProxyStream> DataProxyStream::Make(
+    const proto::DataProxyConfig& config) {
+  proto::DataProxyConfig dp_config;
+  dp_config.CopyFrom(config);
+  GetDPConfigValueFromEnv(&dp_config);
+
+  std::shared_ptr<DataProxyStream> ret = std::make_shared<DataProxyStream>();
+  ret->impl_->Init(dp_config);
+  return ret;
+}
+
+std::shared_ptr<DataProxyStream> DataProxyStream::Make() {
+  proto::DataProxyConfig config;
+  return DataProxyStream::Make(config);
+}
+
+DataProxyStream::DataProxyStream() {
+  impl_ = std::make_unique<Impl>();
+}
+
+DataProxyStream::~DataProxyStream() = default;
+
+std::unique_ptr<DataProxyStreamReader> DataProxyStream::GetReader(
+    const proto::DownloadInfo& info) {
+  // TODO:
+  // proto::ContentType::Table is fixed for now, because the DataProxy ODPS
+  // path does not use this field on read; DataProxy will adjust this
+  // parameter in CommandDomainDataQuery later
+  auto any = BuildDownloadAny(info, proto::ContentType::Table);
+
+  return impl_->GetReader(any);
+}
+
+std::unique_ptr<DataProxyStreamReader> DataProxyStream::GetReader(
+    const proto::SQLInfo& info) {
+  auto any = BuildSQLAny(info);
+
+  return impl_->GetReader(any);
+}
+
+std::unique_ptr<DataProxyStreamWriter> DataProxyStream::GetWriter(
+    proto::UploadInfo& info) {
+  auto ret = impl_->GetWriter(info);
+  ret->SetStream(shared_from_this());
+  return ret;
+}
+
+void DataProxyStream::Close() { impl_->Close(); }
+
+void DataProxyStream::DeleteDomainData(const proto::UploadInfo& info) {
+  impl_->DeleteDomainData(info);
+  SPDLOG_WARN("stream write error. upload_info:{}", info.SerializeAsString());
+}
+
+}  // namespace dataproxy_sdk
diff --git a/dataproxy_sdk/cc/data_proxy_stream.h b/dataproxy_sdk/cc/data_proxy_stream.h
new file mode 100644
index 0000000..f2e3fd9
--- /dev/null
+++ b/dataproxy_sdk/cc/data_proxy_stream.h
@@ -0,0 +1,71 @@
+// Copyright 2024 Ant Group Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
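Taken together with the Python wrapper introduced later in this patch (stream.py), the new streaming surface is used roughly as follows. A sketch, assuming a reachable DataProxy endpoint and generated protos; the address and domaindata id are placeholders:

    import pyarrow as pa
    from dataproxy import proto
    from dataproxy.stream import Stream

    config = proto.DataProxyConfig(data_proxy_addr="127.0.0.1:8071")  # placeholder
    stream = Stream(config)

    # Writing: columns are declared up front so the C++ side can build the
    # Arrow schema (see BuildWriterSchema above); type "table" selects
    # ContentType::Table, anything else falls back to RAW.
    upload = proto.UploadInfo(
        type="table",
        columns=[proto.DataColumn(name="x", type="int32")],
    )
    writer = stream.get_writer(upload)
    writer.put(pa.record_batch([pa.array([1, 2, 3], pa.int32())], names=["x"]))
    writer.close()

    # Reading: StreamReader.get() raises StopIteration once the Flight
    # stream is drained.
    download = proto.DownloadInfo(domaindata_id="my-domaindata-id")  # placeholder
    reader = stream.get_reader(download)
    batch = reader.get()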
+ +#pragma once + +#include + +#include "arrow/record_batch.h" + +#include "dataproxy_sdk/cc/data_proxy_pb.h" + +namespace dataproxy_sdk { + +class DataProxyStreamReader { + public: + virtual ~DataProxyStreamReader() = default; + + public: + virtual void Get(std::shared_ptr* batch) = 0; + virtual std::shared_ptr Schema() = 0; +}; + +class DataProxyStreamWriter { + public: + virtual ~DataProxyStreamWriter() = default; + + public: + virtual void Put(const std::shared_ptr& batch) = 0; + virtual void Close() = 0; +}; + +class DataProxyStream : public std::enable_shared_from_this { + public: + static std::shared_ptr Make( + const proto::DataProxyConfig& config); + + static std::shared_ptr Make(); + + public: + DataProxyStream(); + ~DataProxyStream(); + + public: + std::unique_ptr GetReader( + const proto::DownloadInfo& info); + + std::unique_ptr GetReader(const proto::SQLInfo& info); + + std::unique_ptr GetWriter(proto::UploadInfo& info); + + void Close(); + + void DeleteDomainData(const proto::UploadInfo& info); + + private: + class Impl; + std::unique_ptr impl_; +}; + +} // namespace dataproxy_sdk \ No newline at end of file diff --git a/dataproxy_sdk/cc/data_proxy_stream_test.cc b/dataproxy_sdk/cc/data_proxy_stream_test.cc new file mode 100644 index 0000000..fb4c2f5 --- /dev/null +++ b/dataproxy_sdk/cc/data_proxy_stream_test.cc @@ -0,0 +1,77 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
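One design point worth noting: the C++ SimpleStreamWriter in data_proxy_stream.cc closes itself in its destructor, guarded by the closed_ flag, so Close() is idempotent and exception-safe. The Python StreamWriter added later in this patch has no such guard, so a small context manager on the caller's side is a reasonable complement. A hedged sketch; the helper below is illustrative, not part of the SDK:

    from contextlib import contextmanager

    @contextmanager
    def writing(stream, upload_info):
        """Yield a StreamWriter and always close it, mirroring the C++ RAII behavior."""
        writer = stream.get_writer(upload_info)
        try:
            yield writer
        finally:
            writer.close()

    # Usage:
    # with writing(stream, upload_info) as w:
    #     w.put(batch)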
+ +#include "dataproxy_sdk/cc/data_proxy_stream.h" + +#include "arrow/type.h" +#include "gtest/gtest.h" + +#include "dataproxy_sdk/cc/exception.h" +#include "dataproxy_sdk/test/data_mesh_mock.h" +#include "dataproxy_sdk/test/random.h" + +namespace dataproxy_sdk { + +class TestDataProxyStream : public ::testing::Test { + public: + void SetUp() { + data_mesh_ = DataMeshMock::Make(); + CHECK_ARROW_OR_THROW(data_mesh_->StartServer(kDataMeshAddress)); + + dataproxy_sdk::proto::DataProxyConfig sdk_config; + sdk_config.set_data_proxy_addr(kDataMeshAddress); + data_proxy_stream_ = DataProxyStream::Make(sdk_config); + + data_ = RandomBatchGenerator::ExampleGenerate(); + } + + protected: + std::shared_ptr data_mesh_; + std::shared_ptr data_; + std::shared_ptr data_proxy_stream_; + const std::string kDataMeshAddress = "127.0.0.1:23336"; +}; + +TEST_F(TestDataProxyStream, PutAndGet) { + proto::UploadInfo upload_info; + upload_info.set_domaindata_id(""); + upload_info.set_type("table"); + for (const auto& field : data_->schema()->fields()) { + auto column = upload_info.add_columns(); + column->set_name(field->name()); + column->set_type(field->type()->name()); + } + auto writer = data_proxy_stream_->GetWriter(upload_info); + writer->Put(data_); + writer->Close(); + + proto::DownloadInfo download_info; + download_info.set_domaindata_id("test"); + auto reader = data_proxy_stream_->GetReader(download_info); + std::shared_ptr result_batch; + reader->Get(&result_batch); + + EXPECT_TRUE(data_->Equals(*result_batch)); + + proto::SQLInfo sql_info; + sql_info.set_datasource_id("test"); + sql_info.set_sql("select * from test;"); + reader = data_proxy_stream_->GetReader(sql_info); + std::shared_ptr sql_batch; + reader->Get(&sql_batch); + + EXPECT_TRUE(data_->Equals(*sql_batch)); +} + +} // namespace dataproxy_sdk diff --git a/dataproxy_sdk/cc/exception.h b/dataproxy_sdk/cc/exception.h index ac20290..446ff81 100644 --- a/dataproxy_sdk/cc/exception.h +++ b/dataproxy_sdk/cc/exception.h @@ -24,12 +24,12 @@ namespace dataproxy_sdk { #define DATAPROXY_ENFORCE_EQ(...) YACL_ENFORCE_EQ(__VA_ARGS__) -#define CHECK_ARROW_OR_THROW(statement) \ - do { \ - auto __s__ = (statement); \ - if (!__s__.ok()) { \ - DATAPROXY_THROW(__s__.ToString()); \ - } \ +#define CHECK_ARROW_OR_THROW(statement) \ + do { \ + auto __s__ = (statement); \ + if (!__s__.ok()) { \ + DATAPROXY_THROW("{}", __s__.ToString()); \ + } \ } while (false) #define CHECK_RESP_OR_THROW(resp) \ @@ -41,22 +41,22 @@ namespace dataproxy_sdk { } while (false) // For StatusOr from Asylo. 
-#define ASSIGN_ARROW_OR_THROW(lhs, rexpr) \ - do { \ - auto __s__ = (rexpr); \ - if (!__s__.ok()) { \ - DATAPROXY_THROW(__s__.status().message()); \ - } \ - lhs = std::move(__s__).ValueOrDie(); \ +#define ASSIGN_ARROW_OR_THROW(lhs, rexpr) \ + do { \ + auto __s__ = (rexpr); \ + if (!__s__.ok()) { \ + DATAPROXY_THROW("{}", __s__.status().ToString()); \ + } \ + lhs = std::move(__s__).ValueOrDie(); \ } while (false) -#define ASSIGN_DP_OR_THROW(lhs, rexpr) \ - auto&& _error_or_value = (rexpr); \ - do { \ - if ((__builtin_expect(!!(!(_error_or_value).ok()), 0))) { \ - DATAPROXY_THROW((_error_or_value).status().message()); \ - } \ - } while (0); \ +#define ASSIGN_DP_OR_THROW(lhs, rexpr) \ + auto&& _error_or_value = (rexpr); \ + do { \ + if ((__builtin_expect(!!(!(_error_or_value).ok()), 0))) { \ + DATAPROXY_THROW("{}", (_error_or_value).status().ToString()); \ + } \ + } while (0); \ lhs = std::move(_error_or_value).ValueUnsafe(); } // namespace dataproxy_sdk \ No newline at end of file diff --git a/dataproxy_sdk/cc/file_help.cc b/dataproxy_sdk/cc/file_help.cc index 2028be0..8c6eed0 100644 --- a/dataproxy_sdk/cc/file_help.cc +++ b/dataproxy_sdk/cc/file_help.cc @@ -166,7 +166,8 @@ class BinaryFileRead : public FileHelpRead { std::vector> arrays(1); CHECK_ARROW_OR_THROW(binary_build.Finish(&arrays[0])); *record_batch = - arrow::RecordBatch::Make(this->Schema(), arrays.size(), arrays); + arrow::RecordBatch::Make(this->Schema(), arrays[0]->length(), arrays); + CHECK_ARROW_OR_THROW((*record_batch)->Validate()); } } void DoClose() { CHECK_ARROW_OR_THROW(read_stream_->Close()); } diff --git a/dataproxy_sdk/cc/file_help_test.cc b/dataproxy_sdk/cc/file_help_test.cc index be2b4d3..26a899c 100644 --- a/dataproxy_sdk/cc/file_help_test.cc +++ b/dataproxy_sdk/cc/file_help_test.cc @@ -30,7 +30,7 @@ const std::string kORCFilePath = "test.orc"; const std::string kBianryFilePath = "test.txt"; template -std::unique_ptr GetDefaultFileHelp(const std::string &file_path) { +std::unique_ptr GetDefaultFileHelp(const std::string& file_path) { auto options = T::Options::Defaults(); auto ret = T::Make(GetFileFormat(file_path), file_path, options); return ret; @@ -208,4 +208,36 @@ TEST(FileHelpTestWithOption, ErrorORC) { EXPECT_THROW(reader->DoRead(&read_batch), yacl::Exception); } +void System(const std::string& cmd) { ASSERT_TRUE(system(cmd.c_str()) == 0); } + +TEST(FileHelpTest, LargeBinaryRead) { + const std::string kLargeBinaryFile = "large_file.txt"; + System("dd if=/dev/urandom of=large_file.txt bs=1M count=1"); + auto reader = GetDefaultFileHelp(kLargeBinaryFile); + while (1) { + std::shared_ptr read_batch; + reader->DoRead(&read_batch); + if (!read_batch) break; + } + + reader->DoClose(); +} + +TEST(FileHelpTest, LargeBinary) { + System("dd if=/dev/urandom of=large_file_source.txt bs=10M count=1"); + + auto writer = GetDefaultFileHelp("large_file_equal.txt"); + auto reader = GetDefaultFileHelp("large_file_source.txt"); + while (1) { + std::shared_ptr read_batch; + reader->DoRead(&read_batch); + if (!read_batch) break; + writer->DoWrite(read_batch); + } + writer->DoClose(); + reader->DoClose(); + + System("diff large_file_source.txt large_file_equal.txt"); +} + } // namespace dataproxy_sdk diff --git a/dataproxy_sdk/proto/data_proxy_pb.proto b/dataproxy_sdk/proto/data_proxy_pb.proto index e84f437..354c610 100644 --- a/dataproxy_sdk/proto/data_proxy_pb.proto +++ b/dataproxy_sdk/proto/data_proxy_pb.proto @@ -75,6 +75,12 @@ message DownloadInfo { } } +message SQLInfo { + string datasource_id = 1; + // only support 
select sql + string sql = 2; +} + message UploadInfo { // Optional, The domaindata_id would be generated by server if the // domaindata_id is empty. The unique identity of domaindata, it couldn't diff --git a/dataproxy_sdk/python/dataproxy/BUILD.bazel b/dataproxy_sdk/python/dataproxy/BUILD.bazel index 1d41f21..a58a15c 100644 --- a/dataproxy_sdk/python/dataproxy/BUILD.bazel +++ b/dataproxy_sdk/python/dataproxy/BUILD.bazel @@ -26,8 +26,8 @@ exports_files( ) pybind_extension( - name = "libdataproxy", - srcs = ["libdataproxy.cc"], + name = "_lib", + srcs = ["_lib.cc"], linkopts = select({ "@bazel_tools//src/conditions:darwin": [ "-Wl,-exported_symbols_list,$(location :exported_symbols.lds)", @@ -43,20 +43,37 @@ pybind_extension( ], ) +filegroup( + name = "lib_so", + srcs = [ + ":_lib.so", + ], +) + py_library( name = "data_proxy_file_py", srcs = [ - "dp_file_adapter.py", + "file_adapter.py", + ], + data = [ + ":lib_so", + ], +) + +py_library( + name = "data_proxy_stream_py", + srcs = [ + "stream.py", ], data = [ - ":libdataproxy.so", + ":lib_so", ], ) py_library( name = "protos", srcs = [ - "dp_pb2.py", + "proto.py", "//dataproxy_sdk/proto:data_proxy_proto_py", ], ) @@ -66,9 +83,10 @@ py_library( srcs = [ "__init__.py", ":data_proxy_file_py", + ":data_proxy_stream_py", ":protos", ], data = [ - ":libdataproxy.so", + ":lib_so", ], ) diff --git a/dataproxy_sdk/python/dataproxy/__init__.py b/dataproxy_sdk/python/dataproxy/__init__.py index 2190371..e912b24 100644 --- a/dataproxy_sdk/python/dataproxy/__init__.py +++ b/dataproxy_sdk/python/dataproxy/__init__.py @@ -12,8 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import sdk +from .file_adapter import FileAdapter +from .proto import ( + DataProxyConfig, + DownloadInfo, + UploadInfo, + FileFormat, + TlSConfig, + DataColumn, +) +from .stream import StreamReader, StreamWriter, Stream -__all__ = [ - "sdk", -] +DataProxyFileAdapter = FileAdapter diff --git a/dataproxy_sdk/python/dataproxy/_lib.cc b/dataproxy_sdk/python/dataproxy/_lib.cc new file mode 100644 index 0000000..5ece005 --- /dev/null +++ b/dataproxy_sdk/python/dataproxy/_lib.cc @@ -0,0 +1,135 @@ +// Copyright 2024 Ant Group Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
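Two renames above are worth calling out: the pybind module libdataproxy becomes _lib, and dp_file_adapter/dp_pb2 become file_adapter/proto, with DataProxyFileAdapter = FileAdapter kept as a compatibility alias. Code written against the 0.2.x layout needs the new import path; a hedged compatibility shim (illustrative only, not part of the patch):

    try:
        from dataproxy._lib import DataProxyFile  # layout after this patch
    except ImportError:
        from dataproxy.libdataproxy import DataProxyFile  # pre-0.3 layout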
+ +#include "arrow/c/bridge.h" +#include "arrow/record_batch.h" +#include "pybind11/pybind11.h" + +#include "dataproxy_sdk/cc/api.h" +#include "dataproxy_sdk/cc/exception.h" + +namespace py = pybind11; + +namespace dataproxy_sdk { + +void DeletePtr(void* ptr) { + if (ptr) { + free(ptr); + } +} + +PYBIND11_MODULE(_lib, m) { + m.doc() = R"pbdoc( + Secretflow-DataProxy-SDK Python Library + )pbdoc"; + + py::register_exception_translator( + [](std::exception_ptr p) { // NOLINT: pybind11 + try { + if (p) { + std::rethrow_exception(p); + } + } catch (const yacl::Exception& e) { + // Translate this exception to a standard RuntimeError + PyErr_SetString(PyExc_RuntimeError, + fmt::format("what: \n\t{}\n", e.what()).c_str()); + } + }); + + py::class_>(m, "DataProxyFile") + .def(py::init( + [](const py::bytes& config_str) -> std::unique_ptr { + proto::DataProxyConfig config; + config.ParseFromString(config_str); + return DataProxyFile::Make(config); + })) + .def("download_file", + [](DataProxyFile& self, const py::bytes& info_str, + const std::string& file_path, int file_format) { + proto::DownloadInfo info; + info.ParseFromString(info_str); + + self.DownloadFile(info, file_path, + static_cast(file_format)); + }) + .def("upload_file", + [](DataProxyFile& self, const py::bytes& info_str, + const std::string& file_path, int file_format) { + proto::UploadInfo info; + info.ParseFromString(info_str); + + self.UploadFile(info, file_path, + static_cast(file_format)); + }) + .def("close", &DataProxyFile::Close); + + py::class_>( + m, "DataProxyStreamReader") + .def("get", + [](DataProxyStreamReader& self) -> py::object { + std::shared_ptr batch; + self.Get(&batch); + if (batch == nullptr) { + return py::none(); + } + ArrowArray* ret_array = (ArrowArray*)malloc(sizeof(ArrowArray)); + CHECK_ARROW_OR_THROW(arrow::ExportRecordBatch(*batch, ret_array)); + return py::capsule(ret_array, nullptr, DeletePtr); + }) + .def("schema", [](DataProxyStreamReader& self) -> py::object { + auto schema = self.Schema(); + DATAPROXY_ENFORCE(schema != nullptr); + ArrowSchema* ret_schema = (ArrowSchema*)malloc(sizeof(*ret_schema)); + CHECK_ARROW_OR_THROW(arrow::ExportSchema(*schema, ret_schema)); + return py::capsule(ret_schema, "arrow_schema", DeletePtr); + }); + + py::class_>( + m, "DataProxyStreamWriter") + .def("put", + [](DataProxyStreamWriter& self, py::capsule schema_capsule, + py::capsule array_capsule) { + ArrowArray* array = array_capsule.get_pointer(); + ArrowSchema* schema = schema_capsule.get_pointer(); + ASSIGN_DP_OR_THROW(auto batch, + arrow::ImportRecordBatch(array, schema)); + self.Put(batch); + }) + .def("close", [](DataProxyStreamWriter& self) { self.Close(); }); + + py::class_>( + m, "DataProxyStream") + .def(py::init( + [](const py::bytes& config_str) -> std::shared_ptr { + proto::DataProxyConfig config; + config.ParseFromString(config_str); + return DataProxyStream::Make(config); + })) + .def("get_reader", + [](DataProxyStream& self, const py::bytes& info_str) + -> std::unique_ptr { + proto::DownloadInfo info; + info.ParseFromString(info_str); + return self.GetReader(info); + }) + .def("get_writer", + [](DataProxyStream& self, const py::bytes& info_str) + -> std::unique_ptr { + proto::UploadInfo info; + info.ParseFromString(info_str); + return self.GetWriter(info); + }); +} + +} // namespace dataproxy_sdk \ No newline at end of file diff --git a/dataproxy_sdk/python/dataproxy/dp_file_adapter.py b/dataproxy_sdk/python/dataproxy/file_adapter.py similarity index 63% rename from 
dataproxy_sdk/python/dataproxy/dp_file_adapter.py
rename to dataproxy_sdk/python/dataproxy/file_adapter.py
index 6f0aaee..be614d1 100644
--- a/dataproxy_sdk/python/dataproxy/dp_file_adapter.py
+++ b/dataproxy_sdk/python/dataproxy/file_adapter.py
@@ -12,18 +12,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from . import libdataproxy
-from . import dp_pb2 as proto
-import logging
+from ._lib import DataProxyFile
+from . import proto
+import pyarrow as pa
 import os
+import logging
+import hashlib
+
+
+def get_file_md5(fname):
+    m = hashlib.md5()  # create the md5 hasher
+    with open(fname, "rb") as fobj:
+        while True:
+            data = fobj.read(4096)
+            if not data:
+                break
+            m.update(data)  # feed the next chunk into the hasher
+    return m.hexdigest()  # return the hex digest
 
-class DataProxyFileAdapter:
+
+class FileAdapter:
     def __init__(self, config: proto.DataProxyConfig):
-        self.data_proxy_file = libdataproxy.DataProxyFile(config.SerializeToString())
+        self.file = DataProxyFile(config.SerializeToString())
 
     def close(self):
-        self.data_proxy_file.close()
+        self.file.close()
 
     def download_file(
         self, info: proto.DownloadInfo, file_path: str, file_format: proto.FileFormat
@@ -32,13 +46,12 @@ def download_file(
             f"dataproxy sdk: start download_file[{file_path}], type[{file_format}]"
         )
 
-        self.data_proxy_file.download_file(
-            info.SerializeToString(), file_path, file_format
-        )
+        self.file.download_file(info.SerializeToString(), file_path, file_format)
 
         size = os.path.getsize(file_path)
+        md5 = get_file_md5(file_path)
         logging.info(
-            f"dataproxy sdk: download_file[{file_path}], type[{file_format}], size[{size}]"
+            f"dataproxy sdk: download_file[{file_path}], type[{file_format}], size[{size}], md5[{md5}]"
         )
 
     def upload_file(
@@ -48,11 +61,10 @@ def upload_file(
             f"dataproxy sdk: start upload_file[{file_path}], type[{file_format}]"
         )
 
-        self.data_proxy_file.upload_file(
-            info.SerializeToString(), file_path, file_format
-        )
+        self.file.upload_file(info.SerializeToString(), file_path, file_format)
 
         size = os.path.getsize(file_path)
+        md5 = get_file_md5(file_path)
         logging.info(
-            f"dataproxy sdk: upload_file[{file_path}], type[{file_format}], size[{size}]"
+            f"dataproxy sdk: upload_file[{file_path}], type[{file_format}], size[{size}], md5[{md5}]"
         )
diff --git a/dataproxy_sdk/python/dataproxy/dp_pb2.py b/dataproxy_sdk/python/dataproxy/proto.py
similarity index 91%
rename from dataproxy_sdk/python/dataproxy/dp_pb2.py
rename to dataproxy_sdk/python/dataproxy/proto.py
index 4e68795..234c5f6 100644
--- a/dataproxy_sdk/python/dataproxy/dp_pb2.py
+++ b/dataproxy_sdk/python/dataproxy/proto.py
@@ -13,3 +13,4 @@
 # limitations under the License.
 
 from dataproxy_sdk.proto.data_proxy_pb_pb2 import *
+from kuscia.proto.api.v1alpha1.common_pb2 import DataColumn
diff --git a/dataproxy_sdk/python/dataproxy/sdk.py b/dataproxy_sdk/python/dataproxy/sdk.py
index cd4a158..3888345 100644
--- a/dataproxy_sdk/python/dataproxy/sdk.py
+++ b/dataproxy_sdk/python/dataproxy/sdk.py
@@ -12,5 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .dp_file_adapter import *
-from .dp_pb2 import *
+from dataproxy.file_adapter import *
+from dataproxy.proto import *
+from dataproxy.stream import *
+
+DataProxyFileAdapter = FileAdapter
diff --git a/dataproxy_sdk/python/dataproxy/stream.py b/dataproxy_sdk/python/dataproxy/stream.py
new file mode 100644
index 0000000..1bc6100
--- /dev/null
+++ b/dataproxy_sdk/python/dataproxy/stream.py
@@ -0,0 +1,65 @@
+# Copyright 2024 Ant Group Co., Ltd.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._lib import ( + DataProxyStreamWriter, + DataProxyStreamReader, + DataProxyStream, +) +from . import proto +import logging +import os +import pyarrow + + +class StreamReader: + def __init__(self, reader: DataProxyStreamReader): + self.reader = reader + schema_capsule = reader.schema() + self.schema = pyarrow.Schema._import_from_c_capsule(schema_capsule) + + def get(self): + array_ptr = self.reader.get() + if array_ptr is None: + raise StopIteration + + return pyarrow.RecordBatch._import_from_c(array_ptr, self.schema) + + def get_schema(self): + return self.schema + + +class StreamWriter: + def __init__(self, writer: DataProxyStreamWriter): + self.writer = writer + + def put(self, batch: pyarrow.RecordBatch): + schema_capsule, array_capsule = batch.__arrow_c_array__() + self.writer.put(schema_capsule, array_capsule) + + def close(self): + self.writer.close() + + +class Stream: + def __init__(self, config: proto.DataProxyConfig): + self.stream = DataProxyStream(config.SerializeToString()) + + def get_reader(self, info: proto.DownloadInfo): + reader = self.stream.get_reader(info.SerializeToString()) + return StreamReader(reader) + + def get_writer(self, info: proto.UploadInfo): + writer = self.stream.get_writer(info.SerializeToString()) + return StreamWriter(writer) diff --git a/dataproxy_sdk/python/dataproxy/version.py b/dataproxy_sdk/python/dataproxy/version.py index 11cf104..666772b 100644 --- a/dataproxy_sdk/python/dataproxy/version.py +++ b/dataproxy_sdk/python/dataproxy/version.py @@ -13,4 +13,4 @@ # limitations under the License. -__version__ = "0.2.0b0" +__version__ = "0.3.0b0" diff --git a/dataproxy_sdk/python/requirements.txt b/dataproxy_sdk/python/requirements.txt index fca26b9..a537d4e 100644 --- a/dataproxy_sdk/python/requirements.txt +++ b/dataproxy_sdk/python/requirements.txt @@ -1,2 +1,4 @@ protobuf>=4,<5 -kuscia==0.0.3b0 \ No newline at end of file +kuscia==0.0.3b0 +pyarrow==14.0.2 +numpy<2.0 \ No newline at end of file diff --git a/dataproxy_sdk/python/setup.py b/dataproxy_sdk/python/setup.py index cb2abad..e395a7c 100644 --- a/dataproxy_sdk/python/setup.py +++ b/dataproxy_sdk/python/setup.py @@ -40,9 +40,6 @@ def bazel_invoke(invoker, cmdline, *args, **kwargs): raise -# NOTE: The lists below must be kept in sync with dataproxy/BUILD.bazel. -ops_lib_files = [BAZEL_BIN + "dataproxy_sdk/python/dataproxy/libdataproxy.so"] - # These are the directories where automatically generated Python protobuf # bindings are created. 
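stream.py above leans on the Arrow C data interface in both directions: put() exports a (schema, array) capsule pair via __arrow_c_array__, and the reader side rebuilds the schema with Schema._import_from_c_capsule from the capsule the C++ bindings export. The schema leg can be exercised on its own with pyarrow >= 14; nothing DataProxy-specific is involved below:

    import pyarrow as pa

    schema = pa.schema([("x", pa.int32()), ("y", pa.string())])

    # Export the schema as a PyCapsule, the same shape _lib.cc produces...
    capsule = schema.__arrow_c_schema__()

    # ...and import it back, which is exactly what StreamReader.__init__ does.
    roundtrip = pa.Schema._import_from_c_capsule(capsule)
    assert roundtrip.equals(schema)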
generated_python_directories = [ @@ -84,6 +81,14 @@ def __init__(self, name: str, sourcedir: str = "") -> None: self.sourcedir = os.fspath(Path(sourcedir).resolve()) +def get_lib_files(): + ops_lib_files = [] + lib_output_path = Path(BAZEL_BIN + "dataproxy_sdk/python/dataproxy/") + for file in lib_output_path.glob("*.so"): + ops_lib_files.append(file) + return ops_lib_files + + class BazelBuild(build_ext): def build_extension(self, ext: BazelExtension) -> None: bazel_env = dict(os.environ, PYTHON3_BIN_PATH=sys.executable) @@ -112,7 +117,7 @@ def build_extension(self, ext: BazelExtension) -> None: ) copied_files = 0 - files_to_copy = ops_lib_files + files_to_copy = get_lib_files() # Copy over the autogenerated protobuf Python bindings. for directory in generated_python_directories: diff --git a/dataproxy_sdk/python/test/BUILD.bazel b/dataproxy_sdk/python/test/BUILD.bazel new file mode 100644 index 0000000..2587fa0 --- /dev/null +++ b/dataproxy_sdk/python/test/BUILD.bazel @@ -0,0 +1,62 @@ +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") +load("@rules_python//python:defs.bzl", "py_library", "py_test") + +package(default_visibility = ["//visibility:public"]) + +pybind_extension( + name = "_dm_mock", + srcs = ["_dm_mock.cc"], + deps = [ + "//dataproxy_sdk/cc:exception", + "//dataproxy_sdk/test:data_mesh_mock", + ], +) + +filegroup( + name = "dm_mock_so", + srcs = [ + ":_dm_mock.so", + ], +) + +py_library( + name = "dm_mock_py", + srcs = [ + "dm_mock.py", + ], + data = [ + ":dm_mock_so", + ], +) + +py_test( + name = "file_test", + srcs = ["file_test.py"], + deps = [ + ":dm_mock_py", + "//dataproxy_sdk/python/dataproxy:init", + ], +) + +py_test( + name = "stream_test", + srcs = ["stream_test.py"], + deps = [ + ":dm_mock_py", + "//dataproxy_sdk/python/dataproxy:init", + ], +) diff --git a/dataproxy_sdk/python/dataproxy/libdataproxy.cc b/dataproxy_sdk/python/test/_dm_mock.cc similarity index 50% rename from dataproxy_sdk/python/dataproxy/libdataproxy.cc rename to dataproxy_sdk/python/test/_dm_mock.cc index dfe3b35..43b6125 100644 --- a/dataproxy_sdk/python/dataproxy/libdataproxy.cc +++ b/dataproxy_sdk/python/test/_dm_mock.cc @@ -14,16 +14,16 @@ #include "pybind11/pybind11.h" -#include "dataproxy_sdk/cc/api.h" #include "dataproxy_sdk/cc/exception.h" +#include "dataproxy_sdk/test/data_mesh_mock.h" namespace py = pybind11; namespace dataproxy_sdk { -PYBIND11_MODULE(libdataproxy, m) { +PYBIND11_MODULE(_dm_mock, m) { m.doc() = R"pbdoc( - Secretflow-DataProxy-SDK Python Library + Secretflow-DataProxy-SDK Python Test Library )pbdoc"; py::register_exception_translator( @@ -39,32 +39,17 @@ PYBIND11_MODULE(libdataproxy, m) { } }); - py::class_>(m, "DataProxyFile") - .def(py::init( - [](const py::bytes& config_str) -> std::unique_ptr { - proto::DataProxyConfig config; - config.ParseFromString(config_str); - return DataProxyFile::Make(config); - })) - .def("download_file", - [](DataProxyFile& self, const py::bytes& 
info_str, - const std::string& file_path, int file_format) { - proto::DownloadInfo info; - info.ParseFromString(info_str); - - self.DownloadFile(info, file_path, - static_cast(file_format)); - }) - .def("upload_file", - [](DataProxyFile& self, const py::bytes& info_str, - const std::string& file_path, int file_format) { - proto::UploadInfo info; - info.ParseFromString(info_str); - - self.UploadFile(info, file_path, - static_cast(file_format)); + py::class_>(m, "DataMeshMock") + .def(py::init([]() -> std::unique_ptr { + return DataMeshMock::Make(); + })) + .def("start", + [](DataMeshMock& self, const std::string& ip, bool open_dp) { + CHECK_ARROW_OR_THROW(self.StartServer(ip, open_dp)); }) - .def("close", &DataProxyFile::Close); + .def("close", [](DataMeshMock& self) { + CHECK_ARROW_OR_THROW(self.CloseServer()); + }); } } // namespace dataproxy_sdk \ No newline at end of file diff --git a/dataproxy_sdk/python/test/dm_mock.py b/dataproxy_sdk/python/test/dm_mock.py new file mode 100644 index 0000000..1e8156a --- /dev/null +++ b/dataproxy_sdk/python/test/dm_mock.py @@ -0,0 +1,26 @@ +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._dm_mock import DataMeshMock + + +class DataMesh: + def __init__(self): + self.dm_server = DataMeshMock() + + def start(self, ip, open_dp=False): + self.dm_server.start(ip, open_dp) + + def close(self): + self.dm_server.close() diff --git a/dataproxy_sdk/python/test/file_test.py b/dataproxy_sdk/python/test/file_test.py new file mode 100644 index 0000000..e5f5411 --- /dev/null +++ b/dataproxy_sdk/python/test/file_test.py @@ -0,0 +1,69 @@ +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from dataproxy_sdk.python.dataproxy import ( + DataProxyConfig, + UploadInfo, + DownloadInfo, + DataColumn, + FileAdapter, + FileFormat, +) +from dataproxy_sdk.python.test.dm_mock import DataMesh +from pyarrow.orc import write_table, read_table +import pyarrow as pa +import unittest + + +class TestFile(unittest.TestCase): + def __init__(self, methodName: str = "runTest") -> None: + super().__init__(methodName) + self.dm = DataMesh() + self.dm_ip = "127.0.0.1:24001" + self.dm.start(self.dm_ip) + + def test_file(self): + x = pa.array([2, 2, 4, 4, 5, 100]) + y = pa.array( + ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ) + schema = pa.schema([("x", pa.int32()), ("y", pa.string())]) + batch = pa.RecordBatch.from_arrays([x, y], schema=schema) + + test_file = "py_file_test.orc" + table = pa.Table.from_batches([batch]) + write_table(test_file, table) + + config = DataProxyConfig(data_proxy_addr=self.dm_ip) + file_adapter = FileAdapter(config) + + columns = [] + schema = batch.schema + for name, type in zip(schema.names, schema.types): + columns.append(DataColumn(name=str(name), type=str(type))) + + upload_info = UploadInfo(type="table", columns=columns) + file_adapter.upload_file(upload_info, test_file, FileFormat.ORC) + + download_info = DownloadInfo(domaindata_id="test") + result_file = "py_file_result.orc" + file_adapter.download_file(download_info, result_file, FileFormat.ORC) + + result_table = read_table(result_file) + + self.assertTrue(result_table.equals(table)) + + +if __name__ == "__main__": + unittest.main() diff --git a/dataproxy_sdk/python/test/stream_test.py b/dataproxy_sdk/python/test/stream_test.py new file mode 100644 index 0000000..13fd228 --- /dev/null +++ b/dataproxy_sdk/python/test/stream_test.py @@ -0,0 +1,64 @@ +# Copyright 2024 Ant Group Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from dataproxy_sdk.python.dataproxy import ( + DataProxyConfig, + UploadInfo, + DownloadInfo, + DataColumn, + Stream, + StreamReader, + StreamWriter, +) +from dataproxy_sdk.python.test.dm_mock import DataMesh +import pyarrow as pa +import unittest + + +class TestStream(unittest.TestCase): + def __init__(self, methodName: str = "runTest") -> None: + super().__init__(methodName) + self.dm = DataMesh() + self.dm_ip = "127.0.0.1:24002" + self.dm.start(self.dm_ip) + + def test_stream(self): + x = pa.array([2, 2, 4, 4, 5, 100]) + y = pa.array( + ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ) + schema = pa.schema([("x", pa.int32()), ("y", pa.string())]) + batch = pa.RecordBatch.from_arrays([x, y], schema=schema) + + config = DataProxyConfig(data_proxy_addr=self.dm_ip) + stream = Stream(config) + + columns = [] + for name, type in zip(schema.names, schema.types): + columns.append(DataColumn(name=str(name), type=str(type))) + + upload_info = UploadInfo(type="table", columns=columns) + stream_writer = stream.get_writer(upload_info) + stream_writer.put(batch) + stream_writer.close() + + download_info = DownloadInfo(domaindata_id="test") + stream_reader = stream.get_reader(download_info) + ret_batchs = stream_reader.get() + + self.assertTrue(ret_batchs.equals(batch)) + + +if __name__ == "__main__": + unittest.main() diff --git a/dataproxy_sdk/test/data_mesh_mock.cc b/dataproxy_sdk/test/data_mesh_mock.cc index 2db096f..9489379 100644 --- a/dataproxy_sdk/test/data_mesh_mock.cc +++ b/dataproxy_sdk/test/data_mesh_mock.cc @@ -19,6 +19,7 @@ #include "arrow/flight/api.h" #include "arrow/table.h" +#include "spdlog/spdlog.h" namespace dataproxy_sdk { @@ -31,6 +32,7 @@ class DataMeshMockServer : public arrow::flight::FlightServerBase { const arrow::flight::ServerCallContext &, const arrow::flight::FlightDescriptor &descriptor, std::unique_ptr *info) override { + SPDLOG_INFO("GetFlightInfo:{}", descriptor.ToString()); ARROW_ASSIGN_OR_RAISE(auto flight_info, MakeFlightInfo()); *info = std::unique_ptr( new arrow::flight::FlightInfo(std::move(flight_info))); @@ -55,11 +57,14 @@ class DataMeshMockServer : public arrow::flight::FlightServerBase { std::shared_ptr owning_reader; std::shared_ptr schema; - if (table_) { - arrow::TableBatchReader batch_reader(*table_); - ARROW_ASSIGN_OR_RAISE(batches, batch_reader.ToRecordBatches()); - schema = table_->schema(); + if (!table_) { + return arrow::Status::Invalid("mock don't have data."); } + + arrow::TableBatchReader batch_reader(*table_); + ARROW_ASSIGN_OR_RAISE(batches, batch_reader.ToRecordBatches()); + schema = table_->schema(); + ARROW_ASSIGN_OR_RAISE(owning_reader, arrow::RecordBatchReader::Make( std::move(batches), schema)); *stream = std::unique_ptr( diff --git a/pom.xml b/pom.xml index e8166ae..16d9f51 100644 --- a/pom.xml +++ b/pom.xml @@ -3,466 +3,250 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - pom - dataproxy-common dataproxy-api - dataproxy-manager - dataproxy-service + dataproxy-common + + dataproxy-server - dataproxy-integration-tests + + dataproxy-plugins + dataproxy-core - - org.springframework.boot - spring-boot-starter-parent - 3.1.12 - org.secretflow dataproxy 0.0.1-SNAPSHOT + pom dataproxy - dataproxy parent + + dataproxy parent pom 17 + + UTF-8 + 17 17 - 1.18.24 - 3.22.5 - 1.62.2 - 32.1.1-jre - 2.17.2 - 1.3.2 - 1.7 - 1.4.14 + 3.6.0 + 3.3.1 + 3.7.1 + 3.4.2 + + 1.7.1 + 0.6.1 + 3.3.1 + + 1.18.34 + + 18.0.0 + 4.28.3 + 1.68.1 + 4.1.115.Final + 1.3.2 + 3.1.0 + + 
2.0.16 + 1.5.6 + 3.1.8 + 2.0 + + + 0.50.4-public + 1.1.10.7 + + 2.17.0 + 3.12.0 4.4 1.26.2 2.10.1 - 4.0.3 - 1.3.1 - 3.4.0 - 14.0.0 - 2.0 - 1.9.6 - 2.0.1.Final - 6.2.3.Final - 4.1.101.Final - 4.1.101.Final - 2.0.61.Final - 5.4 - 2.5.5 - - 8.2.0 - 3.3.6 - 1.5.4 - - 0.48.6-public - 1.1.5 - 1.0 - 1.6.4 - 3.3.13 - - 1.7.1 - 0.6.1 - 3.3.1 - UTF-8 + - org.secretflow - dataproxy-api - ${project.version} - - - org.secretflow - dataproxy-common - ${project.version} - - - org.secretflow - dataproxy-integration-tests - ${project.version} + org.apache.arrow + arrow-bom + ${arrow.version} + pom + import + + - org.secretflow - dataproxy-manager - ${project.version} + io.netty + netty-bom + ${netty.version} + pom + import + + - org.secretflow - dataproxy-server - ${project.version} + io.grpc + grpc-bom + ${grpc.version} + pom + import + + - org.secretflow - dataproxy-service - ${project.version} + com.google.protobuf + protobuf-bom + ${protobuf.version} + pom + import org.projectlombok lombok ${lombok.version} + compile + javax.annotation javax.annotation-api - ${javax.version} - - - com.google.guava - guava - ${guava.version} - - - io.netty - netty-all - ${netty-all.version} - - - io.netty - netty-handler - ${netty-all.version} - - - io.netty - netty-tcnative-boringssl-static - ${netty-tcnative-boringssl-static.version} + ${javax-annotation-api.version} - com.google.protobuf - protobuf-java - ${protobuf.version} - - - com.google.protobuf - protobuf-java-util - ${protobuf.version} + jakarta.validation + jakarta.validation-api + ${jakarta.validation-api.version} + + - io.grpc - grpc-netty-shaded - ${grpc.version} + org.slf4j + slf4j-bom + ${slf4j.version} + pom + import - - - - org.apache.commons - commons-compress - ${commons-compress.version} + ch.qos.logback + logback-parent + ${logback.version} + pom + import + - - org.apache.commons - commons-configuration2 - ${commons-configuration2.version} + org.yaml + snakeyaml + ${snakeyaml.version} - - - - - org.apache.arrow - arrow-vector - ${arrow.version} - - - org.apache.arrow - arrow-dataset - ${arrow.version} - - org.apache.arrow - arrow-memory-netty - ${arrow.version} + com.github.ben-manes.caffeine + caffeine + ${caffeine.version} + - org.apache.arrow - arrow-format - ${arrow.version} + commons-io + commons-io + ${commons-io.version} + + - org.apache.arrow - flight-core - ${arrow.version} + com.aliyun.odps + odps-sdk-core + ${odps-code.version} - io.netty - netty-transport-native-unix-common + org.codehaus.jackson + jackson-mapper-asl - io.netty - netty-transport-native-kqueue + com.fasterxml.jackson.core + jackson-databind - io.netty - netty-transport-native-epoll + com.fasterxml.jackson.core + jackson-annotations - io.netty - netty-handler + com.google.protobuf + protobuf-java - com.fasterxml.jackson.core - * + com.google.guava + guava + + + com.google.code.gson + gson - - org.apache.arrow - flight-grpc - ${arrow.version} - - + - com.fasterxml.jackson.datatype - jackson-datatype-jsr310 - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} - - - - - com.squareup.okio - okio - ${okio.version} - - - org.apache.commons - commons-lang3 - ${commons-lang3.version} - - - org.apache.commons - commons-collections4 - ${commons-collections4.version} - - - org.yaml - snakeyaml - ${snakeyaml.version} - - - commons-io - commons-io - 2.14.0 - - - com.opencsv - opencsv - 
diff --git a/proto/kuscia/flightdm.proto b/proto/kuscia/flightdm.proto
index 30895f0..51f2217 100644
--- a/proto/kuscia/flightdm.proto
+++ b/proto/kuscia/flightdm.proto
@@ -16,7 +16,6 @@ syntax = "proto3";
 
 package kuscia.proto.api.v1alpha1.datamesh;
 
-import "kuscia/common.proto";
 import "kuscia/domaindata.proto";
 import "kuscia/domaindatasource.proto";
@@ -137,3 +136,9 @@ message ActionCreateDomainDataSourceRequest {
 message ActionCreateDomainDataSourceResponse {
   CreateDomainDataSourceResponse response = 1;
 }
+
+message CommandDataSourceSqlQuery {
+  string datasource_id = 1;
+  // only SELECT statements are supported
+  string sql = 2;
+}
\ No newline at end of file
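The new CommandDataSourceSqlQuery message pairs a datasource id with a SELECT-only SQL string; the CommandDataMeshSqlQuery wrapper added to flightinner.proto just below attaches the resolved DomainDataSource. As a rough sketch of how generated Java bindings would build it — the Java package and outer class name are assumptions about protoc's default output for this file, and only the message and field names come from the proto above:

// Hypothetical generated-code usage; the real Java package and outer class
// depend on the java_package/java_outer_classname options for flightdm.proto.
import kuscia.proto.api.v1alpha1.datamesh.Flightdm;

public class SqlQueryExample {
    public static void main(String[] args) {
        Flightdm.CommandDataSourceSqlQuery query =
                Flightdm.CommandDataSourceSqlQuery.newBuilder()
                        .setDatasourceId("demo-datasource")      // illustrative id
                        .setSql("SELECT x, y FROM demo_table")   // only SELECT is supported
                        .build();
        // The serialized command would travel inside a Flight descriptor.
        byte[] cmd = query.toByteArray();
        System.out.println(cmd.length);
    }
}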
"kuscia/domaindatasource.proto"; @@ -137,3 +136,9 @@ message ActionCreateDomainDataSourceRequest { message ActionCreateDomainDataSourceResponse { CreateDomainDataSourceResponse response = 1; } + +message CommandDataSourceSqlQuery { + string datasource_id = 1; + // only support select sql + string sql = 2; +} \ No newline at end of file diff --git a/proto/kuscia/flightinner.proto b/proto/kuscia/flightinner.proto index 97e893a..ce97dbf 100644 --- a/proto/kuscia/flightinner.proto +++ b/proto/kuscia/flightinner.proto @@ -33,3 +33,8 @@ message CommandDataMeshUpdate { DomainData domaindata = 2; DomainDataSource datasource = 3; } + +message CommandDataMeshSqlQuery { + CommandDataSourceSqlQuery query = 1; + DomainDataSource datasource = 2; +} From 36338ccb75b1649690dd9edc89170e03be60d382 Mon Sep 17 00:00:00 2001 From: yuexie <38447111+YanZhuangz@users.noreply.github.com> Date: Wed, 18 Dec 2024 17:11:16 +0800 Subject: [PATCH 2/3] Update repositories.bzl remove url --- dataproxy_sdk/bazel/repositories.bzl | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/dataproxy_sdk/bazel/repositories.bzl b/dataproxy_sdk/bazel/repositories.bzl index 2a246a0..ef1cbea 100644 --- a/dataproxy_sdk/bazel/repositories.bzl +++ b/dataproxy_sdk/bazel/repositories.bzl @@ -52,22 +52,14 @@ def _yacl(): ) def _kuscia(): - # maybe( - # http_archive, - # name = "kuscia", - # urls = [ - # "https://github.com/secretflow/kuscia/archive/refs/tags/v0.11.0b0.tar.gz", - # ], - # strip_prefix = "kuscia-0.11.0b0", - # sha256 = "c8de425a5f442ba3fa30a9b5943f9fd056efd9ab610ddc2168d5ffcf71224974", - # ) - - # TODO:发版需要替换成github地址 maybe( - git_repository, + http_archive, name = "kuscia", - commit = "04b5f468a397a0a9e54b34a461fcd6e81b2aad9a", - remote = "git@code.alipay.com:secretflow/kuscia.git", + urls = [ + "https://github.com/secretflow/kuscia/archive/refs/tags/v0.11.0b0.tar.gz", + ], + strip_prefix = "kuscia-0.11.0b0", + sha256 = "c8de425a5f442ba3fa30a9b5943f9fd056efd9ab610ddc2168d5ffcf71224974", ) def _bazel_rules_pkg(): From 1ab66e3bd1eb885e4f7fd0b079af022af70cb27d Mon Sep 17 00:00:00 2001 From: yuexie <38447111+YanZhuangz@users.noreply.github.com> Date: Fri, 20 Dec 2024 10:49:47 +0800 Subject: [PATCH 3/3] Update Kuscia version update Kuscia to 0.13.0b0 --- dataproxy_sdk/bazel/repositories.bzl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dataproxy_sdk/bazel/repositories.bzl b/dataproxy_sdk/bazel/repositories.bzl index ef1cbea..5164cf8 100644 --- a/dataproxy_sdk/bazel/repositories.bzl +++ b/dataproxy_sdk/bazel/repositories.bzl @@ -56,10 +56,10 @@ def _kuscia(): http_archive, name = "kuscia", urls = [ - "https://github.com/secretflow/kuscia/archive/refs/tags/v0.11.0b0.tar.gz", + "https://github.com/secretflow/kuscia/archive/refs/tags/v0.13.0b0.tar.gz", ], - strip_prefix = "kuscia-0.11.0b0", - sha256 = "c8de425a5f442ba3fa30a9b5943f9fd056efd9ab610ddc2168d5ffcf71224974", + strip_prefix = "kuscia-0.13.0b0", + sha256 = "8c7b638ef510a665af12f7b92ed0c43de7712154234e52ef4d8609b8afebfdac", ) def _bazel_rules_pkg():