From 2aebb555aa0e7448a91dfab34be07f2fcbd236a3 Mon Sep 17 00:00:00 2001 From: cirnoooo123 <1601169949@qq.com> Date: Fri, 21 Apr 2023 21:42:28 +0800 Subject: [PATCH] implement distance-join, knn-func, knn-join --- .../OpenHuFuSpatialBenchmarkTest.java | 27 +++- .../test/resources/spatial-user-configs.json | 9 ++ .../openhufu/common/exception/ErrorCode.java | 1 + .../core/implementor/UserSideImplementor.java | 119 +++++++++++++++++- .../spatial/join/DistanceJoin.java | 53 ++++++++ .../implementor/spatial/join/KNNJoin.java | 82 ++++++++++++ .../spatial/knn/BinarySearchKNN.java | 18 ++- .../implementor/spatial/knn/KNNConverter.java | 44 +++++++ .../openhufu/data/storage/ArrayRow.java | 25 ++++ .../sample/spatial/database0/join_left.csv | 11 ++ .../sample/spatial/database0/join_left.scm | 2 + release/config/spatialOwner1.json | 18 +++ .../java/com/hufudb/openhufu/udf/KNN.java | 44 +++++++ .../com.hufudb.openhufu.udf.ScalarUDF | 3 +- 14 files changed, 440 insertions(+), 16 deletions(-) create mode 100644 core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/join/DistanceJoin.java create mode 100644 core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/join/KNNJoin.java create mode 100644 core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/KNNConverter.java create mode 100644 dataset/sample/spatial/database0/join_left.csv create mode 100644 dataset/sample/spatial/database0/join_left.scm create mode 100644 udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/KNN.java diff --git a/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialBenchmarkTest.java b/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialBenchmarkTest.java index 3bf2957f..143ffa6a 100644 --- a/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialBenchmarkTest.java +++ b/benchmark/src/test/java/com/hufudb/openhufu/benchmark/OpenHuFuSpatialBenchmarkTest.java @@ -124,7 +124,7 @@ public void testSqlRangeCount() throws SQLException { @Test public void testSqlRangeJoin() throws SQLException { - String sql = "select * from spatial s1 join spatial s2 on DWithin(s1.S_POINT, s2.S_POINT, 5)"; + String sql = "select * from join_left s1 join spatial s2 on DWithin(s1.JL_POINT, s2.S_POINT, 500000)"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); long count = 0; @@ -132,14 +132,29 @@ public void testSqlRangeJoin() throws SQLException { printLine(dataset); ++count; } - assertEquals(1, count); + assertEquals(78, count); dataset.close(); } } @Test - public void testSqlKNNQuery() throws SQLException { - String sql = "select * from spatial order by Distance(S_POINT, POINT(1404050, -4762163)) asc limit 10"; + public void testSqlKNNQuery1() throws SQLException { + String sql = "select S_ID from spatial order by Distance(POINT(1404050, -4762163), S_POINT) asc limit 10"; + try (Statement stmt = user.createStatement()) { + ResultSet dataset = stmt.executeQuery(sql); + long count = 0; + while (dataset.next()) { + printLine(dataset); + ++count; + } + assertEquals(10, count); + dataset.close(); + } + } + + @Test + public void testSqlKNNQuery2() throws SQLException { + String sql = "select S_ID from spatial where KNN(POINT(1404050, -4762163), S_POINT, 10)"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); long count = 0; @@ -154,7 +169,7 @@ public void testSqlKNNQuery() throws SQLException { @Test public void testSqlKNNJOIN() throws SQLException { - String sql = "select * from spatial s1 join spatial s2 on KNN(s1.S_POINT, s2.S_POINT, 5)"; + String sql = "select s1.JL_ID, s2.S_ID from join_left s1 join spatial s2 on KNN(s1.JL_POINT, s2.S_POINT, 5)"; try (Statement stmt = user.createStatement()) { ResultSet dataset = stmt.executeQuery(sql); long count = 0; @@ -162,7 +177,7 @@ public void testSqlKNNJOIN() throws SQLException { printLine(dataset); ++count; } - assertEquals(1, count); + assertEquals(50, count); dataset.close(); } } diff --git a/benchmark/src/test/resources/spatial-user-configs.json b/benchmark/src/test/resources/spatial-user-configs.json index 9cc5942b..313a045c 100644 --- a/benchmark/src/test/resources/spatial-user-configs.json +++ b/benchmark/src/test/resources/spatial-user-configs.json @@ -21,6 +21,15 @@ "localName": "spatial" } ] + }, + { + "tableName": "join_left", + "localTables": [ + { + "endpoint": "localhost:12345", + "localName": "join_left" + } + ] } ] } \ No newline at end of file diff --git a/common/src/main/java/com/hufudb/openhufu/common/exception/ErrorCode.java b/common/src/main/java/com/hufudb/openhufu/common/exception/ErrorCode.java index 83fd7c8e..65360287 100644 --- a/common/src/main/java/com/hufudb/openhufu/common/exception/ErrorCode.java +++ b/common/src/main/java/com/hufudb/openhufu/common/exception/ErrorCode.java @@ -24,6 +24,7 @@ public enum ErrorCode { IMPLEMENTOR_CONFIG_FILE_PATH_NOT_SET(20037, "implementor config file path not set"), IMPLEMENTOR_CONFIG_FILE_NOT_FOUND(20038, "implementor config file: {} not found"), + RANGE_JOIN_LEFT_TABLE_NOT_PUBLIC(20041, "left table in range join must be public"), // udf error UDF_LOAD_FAILED(30001, "udf: {} load failed"), diff --git a/core/src/main/java/com/hufudb/openhufu/core/implementor/UserSideImplementor.java b/core/src/main/java/com/hufudb/openhufu/core/implementor/UserSideImplementor.java index cf78c47a..35dce8a4 100644 --- a/core/src/main/java/com/hufudb/openhufu/core/implementor/UserSideImplementor.java +++ b/core/src/main/java/com/hufudb/openhufu/core/implementor/UserSideImplementor.java @@ -1,5 +1,7 @@ package com.hufudb.openhufu.core.implementor; +import com.hufudb.openhufu.common.exception.ErrorCode; +import com.hufudb.openhufu.common.exception.OpenHuFuException; import com.hufudb.openhufu.core.client.OpenHuFuClient; import com.hufudb.openhufu.core.client.OwnerClient; import java.util.ArrayList; @@ -8,11 +10,15 @@ import java.util.concurrent.Callable; import java.util.concurrent.Future; +import com.hufudb.openhufu.core.implementor.spatial.join.DistanceJoin; +import com.hufudb.openhufu.core.implementor.spatial.join.KNNJoin; import com.hufudb.openhufu.core.implementor.spatial.knn.BinarySearchKNN; +import com.hufudb.openhufu.core.implementor.spatial.knn.KNNConverter; import com.hufudb.openhufu.core.sql.plan.PlanUtils; import com.hufudb.openhufu.data.schema.Schema; import com.hufudb.openhufu.data.storage.*; import com.hufudb.openhufu.data.storage.MultiSourceDataSet.Producer; +import com.hufudb.openhufu.expression.ExpressionUtils; import com.hufudb.openhufu.implementor.PlanImplementor; import com.hufudb.openhufu.interpreter.Interpreter; import com.hufudb.openhufu.plan.BinaryPlan; @@ -102,11 +108,111 @@ DataSet ownerSideQuery(Plan plan) { return concurrentDataSet; } + private boolean isPrivacyRangeJoin(BinaryPlan plan) { + if (plan.getJoinCond().getModifier().equals(Modifier.PUBLIC)) { + return false; + } + if (!plan.getJoinCond().getCondition().getIn(0).getModifier().equals(Modifier.PUBLIC)) { + throw new OpenHuFuException(ErrorCode.RANGE_JOIN_LEFT_TABLE_NOT_PUBLIC); + } + return plan.getJoinCond().getCondition().getStr().equals("dwithin"); + } + + private boolean isPrivacyKNNJoin(BinaryPlan plan) { + if (plan.getJoinCond().getModifier().equals(Modifier.PUBLIC)) { + return false; + } + if (!plan.getJoinCond().getCondition().getIn(0).getModifier().equals(Modifier.PUBLIC)) { + throw new OpenHuFuException(ErrorCode.RANGE_JOIN_LEFT_TABLE_NOT_PUBLIC); + } + return plan.getJoinCond().getCondition().getStr().equals("knn"); + } + + private DataSet privacySpatialJoin(BinaryPlan plan, boolean isDistanceJoin) { + return privacySpatialJoin(plan, isDistanceJoin, false); + } + + private DataSet privacySpatialJoin(BinaryPlan plan, boolean isDistanceJoin, boolean isUsingKNNFunc) { + DataSet left = ownerSideQuery(plan.getChildren().get(0)); + DataSetIterator leftIter = left.getIterator(); + List arrayRows = new ArrayList<>(); + + boolean containsLeftKey = false; + int leftKey = -1; + for (OpenHuFuPlan.Expression expression: plan.getSelectExps()) { + if (expression.getOpType().equals(OpenHuFuPlan.OperatorType.REF) + && expression.getI32() == plan.getJoinCond().getCondition().getIn(0).getI32()) { + containsLeftKey = true; + } + } + if (!containsLeftKey) { + for (int i = 0; i < plan.getChildren().get(0).getSelectExps().size(); i++) { + if (plan.getChildren().get(0).getSelectExps().get(i).getI32() + == plan.getJoinCond().getCondition().getIn(0).getI32()) { + leftKey = i; + break; + } + } + } + + boolean containsRightKey = false; + int rightKey = -1; + for (OpenHuFuPlan.Expression expression: plan.getSelectExps()) { + if (expression.getOpType().equals(OpenHuFuPlan.OperatorType.REF) + && expression.getI32() == plan.getJoinCond().getCondition().getIn(1).getI32()) { + containsRightKey = true; + } + } + if (!containsRightKey) { + for (int i = 0; i < plan.getChildren().get(1).getSelectExps().size(); i++) { + if (plan.getChildren().get(1).getSelectExps().get(i).getI32() + == plan.getJoinCond().getCondition().getIn(1).getI32() - plan.getChildren().get(0).getSelectExps().size()) { + rightKey = i; + break; + } + } + } + while (leftIter.next()) { + int leftRef = plan.getJoinCond().getCondition().getIn(0).getI32(); + DataSet rightDataSet; + if (isDistanceJoin) { + rightDataSet = ownerSideQuery(DistanceJoin + .generateDistanceQueryPlan(plan, leftIter.get(leftRef).toString(), rightKey)); + } + else { + rightDataSet = privacyKNN((UnaryPlan) KNNJoin.generateKNNQueryPlan(plan, leftIter.get(leftRef).toString(), rightKey), isUsingKNNFunc); + } + DataSetIterator rightIter = rightDataSet.getIterator(); + while (rightIter.next()) { + arrayRows.add(ArrayRow.merge(leftIter, rightIter, leftKey)); + LOG.info(ArrayRow.merge(leftIter, rightIter, leftKey).toString()); + } + } + Schema schema; + schema = ExpressionUtils.createSchema(plan.getSelectExps()); + LOG.info(schema.toString()); + return new ArrayDataSet(schema, arrayRows); + } + @Override public DataSet implement(Plan plan) { + LOG.info(plan.toString()); + boolean isUsingKNNFuc = plan instanceof LeafPlan + && !plan.getWhereExps().isEmpty() + && plan.getWhereExps().get(0).getOpType().equals(OpenHuFuPlan.OperatorType.SCALAR_FUNC) + && plan.getWhereExps().get(0).getStr().equals("knn"); + if (isUsingKNNFuc) { + plan = KNNConverter.convertKNN((LeafPlan) plan); + } if (isMultiParty(plan)) { + if (plan instanceof BinaryPlan && isPrivacyRangeJoin((BinaryPlan) plan)) { + return privacySpatialJoin((BinaryPlan) plan, true); + } + if (plan instanceof BinaryPlan && isPrivacyKNNJoin((BinaryPlan) plan)) { + return privacySpatialJoin((BinaryPlan) plan, false, isUsingKNNFuc); + } if (plan instanceof UnaryPlan && isMultiPartySecureKNN((UnaryPlan) plan)) { - return privacyKNN((UnaryPlan) plan); + return privacyKNN((UnaryPlan) plan, isUsingKNNFuc); } // implement on owner side DataSet dataset = ownerSideQuery(plan); @@ -118,7 +224,7 @@ public DataSet implement(Plan plan) { } } - private DataSet privacyKNN(UnaryPlan plan) { + private DataSet privacyKNN(UnaryPlan plan, boolean isUsingKNNFunc) { LOG.info("Using binary-search KNN."); boolean USE_DP = false; int k = plan.getFetch(); @@ -160,11 +266,12 @@ private DataSet privacyKNN(UnaryPlan plan) { right = mid; } else { loop++; - return kNNCircleRangeQuery(plan, mid); + DataSet dataSet = ArrayDataSet.materialize(kNNCircleRangeQuery(plan, mid, isUsingKNNFunc)); + return dataSet; } loop++; } - return kNNCircleRangeQuery(plan, right); + return kNNCircleRangeQuery(plan, right, isUsingKNNFunc); } private double kNNRadiusQuery(UnaryPlan plan) { //todo -sjz @@ -191,8 +298,8 @@ private long privacyCompare(UnaryPlan plan, double range, long k) { long res = (long) dataSet.get(0); return res - k; } - private DataSet kNNCircleRangeQuery(UnaryPlan plan, double range) { - return ownerSideQuery(BinarySearchKNN.generateKNNCircleRangeQueryPlan(plan, range)); + private DataSet kNNCircleRangeQuery(UnaryPlan plan, double range, boolean isUsingKNNFunc) { + return ownerSideQuery(BinarySearchKNN.generateKNNCircleRangeQueryPlan(plan, range, isUsingKNNFunc)); } private boolean isMultiPartySecureKNN(UnaryPlan unary) { LeafPlan leaf = (LeafPlan) unary.getChildren().get(0); diff --git a/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/join/DistanceJoin.java b/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/join/DistanceJoin.java new file mode 100644 index 00000000..6217f6a6 --- /dev/null +++ b/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/join/DistanceJoin.java @@ -0,0 +1,53 @@ +package com.hufudb.openhufu.core.implementor.spatial.join; + +import com.google.common.collect.ImmutableList; +import com.hufudb.openhufu.data.storage.utils.GeometryUtils; +import com.hufudb.openhufu.expression.ExpressionFactory; +import com.hufudb.openhufu.plan.BinaryPlan; +import com.hufudb.openhufu.plan.LeafPlan; +import com.hufudb.openhufu.plan.Plan; +import com.hufudb.openhufu.proto.OpenHuFuData; +import com.hufudb.openhufu.proto.OpenHuFuPlan; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public class DistanceJoin { + static final Logger LOG = LoggerFactory.getLogger(DistanceJoin.class); + + public static Plan generateDistanceQueryPlan(BinaryPlan binaryPlan, String point, int rightKey) { + LOG.info(String.valueOf(rightKey)); + LeafPlan originalLeaf = (LeafPlan) binaryPlan.getChildren().get(1); + LeafPlan leafPlan = new LeafPlan(); + leafPlan.setTableName(originalLeaf.getTableName()); + if (rightKey == -1) { + leafPlan.setSelectExps(originalLeaf.getSelectExps()); + } + else { + List selects = new ArrayList<>(); + int i = 0; + for (OpenHuFuPlan.Expression expression: originalLeaf.getSelectExps()) { + if (i != rightKey) { + selects.add(expression); + } + i++; + } + leafPlan.setSelectExps(selects); + } + OpenHuFuPlan.Expression oldDwithin = binaryPlan.getJoinCond().getCondition(); + OpenHuFuPlan.Expression left = ExpressionFactory.createLiteral(OpenHuFuData.ColumnType.GEOMETRY, GeometryUtils.fromString(point)); + OpenHuFuPlan.Expression right = ExpressionFactory.createInputRef(originalLeaf.getSelectExps().get(oldDwithin.getIn(1).getI32() + - binaryPlan.getChildren().get(0).getSelectExps().size()).getI32(), oldDwithin.getIn(1).getOutType(), + oldDwithin.getIn(1).getModifier()); + OpenHuFuPlan.Expression dwithin = ExpressionFactory.createScalarFunc(OpenHuFuData.ColumnType.BOOLEAN, "dwithin", + ImmutableList.of(left, right, oldDwithin.getIn(2))); + LOG.info("rewriting DistanceQueryPlan"); + List whereExps = ImmutableList.of(dwithin); + leafPlan.setWhereExps(whereExps); + leafPlan.setOrders(originalLeaf.getOrders()); + LOG.info(leafPlan.toString()); + return leafPlan; + } +} diff --git a/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/join/KNNJoin.java b/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/join/KNNJoin.java new file mode 100644 index 00000000..b831d89f --- /dev/null +++ b/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/join/KNNJoin.java @@ -0,0 +1,82 @@ +package com.hufudb.openhufu.core.implementor.spatial.join; + +import com.google.common.collect.ImmutableList; +import com.hufudb.openhufu.data.storage.utils.GeometryUtils; +import com.hufudb.openhufu.expression.ExpressionFactory; +import com.hufudb.openhufu.plan.BinaryPlan; +import com.hufudb.openhufu.plan.LeafPlan; +import com.hufudb.openhufu.plan.Plan; +import com.hufudb.openhufu.plan.UnaryPlan; +import com.hufudb.openhufu.proto.OpenHuFuData; +import com.hufudb.openhufu.proto.OpenHuFuPlan; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public class KNNJoin { + static final Logger LOG = LoggerFactory.getLogger(KNNJoin.class); + + public static Plan generateKNNQueryPlan(BinaryPlan binaryPlan, String point, int rightKey) { + LOG.info(String.valueOf(rightKey)); + LeafPlan originalLeaf = (LeafPlan) binaryPlan.getChildren().get(1); + LeafPlan leafPlan = new LeafPlan(); + leafPlan.setTableName(originalLeaf.getTableName()); + if (rightKey == -1) { + leafPlan.setSelectExps(originalLeaf.getSelectExps()); + } + else { + List selects = new ArrayList<>(); + int i = 0; + for (OpenHuFuPlan.Expression expression: originalLeaf.getSelectExps()) { + if (i != rightKey) { + selects.add(expression); + } + i++; + } + leafPlan.setSelectExps(selects); + } + OpenHuFuPlan.Expression oldDwithin = binaryPlan.getJoinCond().getCondition(); + OpenHuFuPlan.Expression left = ExpressionFactory.createLiteral(OpenHuFuData.ColumnType.GEOMETRY, GeometryUtils.fromString(point)); + OpenHuFuPlan.Expression right = ExpressionFactory.createInputRef(originalLeaf.getSelectExps().get(oldDwithin.getIn(1).getI32() + - binaryPlan.getChildren().get(0).getSelectExps().size()).getI32(), oldDwithin.getIn(1).getOutType(), + oldDwithin.getIn(1).getModifier()); + OpenHuFuPlan.Expression dwithin = ExpressionFactory.createScalarFunc(OpenHuFuData.ColumnType.BOOLEAN, "knn", + ImmutableList.of(left, right, oldDwithin.getIn(2))); + LOG.info("rewriting DistanceQueryPlan"); + List whereExps = ImmutableList.of(dwithin); + leafPlan.setWhereExps(whereExps); + leafPlan.setOrders(originalLeaf.getOrders()); + LOG.info(leafPlan.toString()); + return convertKNN(leafPlan); + } + + public static UnaryPlan convertKNN(LeafPlan plan) { + LOG.info("converting KNN"); + LeafPlan leafPlan = new LeafPlan(); + leafPlan.setTableName(plan.getTableName()); + List selects1 = new ArrayList<>(plan.getSelectExps()); + OpenHuFuPlan.Expression knn = plan.getWhereExps().get(0); + OpenHuFuPlan.Expression distance = ExpressionFactory + .createScalarFunc(OpenHuFuData.ColumnType.DOUBLE, "distance", + ImmutableList.of(knn.getIn(0), knn.getIn(1))); + selects1.add(distance); + leafPlan.setSelectExps(selects1); + OpenHuFuPlan.Collation order1 = OpenHuFuPlan.Collation.newBuilder().setRef(plan.getSelectExps().size()) + .setDirection(OpenHuFuPlan.Direction.ASC).build(); + leafPlan.setOrders(ImmutableList.of(order1)); + leafPlan.setFetch((int) knn.getIn(2).getF64()); + + UnaryPlan unaryPlan = new UnaryPlan(leafPlan); + List selects2 = new ArrayList<>(plan.getSelectExps()); + selects2.add(ExpressionFactory.createInputRef(order1.getRef(), OpenHuFuData.ColumnType.DOUBLE, OpenHuFuData.Modifier.PROTECTED)); + unaryPlan.setSelectExps(selects2); + OpenHuFuPlan.Collation order2 = OpenHuFuPlan.Collation.newBuilder().setRef(plan.getSelectExps().size()) + .setDirection(OpenHuFuPlan.Direction.ASC).build(); + unaryPlan.setOrders(ImmutableList.of(order2)); + unaryPlan.setFetch(leafPlan.getFetch()); + LOG.info(unaryPlan.toString()); + return unaryPlan; + } +} diff --git a/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/BinarySearchKNN.java b/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/BinarySearchKNN.java index c4bcb313..73b390d2 100644 --- a/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/BinarySearchKNN.java +++ b/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/BinarySearchKNN.java @@ -11,6 +11,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.ArrayList; import java.util.List; public class BinarySearchKNN { @@ -64,11 +65,23 @@ public static Plan generatePrivacyComparePlan(UnaryPlan originalPlan, double ran LOG.info(unaryPlan.toString()); return unaryPlan; } - public static Plan generateKNNCircleRangeQueryPlan(UnaryPlan originalPlan, double range) { + public static Plan generateKNNCircleRangeQueryPlan(UnaryPlan originalPlan, double range, boolean isUsingKNNFunc) { LeafPlan originalLeaf = (LeafPlan) originalPlan.getChildren().get(0); LeafPlan leafPlan = new LeafPlan(); leafPlan.setTableName(originalLeaf.getTableName()); - leafPlan.setSelectExps(originalLeaf.getSelectExps()); + + List selects = new ArrayList<>(); + for (int i = 0; i < originalLeaf.getSelectExps().size(); i++) { + if (i != originalLeaf.getOrders().get(0).getRef()) { + selects.add(originalLeaf.getSelectExps().get(i)); + } + } + if (isUsingKNNFunc) { + leafPlan.setSelectExps(selects); + } + else { + leafPlan.setSelectExps(originalLeaf.getSelectExps()); + } OpenHuFuPlan.Expression distance = originalLeaf.getSelectExps() .get(originalLeaf.getOrders().get(0).getRef()); OpenHuFuPlan.Expression dwithin = ExpressionFactory.createScalarFunc(OpenHuFuData.ColumnType.BOOLEAN, "dwithin", @@ -78,7 +91,6 @@ public static Plan generateKNNCircleRangeQueryPlan(UnaryPlan originalPlan, doubl LOG.info("rewriting KNNCircleRangeQueryPlan"); List whereExps = ImmutableList.of(dwithin); leafPlan.setWhereExps(whereExps); - leafPlan.setOrders(originalLeaf.getOrders()); LOG.info(leafPlan.toString()); return leafPlan; } diff --git a/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/KNNConverter.java b/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/KNNConverter.java new file mode 100644 index 00000000..9eb9b4ac --- /dev/null +++ b/core/src/main/java/com/hufudb/openhufu/core/implementor/spatial/knn/KNNConverter.java @@ -0,0 +1,44 @@ +package com.hufudb.openhufu.core.implementor.spatial.knn; + +import com.google.common.collect.ImmutableList; +import com.hufudb.openhufu.expression.ExpressionFactory; +import com.hufudb.openhufu.plan.LeafPlan; +import com.hufudb.openhufu.plan.UnaryPlan; +import com.hufudb.openhufu.proto.OpenHuFuData; +import com.hufudb.openhufu.proto.OpenHuFuPlan; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public class KNNConverter { + static final Logger LOG = LoggerFactory.getLogger(KNNConverter.class); + public static UnaryPlan convertKNN(LeafPlan plan) { + LOG.info("converting KNN"); + LeafPlan leafPlan = new LeafPlan(); + leafPlan.setTableName(plan.getTableName()); + List selects1 = new ArrayList<>(plan.getSelectExps()); + OpenHuFuPlan.Expression knn = plan.getWhereExps().get(0); + OpenHuFuPlan.Expression distance = ExpressionFactory + .createScalarFunc(OpenHuFuData.ColumnType.DOUBLE, "distance", + ImmutableList.of(knn.getIn(0), knn.getIn(1))); + selects1.add(distance); + leafPlan.setSelectExps(selects1); + OpenHuFuPlan.Collation order1 = OpenHuFuPlan.Collation.newBuilder().setRef(plan.getSelectExps().size()) + .setDirection(OpenHuFuPlan.Direction.ASC).build(); + leafPlan.setOrders(ImmutableList.of(order1)); + leafPlan.setFetch((int) knn.getIn(2).getF64()); + + UnaryPlan unaryPlan = new UnaryPlan(leafPlan); + List selects2 = new ArrayList<>(plan.getSelectExps()); + selects2.add(ExpressionFactory.createInputRef(order1.getRef(), OpenHuFuData.ColumnType.DOUBLE, OpenHuFuData.Modifier.PROTECTED)); + unaryPlan.setSelectExps(selects2); + OpenHuFuPlan.Collation order2 = OpenHuFuPlan.Collation.newBuilder().setRef(plan.getSelectExps().size()) + .setDirection(OpenHuFuPlan.Direction.ASC).build(); + unaryPlan.setOrders(ImmutableList.of(order2)); + unaryPlan.setFetch(leafPlan.getFetch()); + LOG.info(unaryPlan.toString()); + return unaryPlan; + } +} diff --git a/data/src/main/java/com/hufudb/openhufu/data/storage/ArrayRow.java b/data/src/main/java/com/hufudb/openhufu/data/storage/ArrayRow.java index ba4ab38f..7c7e7c5c 100644 --- a/data/src/main/java/com/hufudb/openhufu/data/storage/ArrayRow.java +++ b/data/src/main/java/com/hufudb/openhufu/data/storage/ArrayRow.java @@ -49,6 +49,13 @@ public boolean equals(Object obj) { } } + @Override + public String toString() { + return "ArrayRow{" + + "values=" + Arrays.toString(values) + + '}'; + } + @Override public int hashCode() { return Arrays.hashCode(values); @@ -67,6 +74,24 @@ public static ArrayRow materialize(Row row) { return builder.build(); } + public static ArrayRow merge(Row row1, Row row2, int pass) { + final int size1 = (pass == -1)? row1.size() : row1.size() - 1; + final int size2 = row2.size(); + Builder builder = new Builder(size1 + size2); + int j = 0; + for (int i = 0; i < size1; ++i) { + if (pass == i) { + j++; + } + builder.set(i, row1.get(j)); + j++; + } + for (int i = 0; i < size2; ++i) { + builder.set(i + size1, row2.get(i)); + } + return builder.build(); + } + public static Object[] materialize2ObjectArray(Row row) { final int size = row.size(); Builder builder = new Builder(size); diff --git a/dataset/sample/spatial/database0/join_left.csv b/dataset/sample/spatial/database0/join_left.csv new file mode 100644 index 00000000..61227a01 --- /dev/null +++ b/dataset/sample/spatial/database0/join_left.csv @@ -0,0 +1,11 @@ +JL_ID | JL_POINT +0 | POINT(1404050.076199729 -4762163.267865509) +1 | POINT(9589068.707095977 4113601.232669603) +2 | POINT(6043343.103128726 8207394.418033294) +3 | POINT(1299670.69002592 -5083571.532766891) +4 | POINT(-1322962.500539843 -5899239.147951469) +5 | POINT(4098436.221949188 3036324.412291553) +6 | POINT(9565705.04198173 -7505367.938363314) +7 | POINT(-5735985.3441432975 -7007295.007037635) +8 | POINT(6103121.972730368 -9609767.121882636) +9 | POINT(-1700978.7224226678 8495444.28673973) \ No newline at end of file diff --git a/dataset/sample/spatial/database0/join_left.scm b/dataset/sample/spatial/database0/join_left.scm new file mode 100644 index 00000000..88ab8826 --- /dev/null +++ b/dataset/sample/spatial/database0/join_left.scm @@ -0,0 +1,2 @@ +JL_ID | JL_POINT +LONG | GEOMETRY diff --git a/release/config/spatialOwner1.json b/release/config/spatialOwner1.json index 011db088..33c91472 100644 --- a/release/config/spatialOwner1.json +++ b/release/config/spatialOwner1.json @@ -26,6 +26,24 @@ "columnId": 1 } ] + }, + { + "actualName": "join_left", + "publishedName": "join_left", + "publishedColumns": [ + { + "name": "JL_ID", + "type": "LONG", + "modifier": "public", + "columnId": 0 + }, + { + "name": "JL_POINT", + "type": "GEOMETRY", + "modifier": "public", + "columnId": 1 + } + ] } ] } \ No newline at end of file diff --git a/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/KNN.java b/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/KNN.java new file mode 100644 index 00000000..a5ad50fa --- /dev/null +++ b/udf/spatial-udf/src/main/java/com/hufudb/openhufu/udf/KNN.java @@ -0,0 +1,44 @@ +package com.hufudb.openhufu.udf; + +import com.google.common.collect.ImmutableList; +import com.hufudb.openhufu.proto.OpenHuFuData.ColumnType; +import org.locationtech.jts.geom.Geometry; + +import java.util.List; + +public class KNN implements ScalarUDF { + + @Override + public String getName() { + return "knn"; + } + + @Override + public ColumnType getOutType(List inTypes) { + return ColumnType.BOOLEAN; + } + + public Boolean knn(Geometry left, Geometry right, Integer distance) { + return (Boolean) implement(ImmutableList.of(left, right, distance)); + } + + @Override + public Object implement(List inputs) { + throw new RuntimeException(); +// if (inputs.size() != 3) { +// LOG.error("KNN UDF expect 3 parameters, but give {}", inputs.size()); +// throw new RuntimeException("KNN UDF expect 3 parameters"); +// } +// if (inputs.get(0) == null || inputs.get(1) == null || inputs.get(2) == null) { +// return null; +// } +// if (!(inputs.get(0) instanceof Geometry) || !(inputs.get(1) instanceof Geometry) +// || !(inputs.get(2) instanceof Integer)) { +// LOG.error("KNN UDF requires (Point, Point, Integer)"); +// throw new RuntimeException("KNN UDF requires (Point, Point)"); +// } +// Geometry left = (Geometry) inputs.get(0); +// Geometry right = (Geometry) inputs.get(1); +// return true; + } +} diff --git a/udf/spatial-udf/src/main/resources/META-INF/services/com.hufudb.openhufu.udf.ScalarUDF b/udf/spatial-udf/src/main/resources/META-INF/services/com.hufudb.openhufu.udf.ScalarUDF index 240517cb..54534b24 100644 --- a/udf/spatial-udf/src/main/resources/META-INF/services/com.hufudb.openhufu.udf.ScalarUDF +++ b/udf/spatial-udf/src/main/resources/META-INF/services/com.hufudb.openhufu.udf.ScalarUDF @@ -1,3 +1,4 @@ com.hufudb.openhufu.udf.Point com.hufudb.openhufu.udf.DWithin -com.hufudb.openhufu.udf.Distance \ No newline at end of file +com.hufudb.openhufu.udf.Distance +com.hufudb.openhufu.udf.KNN \ No newline at end of file