From c7a1ea8a88cdc41528d7e47e0f125362ef164da7 Mon Sep 17 00:00:00 2001 From: Jonathan Zeller Date: Wed, 18 Dec 2024 00:01:17 +0100 Subject: [PATCH 01/47] untested changes --- test/engine/SpatialJoinAlgorithmsTest.cpp | 81 +++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/test/engine/SpatialJoinAlgorithmsTest.cpp b/test/engine/SpatialJoinAlgorithmsTest.cpp index 36af21d0f2..5b1a685e8c 100644 --- a/test/engine/SpatialJoinAlgorithmsTest.cpp +++ b/test/engine/SpatialJoinAlgorithmsTest.cpp @@ -1204,4 +1204,85 @@ TEST(SpatialJoin, isContainedInBoundingBoxes) { } // namespace boundingBox +// ===================================== DEV starting ========================= +namespace development { + +using BoostGeometryNamespace::Box; +using BoostGeometryNamespace::Point; +using BoostGeometryNamespace::Value; +// move this to BoostGeometryNamespace +typedef boost::geometry::model::polygon> Polygon; + + +// move to SpatialJoinTestHelpers, see comment of buildAreaTestQEC +std::string createAreaTestDataset() { + auto addArea = [](std::string& kg, std::string number, std::string name, + std::string area) { + kg += absl::StrCat(" ", name, " . \n", + " \"crossing\" .\n", + " ", area, " .\n" ); + }; + + std::string kg; + // note that i removed all prefixes + addArea(kg, "1", "\"zebra\"", "\"POLYGON((9.3340635 47.4266650,9.3340635 47.4266650,9.3340635 47.4266650,9.3340635 47.4266650,9.3340635 47.4266650))\"^^"); + addArea(kg, "2", "\"trafficLight\"", "\"POLYGON((9.3054501 47.4066706,9.3054501 47.4066706,9.3054501 47.4066706,9.3054501 47.4066706,9.3054501 47.4066706))\"^^"); + addArea(kg, "3", "\"bridge\"", "\"POLYGON((9.3769786 47.4222885,9.3769786 47.4222885,9.3769786 47.4222885,9.3769786 47.4222885,9.3769786 47.4222885))\"^^"); + std::cerr << kg << std::endl; + return kg; +} + +// move to SpatialJoinTestHelpers, but adapt the existing buildTestQEC method by +// adding a parameter bool usePointDataset, which when set to false calls the +// createAreaDataset method or give this parameter to the existing createDataset +// method, which has an if(usePointDataset)... else ... +QueryExecutionContext* buildAreaTestQEC() { + std::string kg = createAreaTestDataset(); + ad_utility::MemorySize blocksizePermutations = 16_MB; + auto qec = ad_utility::testing::getQec(kg, true, true, false, blocksizePermutations, + false); + return qec; +} + +// this function calculates the bounding box of a geometry, which is an area. +// This is different to the query box, which is a box, which contains the area +// where all results are contained in +Box calculateBoundingBoxOfArea(std::string wktString) { + Polygon polygon; + boost::geometry::read_wkt(wktString, polygon); + double minLng = std::numeric_limits::infinity(); + double maxLng = -std::numeric_limits::infinity(); + double minLat = std::numeric_limits::infinity(); + double maxLat = -std::numeric_limits::infinity(); + for (const auto& point : polygon.outer()) { + double lng = boost::geometry::get<0>(point); + double lat = boost::geometry::get<1>(point); + if (lng < minLng){ minLng = lng; } + if (lng > maxLng){ maxLng = lng; } + if (lat < minLat){ minLat = lat; } + if (lat > maxLat){ maxLat = lat; } + } + return Box(Point(minLng, minLat), Point(maxLng, maxLat)); +} + +// calculates the midpoint of the Box +Point calculateMidpointOfBox(Box box) { + double lng = (box.min_corner().get<0>() + box.max_corner().get<0>()) / 2.0; + double lat = (box.min_corner().get<1>() + box.max_corner().get<1>()) / 2.0; + return Point(lng, lat); +} + +TEST(SpatialJoin, development) { + auto qec = buildAreaTestQEC(); + // if the object in the kg has both a point and a polygon representation, then + // the point representation should be used by default. Add this case to the + // test kg to test this behaviour + + // next step: create bounding box for the area of the polygon +} + +} + +// ===================================== DEV ending =========================== + } // namespace From a266201c07723bce601b78a2cf90fd1aaf502eee Mon Sep 17 00:00:00 2001 From: Jonathan Zeller Date: Wed, 18 Dec 2024 22:42:03 +0100 Subject: [PATCH 02/47] continued development --- test/engine/SpatialJoinAlgorithmsTest.cpp | 81 +++++++++++++++++++++-- 1 file changed, 74 insertions(+), 7 deletions(-) diff --git a/test/engine/SpatialJoinAlgorithmsTest.cpp b/test/engine/SpatialJoinAlgorithmsTest.cpp index 5b1a685e8c..ba0ac89265 100644 --- a/test/engine/SpatialJoinAlgorithmsTest.cpp +++ b/test/engine/SpatialJoinAlgorithmsTest.cpp @@ -782,6 +782,7 @@ ExpectedRowsNearestNeighbors expectedNearestNeighbors = { mergeToRow(Eif, TF, expectedDistUniEif)}}}}}; // test the compute result method on small examples +/* TEST_P(SpatialJoinParamTest, computeResultSmallDatasetLargeChildren) { Row columnNames = { "?name1", "?obj1", "?geo1", @@ -806,6 +807,7 @@ TEST_P(SpatialJoinParamTest, computeResultSmallDatasetLargeChildren) { } } + TEST_P(SpatialJoinParamTest, computeResultSmallDatasetSmallChildren) { Row columnNames{"?obj1", "?point1", "?obj2", "?point2", "?distOfTheTwoObjectsAddedInternally"}; @@ -917,7 +919,7 @@ INSTANTIATE_TEST_SUITE_P( NearestNeighborsConfig{2, 4000}, NearestNeighborsConfig{2, 40}, NearestNeighborsConfig{3, 500000}))); - +*/ } // end of Namespace computeResultTest namespace boundingBox { @@ -1213,6 +1215,27 @@ using BoostGeometryNamespace::Value; // move this to BoostGeometryNamespace typedef boost::geometry::model::polygon> Polygon; +// helper function in debugging for outputting stuff +void print_vecs(std::vector> vec) { + for (size_t i = 0; i < vec.size(); i++) { + for (size_t k = 0; k < vec.at(i).size(); k++) { + std::cerr << vec.at(i).at(k) << " "; + } + std::cerr << std::endl; + } +} + +// helper function in debugging for outputting stuff +void print_vec(std::vector vec) { + for (size_t i = 0; i < vec.size(); i++) { + std::cerr << vec.at(i) << std::endl; + } +} + +void print_box(Box box) { + std::cerr << box.min_corner().get<0>() << " " << box.min_corner().get<1>() << " " + << box.max_corner().get<0>() << " " << box.max_corner().get<1>() << std::endl; +} // move to SpatialJoinTestHelpers, see comment of buildAreaTestQEC std::string createAreaTestDataset() { @@ -1225,10 +1248,10 @@ std::string createAreaTestDataset() { std::string kg; // note that i removed all prefixes - addArea(kg, "1", "\"zebra\"", "\"POLYGON((9.3340635 47.4266650,9.3340635 47.4266650,9.3340635 47.4266650,9.3340635 47.4266650,9.3340635 47.4266650))\"^^"); - addArea(kg, "2", "\"trafficLight\"", "\"POLYGON((9.3054501 47.4066706,9.3054501 47.4066706,9.3054501 47.4066706,9.3054501 47.4066706,9.3054501 47.4066706))\"^^"); - addArea(kg, "3", "\"bridge\"", "\"POLYGON((9.3769786 47.4222885,9.3769786 47.4222885,9.3769786 47.4222885,9.3769786 47.4222885,9.3769786 47.4222885))\"^^"); - std::cerr << kg << std::endl; + addArea(kg, "1", "\"zebra\"", "\"POLYGON((9.33 47.41, 9.31 47.45, 9.32 47.48, 9.35 47.42, 9.33 47.41))\"^^"); + addArea(kg, "2", "\"trafficLight\"", "\"POLYGON((-4.1 10.0, -9.9 10.0, -9.9 -1.0, -4.1 -1.0))\"^^"); + addArea(kg, "3", "\"bridge\"", "\"POLYGON((0.0 0.0, 1.1 0.0, 1.1 1.1, 0.0 1.1, 0.0 0.0))\"^^"); + std::cerr << std::fixed << std::setprecision(10) << kg << std::endl; return kg; } @@ -1247,7 +1270,7 @@ QueryExecutionContext* buildAreaTestQEC() { // this function calculates the bounding box of a geometry, which is an area. // This is different to the query box, which is a box, which contains the area // where all results are contained in -Box calculateBoundingBoxOfArea(std::string wktString) { +Box calculateBoundingBoxOfArea(const std::string& wktString) { Polygon polygon; boost::geometry::read_wkt(wktString, polygon); double minLng = std::numeric_limits::infinity(); @@ -1266,7 +1289,7 @@ Box calculateBoundingBoxOfArea(std::string wktString) { } // calculates the midpoint of the Box -Point calculateMidpointOfBox(Box box) { +Point calculateMidpointOfBox(const Box& box) { double lng = (box.min_corner().get<0>() + box.max_corner().get<0>()) / 2.0; double lat = (box.min_corner().get<1>() + box.max_corner().get<1>()) / 2.0; return Point(lng, lat); @@ -1274,6 +1297,14 @@ Point calculateMidpointOfBox(Box box) { TEST(SpatialJoin, development) { auto qec = buildAreaTestQEC(); + auto firstChild = buildMediumChild(qec, + {"?obj1", std::string{""}, "?name1"}, + {"?obj1", std::string{""}, "?highway"}, + {"?obj1", std::string{""}, "?area"}, "?obj1", "?obj1"); + auto result = firstChild->getResult(); + std::cerr << "========= printing result ===========" << std::endl; + std::cerr << "result size: " << result->idTable().numRows() << std::endl; + print_vec(printTable(qec, result.get())); // if the object in the kg has both a point and a polygon representation, then // the point representation should be used by default. Add this case to the // test kg to test this behaviour @@ -1281,6 +1312,42 @@ TEST(SpatialJoin, development) { // next step: create bounding box for the area of the polygon } +void testBoundingBoxOfAreaOrMidpointOfBox(bool testArea=true) { + auto checkBoundingBox = [](Box box, double minLng, double minLat, double maxLng, double maxLat) { + ASSERT_DOUBLE_EQ(minLng, box.min_corner().get<0>()); + ASSERT_DOUBLE_EQ(minLat, box.min_corner().get<1>()); + ASSERT_DOUBLE_EQ(maxLng, box.max_corner().get<0>()); + ASSERT_DOUBLE_EQ(maxLat, box.max_corner().get<1>()); + }; + + auto checkMidpoint = [](const Point& point, double lng, double lat) { + ASSERT_DOUBLE_EQ(point.get<0>(), lng); + ASSERT_DOUBLE_EQ(point.get<1>(), lat); + }; + + auto a = calculateBoundingBoxOfArea("POLYGON((9.33 47.41, 9.31 47.45, 9.32 47.48, 9.35 47.42, 9.33 47.41))"); // closed polygon + auto b = calculateBoundingBoxOfArea("POLYGON((-4.1 10.0, -9.9 10.0, -9.9 -1.0, -4.1 -1.0))"); // not closed polygon + auto c = calculateBoundingBoxOfArea("POLYGON((0.0 0.0, 1.1 0.0, 1.1 1.1, 0.0 1.1, 0.0 0.0))"); // closed polygon + + if (testArea) { + checkBoundingBox(a, 9.31, 47.41, 9.35, 47.48); + checkBoundingBox(b, -9.9, -1.0, -4.1, 10.0); + checkBoundingBox(c, 0.0, 0.0, 1.1, 1.1); + } else { + checkMidpoint(calculateMidpointOfBox(a), 9.33, 47.445); + checkMidpoint(calculateMidpointOfBox(b), -7.0, 4.5); + checkMidpoint(calculateMidpointOfBox(c), 0.55, 0.55); + } +} + +TEST(SpatialJoin, BoundingBoxOfArea) { + testBoundingBoxOfAreaOrMidpointOfBox(); +} + +TEST(SpatialJoin, MidpointOfBoundingBox) { + testBoundingBoxOfAreaOrMidpointOfBox(false); +} + } // ===================================== DEV ending =========================== From 2cc8504c1ec090f980c9ad9066d590f923a25c2d Mon Sep 17 00:00:00 2001 From: Jonathan Zeller Date: Tue, 24 Dec 2024 00:06:18 +0100 Subject: [PATCH 03/47] added all areas to the dataset and some tests --- test/engine/SpatialJoinAlgorithmsTest.cpp | 86 +++++++++++++++++++++-- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/test/engine/SpatialJoinAlgorithmsTest.cpp b/test/engine/SpatialJoinAlgorithmsTest.cpp index ba0ac89265..522a1e3b53 100644 --- a/test/engine/SpatialJoinAlgorithmsTest.cpp +++ b/test/engine/SpatialJoinAlgorithmsTest.cpp @@ -1239,18 +1239,29 @@ void print_box(Box box) { // move to SpatialJoinTestHelpers, see comment of buildAreaTestQEC std::string createAreaTestDataset() { - auto addArea = [](std::string& kg, std::string number, std::string name, + /*auto addArea = [](std::string& kg, std::string number, std::string name, std::string area) { kg += absl::StrCat(" ", name, " . \n", " \"crossing\" .\n", " ", area, " .\n" ); + }; */ + auto addArea = [](std::string& kg, std::string number, std::string name, + std::string area) { + kg += absl::StrCat(" ", name, " . \n", + " .\n", + " ", area, " .\n" ); }; std::string kg; // note that i removed all prefixes - addArea(kg, "1", "\"zebra\"", "\"POLYGON((9.33 47.41, 9.31 47.45, 9.32 47.48, 9.35 47.42, 9.33 47.41))\"^^"); - addArea(kg, "2", "\"trafficLight\"", "\"POLYGON((-4.1 10.0, -9.9 10.0, -9.9 -1.0, -4.1 -1.0))\"^^"); - addArea(kg, "3", "\"bridge\"", "\"POLYGON((0.0 0.0, 1.1 0.0, 1.1 1.1, 0.0 1.1, 0.0 0.0))\"^^"); + // addArea(kg, "1", "\"zebra\"", "\"POLYGON((9.33 47.41, 9.31 47.45, 9.32 47.48, 9.35 47.42, 9.33 47.41))\"^^"); + // addArea(kg, "2", "\"trafficLight\"", "\"POLYGON((-4.1 10.0, -9.9 10.0, -9.9 -1.0, -4.1 -1.0))\"^^"); + // addArea(kg, "3", "\"bridge\"", "\"POLYGON((0.0 0.0, 1.1 0.0, 1.1 1.1, 0.0 1.1, 0.0 0.0))\"^^"); + addArea(kg, "1", "\"Uni Freiburg TF\"", "\"POLYGON((7.8346338 48.0126612,7.8348921 48.0123905,7.8349457 48.0124216,7.8349855 48.0124448,7.8353244 48.0126418,7.8354091 48.0126911,7.8352246 48.0129047,7.8351668 48.0128798,7.8349471 48.0127886,7.8347248 48.0126986,7.8346338 48.0126612))\"^^"); + addArea(kg, "2", "\"Minster Freiburg\"", "\"POLYGON((7.8520522 47.9956071,7.8520528 47.9955872,7.8521103 47.995588,7.8521117 47.9955419,7.852113 47.9954975,7.8520523 47.9954968,7.8520527 47.995477,7.8521152 47.9954775,7.8521154 47.9954688,7.8521299 47.995469,7.8521311 47.9954303,7.8521611 47.9954307,7.8521587 47.9954718,7.8522674 47.9954741,7.8522681 47.9954676,7.8522746 47.9954643,7.8522832 47.9954599,7.8522976 47.99546,7.8523031 47.995455,7.8523048 47.9954217,7.8522781 47.9954213,7.8522786 47.9954058,7.8523123 47.9954065,7.852314 47.9953744,7.8523383 47.9953748,7.8523373 47.9954062,7.8524164 47.995408,7.8524176 47.9953858,7.852441 47.9953865,7.8524398 47.9954085,7.8525077 47.9954101,7.8525088 47.9953886,7.8525316 47.9953892,7.8525305 47.9954106,7.8526031 47.9954123,7.8526042 47.9953915,7.8526276 47.9953922,7.8526265 47.9954128,7.8526944 47.9954144,7.8526954 47.9953943,7.8527183 47.9953949,7.8527173 47.9954149,7.8527892 47.9954165,7.8527903 47.9953974,7.8528131 47.9953979,7.8528122 47.9954171,7.852871 47.9954182,7.8528712 47.995416,7.8528791 47.9954112,7.85289 47.9954113,7.8528971 47.9954158,7.8528974 47.9954052,7.8528925 47.9954052,7.8528928 47.9953971,7.8529015 47.9953972,7.8529024 47.9953702,7.852897 47.9953701,7.8528972 47.9953645,7.8529037 47.9953645,7.8529038 47.9953613,7.8529069 47.9953614,7.8529071 47.9953541,7.8529151 47.9953542,7.8529149 47.9953581,7.8529218 47.9953582,7.8529217 47.9953631,7.8529621 47.9953637,7.8529623 47.9953572,7.8529719 47.9953573,7.8529716 47.9953642,7.8530114 47.9953648,7.8530116 47.9953587,7.8530192 47.9953589,7.853019 47.995365,7.8530635 47.9953657,7.8530637 47.9953607,7.8530716 47.9953608,7.8530715 47.9953657,7.8530758 47.9953657,7.8530757 47.9953688,7.8530817 47.9953689,7.8530815 47.9953742,7.8530747 47.9953741,7.8530737 47.9954052,7.8530794 47.9954053,7.8530792 47.995413,7.8530717 47.9954129,7.8530708 47.9954199,7.8531165 47.9954207,7.8531229 47.9954131,7.8531292 47.9954209,7.8531444 47.9954211,7.8531444 47.9954238,7.8531569 47.995424,7.8531661 47.9954152,7.853171 47.9954201,7.853183 47.9954203,7.8531829 47.9954234,7.8531973 47.9954236,7.8531977 47.9954138,7.8532142 47.9954141,7.8532141 47.9954253,7.8532425 47.9954355,7.8532514 47.9954298,7.8532593 47.9954353,7.8532915 47.9954255,7.8532923 47.9954155,7.8533067 47.995416,7.8533055 47.9954261,7.8533304 47.9954368,7.8533399 47.995431,7.85335 47.9954372,7.8533758 47.9954288,7.853377 47.9954188,7.8533932 47.9954192,7.8533924 47.9954298,7.8534151 47.9954395,7.8534278 47.9954345,7.8534373 47.995441,7.8534664 47.995432,7.8534672 47.9954209,7.8534832 47.9954211,7.8534828 47.9954322,7.8535077 47.9954449,7.8535224 47.9954375,7.8535325 47.995448,7.8535644 47.9954403,7.8535717 47.9954305,7.8535866 47.9954356,7.8535796 47.9954443,7.8536079 47.9954674,7.8536221 47.9954629,7.8536221 47.9954735,7.8536573 47.9954801,7.8536707 47.9954728,7.8536813 47.9954812,7.8536686 47.9954876,7.8536776 47.9955168,7.8536958 47.9955192,7.8536876 47.9955286,7.8537133 47.9955444,7.85373 47.9955428,7.8537318 47.9955528,7.8537154 47.9955545,7.8537069 47.9955819,7.8537168 47.995588,7.8537044 47.9955948,7.8537086 47.9956193,7.8537263 47.9956245,7.8537206 47.9956347,7.8537069 47.9956317,7.8536802 47.9956473,7.8536819 47.9956577,7.8536667 47.9956604,7.8536506 47.9956817,7.8536639 47.9956902,7.8536543 47.9956981,7.8536394 47.9956887,7.8536331 47.9956931,7.853609 47.9956954,7.8536024 47.9957048,7.8535868 47.9957028,7.8535591 47.9957206,7.8535642 47.9957285,7.8535487 47.9957327,7.8535423 47.9957215,7.853508 47.9957131,7.8534942 47.9957215,7.8534818 47.9957186,7.8534587 47.9957284,7.853458 47.9957389,7.8534421 47.9957388,7.8534424 47.9957273,7.853418 47.995714,7.8534099 47.9957194,7.8534021 47.995713,7.8533721 47.9957242,7.8533712 47.9957359,7.8533558 47.9957351,7.8533565 47.9957247,7.8533269 47.9957094,7.8533171 47.9957165,7.8533073 47.9957088,7.8532874 47.9957186,7.8532866 47.9957296,7.8532698 47.9957295,7.8532698 47.9957189,7.8532466 47.9957048,7.8532372 47.9957131,7.8532277 47.995705,7.8532014 47.9957171,7.8532009 47.9957284,7.8531844 47.9957281,7.8531847 47.9957174,7.8531778 47.9957102,7.853163 47.9957245,7.8530549 47.9957225,7.8530552 47.9957161,7.8529541 47.9957138,7.8529535 47.9957236,7.8529578 47.9957237,7.8529577 47.9957269,7.852953 47.9957268,7.8529529 47.9957308,7.8529477 47.9957307,7.8529478 47.9957271,7.8528964 47.9957256,7.8528963 47.9957288,7.8528915 47.9957287,7.8528916 47.9957256,7.8528876 47.9957255,7.8528875 47.9957223,7.8528912 47.9957224,7.8528908 47.9957195,7.8528811 47.9957194,7.8527983 47.9957162,7.8527981 47.9957192,7.8527723 47.9957185,7.8527732 47.9957016,7.852703 47.9957003,7.8527021 47.9957175,7.8526791 47.9957171,7.8526788 47.9957225,7.8526097 47.9957225,7.8526099 47.995718,7.8525863 47.9957183,7.8525874 47.9956981,7.8525155 47.9956967,7.8525144 47.995718,7.8524916 47.9957174,7.8524927 47.9956963,7.8524241 47.995695,7.852423 47.9957153,7.8523996 47.9957148,7.8524007 47.9956946,7.8523226 47.9956931,7.8523217 47.9957212,7.8522948 47.9957208,7.8522957 47.9956927,7.8522663 47.9956923,7.8522667 47.9956784,7.8522926 47.9956787,7.8522937 47.9956433,7.8522882 47.995635,7.8522723 47.9956351,7.8522611 47.9956281,7.8522613 47.9956189,7.8521543 47.9956174,7.852153 47.9956591,7.8521196 47.9956587,7.8521209 47.995617,7.8521109 47.9956168,7.8521111 47.9956079,7.8520522 47.9956071))\"^^"); + addArea(kg, "3", "\"London Eye\"", "\"POLYGON((-0.1198608 51.5027451,-0.1197395 51.5027354,-0.1194922 51.5039381,-0.1196135 51.5039478,-0.1198608 51.5027451))\"^^"); + addArea(kg, "4", "\"Statue of liberty\"", "\"POLYGON((-74.0451069 40.6893455,-74.045004 40.6892215,-74.0451023 40.6891073,-74.0449107 40.6890721,-74.0449537 40.6889343,-74.0447746 40.6889506,-74.0446495 40.6888049,-74.0445067 40.6889076,-74.0442008 40.6888563,-74.0441463 40.6890663,-74.0441411 40.6890854,-74.0441339 40.6890874,-74.0441198 40.6890912,-74.0439637 40.6891376,-74.0440941 40.6892849,-74.0440057 40.6894071,-74.0441949 40.6894309,-74.0441638 40.6895702,-74.0443261 40.6895495,-74.0443498 40.6895782,-74.0443989 40.6896372,-74.0444277 40.6896741,-74.0445955 40.6895939,-74.0447392 40.6896561,-74.0447498 40.6896615,-74.0447718 40.6895577,-74.0447983 40.6895442,-74.0448287 40.6895279,-74.0449638 40.6895497,-74.0449628 40.6895443,-74.044961 40.6895356,-74.0449576 40.6895192,-74.044935 40.689421,-74.0451069 40.6893455))\"^^"); + addArea(kg, "5", "\"eiffel tower\"", "\"POLYGON((2.2933119 48.858248,2.2935432 48.8581003,2.2935574 48.8581099,2.2935712 48.8581004,2.2936112 48.8581232,2.2936086 48.8581249,2.293611 48.8581262,2.2936415 48.8581385,2.293672 48.8581477,2.2937035 48.8581504,2.293734 48.858149,2.2937827 48.8581439,2.2938856 48.8581182,2.2939778 48.8580882,2.2940648 48.8580483,2.2941435 48.8579991,2.2941937 48.8579588,2.2942364 48.8579197,2.2942775 48.8578753,2.2943096 48.8578312,2.2943307 48.8577908,2.2943447 48.857745,2.2943478 48.8577118,2.2943394 48.8576885,2.2943306 48.8576773,2.2943205 48.8576677,2.2943158 48.8576707,2.2942802 48.8576465,2.2942977 48.8576355,2.2942817 48.8576248,2.2942926 48.8576181,2.2944653 48.8575069,2.2945144 48.8574753,2.2947414 48.8576291,2.294725 48.8576392,2.2947426 48.857651,2.294706 48.8576751,2.294698 48.8576696,2.2946846 48.8576782,2.2946744 48.8576865,2.2946881 48.8576957,2.2946548 48.857717,2.2946554 48.8577213,2.2946713 48.8577905,2.2946982 48.8578393,2.2947088 48.8578585,2.2947529 48.8579196,2.2948133 48.8579803,2.2948836 48.85803,2.2949462 48.8580637,2.2950051 48.8580923,2.2950719 48.85812,2.2951347 48.8581406,2.2951996 48.8581564,2.2952689 48.8581663,2.295334 48.8581699,2.2953613 48.8581518,2.2953739 48.8581604,2.2953965 48.8581497,2.2954016 48.8581464,2.2953933 48.8581409,2.2954304 48.8581172,2.2954473 48.8581285,2.2954631 48.8581182,2.2956897 48.8582718,2.295653 48.8582954,2.2955837 48.85834,2.2954575 48.8584212,2.2954416 48.858411,2.2954238 48.8584227,2.2953878 48.8583981,2.2953925 48.858395,2.2953701 48.8583857,2.2953419 48.8583779,2.2953057 48.8583737,2.2952111 48.8583776,2.2951081 48.858403,2.2950157 48.8584326,2.2949284 48.8584723,2.2948889 48.8584961,2.2947988 48.8585613,2.2947558 48.8586003,2.2947144 48.8586446,2.294682 48.8586886,2.2946605 48.8587289,2.2946462 48.8587747,2.294644 48.8587962,2.2946462 48.8588051,2.2946486 48.8588068,2.2946938 48.8588377,2.2946607 48.8588587,2.294663 48.8588603,2.294681 48.858849,2.2947169 48.8588737,2.2946988 48.858885,2.2947154 48.8588961,2.2944834 48.8590453,2.2943809 48.8589771,2.2943708 48.8589703,2.2942571 48.8588932,2.2942741 48.8588824,2.2942567 48.8588708,2.2942893 48.8588493,2.294306 48.8588605,2.2943103 48.8588577,2.2942883 48.8588426,2.2943122 48.8588275,2.2943227 48.8588209,2.2943283 48.8588173,2.2943315 48.8588125,2.2943333 48.8588018,2.2943166 48.8587327,2.294301 48.8586978,2.2942783 48.8586648,2.2942406 48.8586191,2.2942064 48.858577,2.2941734 48.8585464,2.2941015 48.8584943,2.2940384 48.8584609,2.2939792 48.8584325,2.293912 48.8584052,2.2938415 48.8583828,2.293784 48.8583695,2.2937145 48.8583599,2.2936514 48.8583593,2.2936122 48.8583846,2.293606 48.8583807,2.2935688 48.8584044,2.2935515 48.8583929,2.293536 48.8584028,2.2933119 48.858248))\"^^"); std::cerr << std::fixed << std::setprecision(10) << kg << std::endl; return kg; } @@ -1295,12 +1306,32 @@ Point calculateMidpointOfBox(const Box& box) { return Point(lng, lat); } +// this function calculates the maximum distance from the midpoint of the box +// to any other point, which is contained in the box. If the midpoint has already +// been calculated, because it is needed in other places as well, it can be given +// to the function, otherwise the function calculates the midpoint itself +double getMaxDistFromMidpointToAnyPointInsideTheBox( + const Box& box, std::optional midpoint = std::nullopt) { + if (!midpoint) { + midpoint = calculateMidpointOfBox(box); + } + double distLng = std::abs(box.min_corner().get<0>() - midpoint.value().get<0>()); + double distLat = std::abs(box.min_corner().get<1>() - midpoint.value().get<1>()); + // convert to meters and return + // return (distLng + distLat) * circumferenceMax_ / 360; // TODO: use this line instead of the one below + return (distLng + distLat) * 40075000 / 360; +} + TEST(SpatialJoin, development) { auto qec = buildAreaTestQEC(); - auto firstChild = buildMediumChild(qec, + /*auto firstChild = buildMediumChild(qec, {"?obj1", std::string{""}, "?name1"}, {"?obj1", std::string{""}, "?highway"}, - {"?obj1", std::string{""}, "?area"}, "?obj1", "?obj1"); + {"?obj1", std::string{""}, "?area"}, "?obj1", "?obj1"); */ + auto firstChild = buildMediumChild(qec, + {"?obj1", std::string{""}, "?name1"}, + {"?obj1", std::string{""}, "?highway"}, + {"?obj1", std::string{""}, "?area"}, "?obj1", "?obj1"); auto result = firstChild->getResult(); std::cerr << "========= printing result ===========" << std::endl; std::cerr << "result size: " << result->idTable().numRows() << std::endl; @@ -1348,6 +1379,49 @@ TEST(SpatialJoin, MidpointOfBoundingBox) { testBoundingBoxOfAreaOrMidpointOfBox(false); } +TEST(SpatialJoin, getMaxDistFromMidpointToAnyPointInsideTheBox) { + // the following polygon is from the eiffel tower + auto area_eiffel = calculateBoundingBoxOfArea("POLYGON((2.2933119 48.858248,2.2935432 48.8581003,2.2935574 48.8581099,2.2935712 48.8581004,2.2936112 48.8581232,2.2936086 48.8581249,2.293611 48.8581262,2.2936415 48.8581385,2.293672 48.8581477,2.2937035 48.8581504,2.293734 48.858149,2.2937827 48.8581439,2.2938856 48.8581182,2.2939778 48.8580882,2.2940648 48.8580483,2.2941435 48.8579991,2.2941937 48.8579588,2.2942364 48.8579197,2.2942775 48.8578753,2.2943096 48.8578312,2.2943307 48.8577908,2.2943447 48.857745,2.2943478 48.8577118,2.2943394 48.8576885,2.2943306 48.8576773,2.2943205 48.8576677,2.2943158 48.8576707,2.2942802 48.8576465,2.2942977 48.8576355,2.2942817 48.8576248,2.2942926 48.8576181,2.2944653 48.8575069,2.2945144 48.8574753,2.2947414 48.8576291,2.294725 48.8576392,2.2947426 48.857651,2.294706 48.8576751,2.294698 48.8576696,2.2946846 48.8576782,2.2946744 48.8576865,2.2946881 48.8576957,2.2946548 48.857717,2.2946554 48.8577213,2.2946713 48.8577905,2.2946982 48.8578393,2.2947088 48.8578585,2.2947529 48.8579196,2.2948133 48.8579803,2.2948836 48.85803,2.2949462 48.8580637,2.2950051 48.8580923,2.2950719 48.85812,2.2951347 48.8581406,2.2951996 48.8581564,2.2952689 48.8581663,2.295334 48.8581699,2.2953613 48.8581518,2.2953739 48.8581604,2.2953965 48.8581497,2.2954016 48.8581464,2.2953933 48.8581409,2.2954304 48.8581172,2.2954473 48.8581285,2.2954631 48.8581182,2.2956897 48.8582718,2.295653 48.8582954,2.2955837 48.85834,2.2954575 48.8584212,2.2954416 48.858411,2.2954238 48.8584227,2.2953878 48.8583981,2.2953925 48.858395,2.2953701 48.8583857,2.2953419 48.8583779,2.2953057 48.8583737,2.2952111 48.8583776,2.2951081 48.858403,2.2950157 48.8584326,2.2949284 48.8584723,2.2948889 48.8584961,2.2947988 48.8585613,2.2947558 48.8586003,2.2947144 48.8586446,2.294682 48.8586886,2.2946605 48.8587289,2.2946462 48.8587747,2.294644 48.8587962,2.2946462 48.8588051,2.2946486 48.8588068,2.2946938 48.8588377,2.2946607 48.8588587,2.294663 48.8588603,2.294681 48.858849,2.2947169 48.8588737,2.2946988 48.858885,2.2947154 48.8588961,2.2944834 48.8590453,2.2943809 48.8589771,2.2943708 48.8589703,2.2942571 48.8588932,2.2942741 48.8588824,2.2942567 48.8588708,2.2942893 48.8588493,2.294306 48.8588605,2.2943103 48.8588577,2.2942883 48.8588426,2.2943122 48.8588275,2.2943227 48.8588209,2.2943283 48.8588173,2.2943315 48.8588125,2.2943333 48.8588018,2.2943166 48.8587327,2.294301 48.8586978,2.2942783 48.8586648,2.2942406 48.8586191,2.2942064 48.858577,2.2941734 48.8585464,2.2941015 48.8584943,2.2940384 48.8584609,2.2939792 48.8584325,2.293912 48.8584052,2.2938415 48.8583828,2.293784 48.8583695,2.2937145 48.8583599,2.2936514 48.8583593,2.2936122 48.8583846,2.293606 48.8583807,2.2935688 48.8584044,2.2935515 48.8583929,2.293536 48.8584028,2.2933119 48.858248))"); + auto midpoint_eiffel = calculateMidpointOfBox(area_eiffel); + print_box(area_eiffel); + std::cerr << midpoint_eiffel.get<0>() << " " << midpoint_eiffel.get<1>() << std::endl; + // call the function without the precalculated midpoint, the upper bound max + // distance needs to be bigger than 130 (the tower has a square base of length + // 125m. Therefore from the midpoint to the side of the box and then to the + // top of the box results in 125m/2 + 125m/2 = 125m). As the tower is not that + // near to the equator and the square base has a worst case alignment to the + // longitude and latitude lines (45 degrees tilted), the distance estimate + // gets a little more than 125m (it's upper bound estimate is 219m) + ASSERT_GE(getMaxDistFromMidpointToAnyPointInsideTheBox(area_eiffel), 125); + ASSERT_DOUBLE_EQ(getMaxDistFromMidpointToAnyPointInsideTheBox(area_eiffel), getMaxDistFromMidpointToAnyPointInsideTheBox(area_eiffel, midpoint_eiffel)); + + // the following polygon is from the Minster of Freiburg + auto area_minster = calculateBoundingBoxOfArea("POLYGON((7.8520522 47.9956071,7.8520528 47.9955872,7.8521103 47.995588,7.8521117 47.9955419,7.852113 47.9954975,7.8520523 47.9954968,7.8520527 47.995477,7.8521152 47.9954775,7.8521154 47.9954688,7.8521299 47.995469,7.8521311 47.9954303,7.8521611 47.9954307,7.8521587 47.9954718,7.8522674 47.9954741,7.8522681 47.9954676,7.8522746 47.9954643,7.8522832 47.9954599,7.8522976 47.99546,7.8523031 47.995455,7.8523048 47.9954217,7.8522781 47.9954213,7.8522786 47.9954058,7.8523123 47.9954065,7.852314 47.9953744,7.8523383 47.9953748,7.8523373 47.9954062,7.8524164 47.995408,7.8524176 47.9953858,7.852441 47.9953865,7.8524398 47.9954085,7.8525077 47.9954101,7.8525088 47.9953886,7.8525316 47.9953892,7.8525305 47.9954106,7.8526031 47.9954123,7.8526042 47.9953915,7.8526276 47.9953922,7.8526265 47.9954128,7.8526944 47.9954144,7.8526954 47.9953943,7.8527183 47.9953949,7.8527173 47.9954149,7.8527892 47.9954165,7.8527903 47.9953974,7.8528131 47.9953979,7.8528122 47.9954171,7.852871 47.9954182,7.8528712 47.995416,7.8528791 47.9954112,7.85289 47.9954113,7.8528971 47.9954158,7.8528974 47.9954052,7.8528925 47.9954052,7.8528928 47.9953971,7.8529015 47.9953972,7.8529024 47.9953702,7.852897 47.9953701,7.8528972 47.9953645,7.8529037 47.9953645,7.8529038 47.9953613,7.8529069 47.9953614,7.8529071 47.9953541,7.8529151 47.9953542,7.8529149 47.9953581,7.8529218 47.9953582,7.8529217 47.9953631,7.8529621 47.9953637,7.8529623 47.9953572,7.8529719 47.9953573,7.8529716 47.9953642,7.8530114 47.9953648,7.8530116 47.9953587,7.8530192 47.9953589,7.853019 47.995365,7.8530635 47.9953657,7.8530637 47.9953607,7.8530716 47.9953608,7.8530715 47.9953657,7.8530758 47.9953657,7.8530757 47.9953688,7.8530817 47.9953689,7.8530815 47.9953742,7.8530747 47.9953741,7.8530737 47.9954052,7.8530794 47.9954053,7.8530792 47.995413,7.8530717 47.9954129,7.8530708 47.9954199,7.8531165 47.9954207,7.8531229 47.9954131,7.8531292 47.9954209,7.8531444 47.9954211,7.8531444 47.9954238,7.8531569 47.995424,7.8531661 47.9954152,7.853171 47.9954201,7.853183 47.9954203,7.8531829 47.9954234,7.8531973 47.9954236,7.8531977 47.9954138,7.8532142 47.9954141,7.8532141 47.9954253,7.8532425 47.9954355,7.8532514 47.9954298,7.8532593 47.9954353,7.8532915 47.9954255,7.8532923 47.9954155,7.8533067 47.995416,7.8533055 47.9954261,7.8533304 47.9954368,7.8533399 47.995431,7.85335 47.9954372,7.8533758 47.9954288,7.853377 47.9954188,7.8533932 47.9954192,7.8533924 47.9954298,7.8534151 47.9954395,7.8534278 47.9954345,7.8534373 47.995441,7.8534664 47.995432,7.8534672 47.9954209,7.8534832 47.9954211,7.8534828 47.9954322,7.8535077 47.9954449,7.8535224 47.9954375,7.8535325 47.995448,7.8535644 47.9954403,7.8535717 47.9954305,7.8535866 47.9954356,7.8535796 47.9954443,7.8536079 47.9954674,7.8536221 47.9954629,7.8536221 47.9954735,7.8536573 47.9954801,7.8536707 47.9954728,7.8536813 47.9954812,7.8536686 47.9954876,7.8536776 47.9955168,7.8536958 47.9955192,7.8536876 47.9955286,7.8537133 47.9955444,7.85373 47.9955428,7.8537318 47.9955528,7.8537154 47.9955545,7.8537069 47.9955819,7.8537168 47.995588,7.8537044 47.9955948,7.8537086 47.9956193,7.8537263 47.9956245,7.8537206 47.9956347,7.8537069 47.9956317,7.8536802 47.9956473,7.8536819 47.9956577,7.8536667 47.9956604,7.8536506 47.9956817,7.8536639 47.9956902,7.8536543 47.9956981,7.8536394 47.9956887,7.8536331 47.9956931,7.853609 47.9956954,7.8536024 47.9957048,7.8535868 47.9957028,7.8535591 47.9957206,7.8535642 47.9957285,7.8535487 47.9957327,7.8535423 47.9957215,7.853508 47.9957131,7.8534942 47.9957215,7.8534818 47.9957186,7.8534587 47.9957284,7.853458 47.9957389,7.8534421 47.9957388,7.8534424 47.9957273,7.853418 47.995714,7.8534099 47.9957194,7.8534021 47.995713,7.8533721 47.9957242,7.8533712 47.9957359,7.8533558 47.9957351,7.8533565 47.9957247,7.8533269 47.9957094,7.8533171 47.9957165,7.8533073 47.9957088,7.8532874 47.9957186,7.8532866 47.9957296,7.8532698 47.9957295,7.8532698 47.9957189,7.8532466 47.9957048,7.8532372 47.9957131,7.8532277 47.995705,7.8532014 47.9957171,7.8532009 47.9957284,7.8531844 47.9957281,7.8531847 47.9957174,7.8531778 47.9957102,7.853163 47.9957245,7.8530549 47.9957225,7.8530552 47.9957161,7.8529541 47.9957138,7.8529535 47.9957236,7.8529578 47.9957237,7.8529577 47.9957269,7.852953 47.9957268,7.8529529 47.9957308,7.8529477 47.9957307,7.8529478 47.9957271,7.8528964 47.9957256,7.8528963 47.9957288,7.8528915 47.9957287,7.8528916 47.9957256,7.8528876 47.9957255,7.8528875 47.9957223,7.8528912 47.9957224,7.8528908 47.9957195,7.8528811 47.9957194,7.8527983 47.9957162,7.8527981 47.9957192,7.8527723 47.9957185,7.8527732 47.9957016,7.852703 47.9957003,7.8527021 47.9957175,7.8526791 47.9957171,7.8526788 47.9957225,7.8526097 47.9957225,7.8526099 47.995718,7.8525863 47.9957183,7.8525874 47.9956981,7.8525155 47.9956967,7.8525144 47.995718,7.8524916 47.9957174,7.8524927 47.9956963,7.8524241 47.995695,7.852423 47.9957153,7.8523996 47.9957148,7.8524007 47.9956946,7.8523226 47.9956931,7.8523217 47.9957212,7.8522948 47.9957208,7.8522957 47.9956927,7.8522663 47.9956923,7.8522667 47.9956784,7.8522926 47.9956787,7.8522937 47.9956433,7.8522882 47.995635,7.8522723 47.9956351,7.8522611 47.9956281,7.8522613 47.9956189,7.8521543 47.9956174,7.852153 47.9956591,7.8521196 47.9956587,7.8521209 47.995617,7.8521109 47.9956168,7.8521111 47.9956079,7.8520522 47.9956071))"); + auto midpoint_minster = calculateMidpointOfBox(area_minster); + print_box(area_minster); + std::cerr << midpoint_minster.get<0>() << " " << midpoint_minster.get<1>() << std::endl; + ASSERT_GE(getMaxDistFromMidpointToAnyPointInsideTheBox(area_minster), 80); + ASSERT_DOUBLE_EQ(getMaxDistFromMidpointToAnyPointInsideTheBox(area_minster), getMaxDistFromMidpointToAnyPointInsideTheBox(area_minster, midpoint_minster)); + + // the following polygon is from the university building 101 in freiburg + auto area_uni = calculateBoundingBoxOfArea("POLYGON((7.8346338 48.0126612,7.8348921 48.0123905,7.8349457 48.0124216,7.8349855 48.0124448,7.8353244 48.0126418,7.8354091 48.0126911,7.8352246 48.0129047,7.8351668 48.0128798,7.8349471 48.0127886,7.8347248 48.0126986,7.8346338 48.0126612))"); + auto midpoint_uni = calculateMidpointOfBox(area_uni); + ASSERT_GE(getMaxDistFromMidpointToAnyPointInsideTheBox(area_uni), 40); + ASSERT_DOUBLE_EQ(getMaxDistFromMidpointToAnyPointInsideTheBox(area_uni), getMaxDistFromMidpointToAnyPointInsideTheBox(area_uni, midpoint_uni)); + + // the following polygon is from the London Eye + auto area_eye = calculateBoundingBoxOfArea("POLYGON((-0.1198608 51.5027451,-0.1197395 51.5027354,-0.1194922 51.5039381,-0.1196135 51.5039478,-0.1198608 51.5027451))"); + auto midpoint_eye = calculateMidpointOfBox(area_eye); + ASSERT_GE(getMaxDistFromMidpointToAnyPointInsideTheBox(area_eye), 70); + ASSERT_DOUBLE_EQ(getMaxDistFromMidpointToAnyPointInsideTheBox(area_eye), getMaxDistFromMidpointToAnyPointInsideTheBox(area_eye, midpoint_eye)); + + // the following polygon is from the Statue of liberty + auto area_statue = calculateBoundingBoxOfArea("POLYGON((-74.0451069 40.6893455,-74.045004 40.6892215,-74.0451023 40.6891073,-74.0449107 40.6890721,-74.0449537 40.6889343,-74.0447746 40.6889506,-74.0446495 40.6888049,-74.0445067 40.6889076,-74.0442008 40.6888563,-74.0441463 40.6890663,-74.0441411 40.6890854,-74.0441339 40.6890874,-74.0441198 40.6890912,-74.0439637 40.6891376,-74.0440941 40.6892849,-74.0440057 40.6894071,-74.0441949 40.6894309,-74.0441638 40.6895702,-74.0443261 40.6895495,-74.0443498 40.6895782,-74.0443989 40.6896372,-74.0444277 40.6896741,-74.0445955 40.6895939,-74.0447392 40.6896561,-74.0447498 40.6896615,-74.0447718 40.6895577,-74.0447983 40.6895442,-74.0448287 40.6895279,-74.0449638 40.6895497,-74.0449628 40.6895443,-74.044961 40.6895356,-74.0449576 40.6895192,-74.044935 40.689421,-74.0451069 40.6893455))"); + auto midpoint_statue = calculateMidpointOfBox(area_statue); + ASSERT_GE(getMaxDistFromMidpointToAnyPointInsideTheBox(area_statue), 100); + ASSERT_DOUBLE_EQ(getMaxDistFromMidpointToAnyPointInsideTheBox(area_statue), getMaxDistFromMidpointToAnyPointInsideTheBox(area_statue, midpoint_statue)); +} + } // ===================================== DEV ending =========================== From d784513a0796a85e50913ba55b344028f7417f8b Mon Sep 17 00:00:00 2001 From: Jonathan Zeller Date: Sat, 28 Dec 2024 14:47:22 +0100 Subject: [PATCH 04/47] make file_buffer_size in the test index be a parameter --- test/engine/SpatialJoinAlgorithmsTest.cpp | 6 +++--- test/util/IndexTestHelpers.cpp | 8 ++++---- test/util/IndexTestHelpers.h | 6 ++++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/test/engine/SpatialJoinAlgorithmsTest.cpp b/test/engine/SpatialJoinAlgorithmsTest.cpp index 522a1e3b53..11caa9f7d4 100644 --- a/test/engine/SpatialJoinAlgorithmsTest.cpp +++ b/test/engine/SpatialJoinAlgorithmsTest.cpp @@ -1274,7 +1274,7 @@ QueryExecutionContext* buildAreaTestQEC() { std::string kg = createAreaTestDataset(); ad_utility::MemorySize blocksizePermutations = 16_MB; auto qec = ad_utility::testing::getQec(kg, true, true, false, blocksizePermutations, - false); + false, true, std::nullopt, 10000); return qec; } @@ -1330,8 +1330,8 @@ TEST(SpatialJoin, development) { {"?obj1", std::string{""}, "?area"}, "?obj1", "?obj1"); */ auto firstChild = buildMediumChild(qec, {"?obj1", std::string{""}, "?name1"}, - {"?obj1", std::string{""}, "?highway"}, - {"?obj1", std::string{""}, "?area"}, "?obj1", "?obj1"); + {"?obj1", std::string{""}, "?geo1"}, + {"?geo1", std::string{""}, "?area"}, "?obj1", "?geo1"); auto result = firstChild->getResult(); std::cerr << "========= printing result ===========" << std::endl; std::cerr << "result size: " << result->idTable().numRows() << std::endl; diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index 0dcfd334a6..b788662dfc 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -141,7 +141,7 @@ Index makeTestIndex(const std::string& indexBasename, ad_utility::MemorySize blocksizePermutations, bool createTextIndex, bool addWordsFromLiterals, std::optional> - contentsOfWordsFileAndDocsFile) { + contentsOfWordsFileAndDocsFile, unsigned long FILE_BUFFER_SIZE_) { // Ignore the (irrelevant) log output of the index building and loading during // these tests. static std::ostringstream ignoreLogStream; @@ -155,7 +155,7 @@ Index makeTestIndex(const std::string& indexBasename, "\"zz\"@en .