Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spatial join area #1713

Open
wants to merge 50 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
c7a1ea8
untested changes
Jonathan24680 Dec 17, 2024
a266201
continued development
Jonathan24680 Dec 18, 2024
2cc8504
added all areas to the dataset and some tests
Jonathan24680 Dec 23, 2024
d784513
make file_buffer_size in the test index be a parameter
Jonathan24680 Dec 28, 2024
f6e1647
bugfix area dataset
Jonathan24680 Dec 28, 2024
1a4ff8f
backup commit
Jonathan24680 Dec 30, 2024
e3a2644
area test with points and areas working
Jonathan24680 Dec 31, 2024
f38cb1d
moved some stuff from dev space into the project
Jonathan24680 Jan 2, 2025
4d71c4e
new tests
Jonathan24680 Jan 4, 2025
38c9585
first test works now for the areas as well
Jonathan24680 Jan 11, 2025
ddea6d3
all tests are now executed and passed
Jonathan24680 Jan 12, 2025
1902d44
clang format
Jonathan24680 Jan 12, 2025
31e7054
merge master
Jonathan24680 Jan 12, 2025
cba4324
backup with removed filebuffersize compiling but not testing
Jonathan24680 Jan 17, 2025
d5c934c
make buffer size of test index variable
Jonathan24680 Jan 17, 2025
c1dc963
clean up code
Jonathan24680 Jan 17, 2025
c5284ef
sonarqube
Jonathan24680 Jan 20, 2025
fc1a1fe
correct bug for function without inline in header file
Jonathan24680 Jan 21, 2025
ec75e89
arbitrary geometry types and areas dont need to be approximated as po…
Jonathan24680 Jan 25, 2025
4fd698c
PR feedback
Jonathan24680 Jan 31, 2025
173e646
PR feedback
Jonathan24680 Jan 31, 2025
7dd8156
backup
Jonathan24680 Feb 1, 2025
1c60bf9
PR feedback
Jonathan24680 Feb 1, 2025
3b8ea1a
spelling mistake
Jonathan24680 Feb 1, 2025
a121ef2
Sonarqube issues
Jonathan24680 Feb 4, 2025
1168f32
PR changes
Jonathan24680 Feb 4, 2025
ca7f08e
PR changes
Jonathan24680 Feb 4, 2025
9d2956e
PR changes
Jonathan24680 Feb 4, 2025
14032bd
PR changes
Jonathan24680 Feb 4, 2025
29171b5
Sonarqube issues
Jonathan24680 Feb 4, 2025
457f8d4
format check
Jonathan24680 Feb 4, 2025
bf886c4
backup
Jonathan24680 Feb 7, 2025
10bdf7b
strange bug
Jonathan24680 Feb 7, 2025
5f12f22
solved strange bug
Jonathan24680 Feb 7, 2025
978b512
PR feedback
Jonathan24680 Feb 7, 2025
4132701
Sonarqube issue
Jonathan24680 Feb 7, 2025
d711586
Merge branch 'master' into SpatialJoinArea
Jonathan24680 Feb 7, 2025
f3c5b01
consistent naming of QueryBox and BoundingBox
Jonathan24680 Feb 8, 2025
a46fccd
add test for distance between GeoPoints and Areas
Jonathan24680 Feb 8, 2025
2cc0fef
improve true area distance calculation
Jonathan24680 Feb 8, 2025
96a903a
backup
Jonathan24680 Feb 10, 2025
47ba454
backup
Jonathan24680 Feb 10, 2025
61c6b98
PR changes
Jonathan24680 Feb 10, 2025
c0bf282
clang format
Jonathan24680 Feb 10, 2025
988ba8d
Sonarqube issus and bugfix
Jonathan24680 Feb 12, 2025
e42d9d0
format check
Jonathan24680 Feb 12, 2025
5b88f47
Merge branch 'master' into SpatialJoinArea
Jonathan24680 Feb 14, 2025
5a99080
Sonarqube issue remove unused operator
Jonathan24680 Feb 14, 2025
a7f4695
PR feedback
Jonathan24680 Feb 15, 2025
e5121f6
suggested parentheses around and condition
Jonathan24680 Feb 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
233 changes: 140 additions & 93 deletions src/engine/SpatialJoinAlgorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,71 +39,153 @@
};

// ____________________________________________________________________________
std::string SpatialJoinAlgorithms::betweenQuotes(
std::string extractFrom) const {
std::string_view SpatialJoinAlgorithms::betweenQuotes(
std::string_view extractFrom) const {
size_t pos1 = extractFrom.find("\"", 0);
size_t pos2 = extractFrom.find("\"", pos1 + 1);
if (pos1 != std::string::npos && pos2 != std::string::npos) {
return extractFrom.substr(pos1 + 1, pos2 - pos1 - 1);
} else {
return extractFrom;
}

Check warning on line 50 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L49-L50

Added lines #L49 - L50 were not covered by tests
}

struct DistanceVisitor : public boost::static_visitor<double> {
template <typename Geometry1, typename Geometry2>
double operator()(const Geometry1& geom1, const Geometry2& geom2) const {
return bg::distance(geom1, geom2);
}
bool SpatialJoinAlgorithms::getAnyGeometry(const IdTable* idtable, size_t row,
size_t col,
AnyGeometry& geometry) const {
auto printWarning = [alreadyWarned = false,
&spatialJoin = spatialJoin_]() mutable {
if (!alreadyWarned) {
std::string warning =
"The input to a spatial join contained at least one element, "
"that is not a point or polygon geometry and is thus skipped. Note "
"that QLever currently only accepts point or polygon geometries for "
"the spatial joins";
AD_LOG_WARN << warning << std::endl;
alreadyWarned = true;
if (spatialJoin.has_value()) {
AD_CORRECTNESS_CHECK(spatialJoin.value() != nullptr);
spatialJoin.value()->addWarning(warning);
}
}
};

std::string str(betweenQuotes(ExportQueryExecutionTrees::idToStringAndType(
qec_->getIndex(), idtable->at(row, col), {})
.value()
.first));
try {
bg::read_wkt(str, geometry);
} catch (const std::exception& e) {
printWarning();
return false;
}
return true;
}

// ____________________________________________________________________________
double SpatialJoinAlgorithms::convertDegreesToMeters(
AnyGeometry geometry1, AnyGeometry geometry2) const {
return boost::apply_visitor(DistanceVisitor(), geometry1, geometry2) * 78.630;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What exactly is the multiplication doing here, the magic constant should be explained.
Is the function name actually computeDistanceInMeters (you first compute it, and then do some magic multiplication to convert)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a very verbose explanation of this is in the header file. But you're right, the function should be called different, as it first gets the distance in degrees and then converts it. I called it computeDist, since that's what the other compute distance functions are called. Then it's consistent and depending on the parameters available, one can call one or the other computeDist function

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the clarification, I am much less confused now:)

};

// ____________________________________________________________________________
Point SpatialJoinAlgorithms::convertGeoPointToPoint(GeoPoint point) const {
return Point(point.getLng(), point.getLat());
};

Check warning on line 95 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L93-L95

Added lines #L93 - L95 were not covered by tests

// ____________________________________________________________________________
Id SpatialJoinAlgorithms::computeDist(const rtreeEntry& geo1,
const rtreeEntry& geo2) const {
auto convertPoint = [&](AnyGeometry geometry, std::optional<Box> bbox) {
Box areaBox;
if (bbox.has_value()) {
areaBox = bbox.value();
} else {
areaBox = boost::apply_visitor(BoundingBoxVisitor(), geometry);
}
Point p = calculateMidpointOfBox(areaBox);
return GeoPoint(p.get<1>(), p.get<0>());
};
// use the already parsed geometries to calculate the distance
if (useMidpointForAreas_) {
auto point1 = geo1.geoPoint;
auto point2 = geo2.geoPoint;
if (!point1) {
point1 = convertPoint(geo1.geometry.value(), geo1.boundingBox);
}
if (!point2) {
point2 = convertPoint(geo2.geometry.value(), geo2.boundingBox);
}
if (!point1.has_value() || !point2.has_value()) {
return Id::makeUndefined();
}

Check warning on line 122 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L121-L122

Added lines #L121 - L122 were not covered by tests
return Id::makeFromDouble(
ad_utility::detail::wktDistImpl(point1.value(), point2.value()));
} else {
if (geo1.geoPoint.has_value() && geo2.geoPoint.has_value()) {
return Id::makeFromDouble(ad_utility::detail::wktDistImpl(
geo1.geoPoint.value(), geo2.geoPoint.value()));

Check warning on line 128 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L127-L128

Added lines #L127 - L128 were not covered by tests
} else if (geo1.geometry.has_value() && geo2.geometry.has_value()) {
return Id::makeFromDouble(
convertDegreesToMeters(geo1.geometry.value(), geo2.geometry.value()));
} else {

Check warning on line 132 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L130-L132

Added lines #L130 - L132 were not covered by tests
// one point and one area
AnyGeometry geom1 = geo1.geometry.value();
AnyGeometry geom2 = geo2.geometry.value();

Check warning on line 135 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L134-L135

Added lines #L134 - L135 were not covered by tests
if (geo1.geoPoint.has_value()) {
geom1 = convertGeoPointToPoint(geo1.geoPoint.value());

Check warning on line 137 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L137

Added line #L137 was not covered by tests
} else if (geo2.geoPoint.has_value()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is stored e.g. in geo1.geometry.value() if geo1 points to a POINT already? This last part looks a bit confusing.

Copy link
Collaborator Author

@Jonathan24680 Jonathan24680 Feb 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

one line above, there is a comment, which states "one point and one area". So this is the case, where we have one GeoPoint and one area. Then geom1 and geom2 get initialized to be the geometries (which is wrong in one case. When the geometry is a GeoPoint, then geomX.geometry is std::nullopt). The if below checks, which of those two is not a geometry but a geopoint. Then it converts geopoint to boost point (as boost requires both types to be boost geometries). You're right, that they should not be initialized with .value, since that is std::nullopt for at least one of them. I still have to write a test, which checks that, but it should work now

geom2 = convertGeoPointToPoint(geo2.geoPoint.value());
}
return Id::makeFromDouble(convertDegreesToMeters(geom1, geom2));
}
}

Check warning on line 143 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L139-L143

Added lines #L139 - L143 were not covered by tests
}

// ____________________________________________________________________________
Id SpatialJoinAlgorithms::computeDist(const IdTable* idTableLeft,
const IdTable* idTableRight,
size_t rowLeft, size_t rowRight,
ColumnIndex leftPointCol,
ColumnIndex rightPointCol) const {
auto getAreaString = [&](const IdTable* idtable, size_t row, size_t col) {
return betweenQuotes(ExportQueryExecutionTrees::idToStringAndType(
qec_->getIndex(), idtable->at(row, col), {})
.value()
.first);
};

auto getAreaOrPointGeometry =
[&](const IdTable* idtable, size_t row, size_t col,
std::optional<GeoPoint> point) -> std::optional<AnyGeometry> {
AnyGeometry geometry;
try {
if (!point) {
boost::geometry::read_wkt(getAreaString(idtable, row, col), geometry);
} else {
geometry = Point(point.value().getLng(), point.value().getLat());

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Easier control flow:
if (point.has_value) { return point; } else { return getAnyGeometry(...)} Everything else then is not nested in the if`.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'(with my suggestion for the AnyGeometry)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don't understand what you mean

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if (point.has_value()) {
  return std::optional{AnyGeometry{convertPoint...()}};
}
return std::optional{getAnyGeometry(...);

You can drop the std::optional if you explicitly set the return type of the lambda to std::optional<AnyGeometry>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course only relevant, if we still have this code at all.

if (!point) {
if (!getAnyGeometry(idtable, row, col, geometry)) {
// nothing to do. When parsing a point or an area fails, a warning
// message gets printed at another place and the point/area just gets
// skipped
return std::nullopt;
}

Check warning on line 163 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L162-L163

Added lines #L162 - L163 were not covered by tests
} catch (...) {
return std::nullopt;
} else {
geometry = convertGeoPointToPoint(point.value());
}

Check warning on line 166 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L165-L166

Added lines #L165 - L166 were not covered by tests

return geometry;
};

// for now we need to get the data from the disk, but in the future, this
// information will be stored in an ID, just like GeoPoint
auto getAreaPoint = [&](const IdTable* idtable, size_t row,
size_t col) -> std::optional<GeoPoint> {
std::string areastring = getAreaString(idtable, row, col);

Box areaBox;
try {
areaBox = calculateBoundingBoxOfArea(areastring);
} catch (...) {
AnyGeometry geometry;
if (!getAnyGeometry(idtable, row, col, geometry)) {
// nothing to do. When parsing a point or an area fails, a warning message
// gets printed at another place and the point/area just gets skipped
return std::nullopt;
}

Box areaBox = boost::apply_visitor(BoundingBoxVisitor(), geometry);

Point p = calculateMidpointOfBox(areaBox);
return GeoPoint(p.get<1>(), p.get<0>());
};

// if the geometries have not been read already, get them first
auto point1 = getPoint(idTableLeft, rowLeft, leftPointCol);
auto point2 = getPoint(idTableRight, rowRight, rightPointCol);
if (useMidpointForAreas_) {
Expand All @@ -126,14 +208,10 @@
auto geometry2 =
getAreaOrPointGeometry(idTableRight, rowRight, rightPointCol, point2);
if (!geometry1.has_value() || !geometry2.has_value()) {
return Id::makeUndefined();
}

Check warning on line 212 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L211-L212

Added lines #L211 - L212 were not covered by tests
// convert to m and return. Note that the 78630 is an approximation, which
// does not provide accurate results for the poles.
return Id::makeFromDouble(boost::apply_visitor(DistanceVisitor(),
geometry1.value(),
geometry2.value()) *
78630);
return Id::makeFromDouble(
convertDegreesToMeters(geometry1.value(), geometry2.value()));
}
}

Expand Down Expand Up @@ -528,23 +606,6 @@
return std::array{northPoleReached, southPoleReached};
}

struct BoundingBoxVisitor : public boost::static_visitor<Box> {
template <typename Geometry>
Box operator()(const Geometry& geometry) const {
Box box;
bg::envelope(geometry, box);
return box;
}
};

// ____________________________________________________________________________
Box SpatialJoinAlgorithms::calculateBoundingBoxOfArea(
const std::string& wktString) const {
AnyGeometry geometry;
boost::geometry::read_wkt(wktString, geometry);
return boost::apply_visitor(BoundingBoxVisitor(), geometry);
}

// ____________________________________________________________________________
Point SpatialJoinAlgorithms::calculateMidpointOfBox(const Box& box) const {
double lng = (box.min_corner().get<0>() + box.max_corner().get<0>()) / 2.0;
Expand All @@ -568,23 +629,6 @@

// ____________________________________________________________________________
Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() {
auto printWarning = [alreadyWarned = false,
&spatialJoin = spatialJoin_]() mutable {
if (!alreadyWarned) {
std::string warning =
"The input to a spatial join contained at least one element, "
"that is not a point or polygon geometry and is thus skipped. Note "
"that QLever currently only accepts point or polygon geometries for "
"the spatial joins";
AD_LOG_WARN << warning << std::endl;
alreadyWarned = true;
if (spatialJoin.has_value()) {
AD_CORRECTNESS_CHECK(spatialJoin.value() != nullptr);
spatialJoin.value()->addWarning(warning);
}
}
};

const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, rightSelectedCols, numColumns, maxDist,
maxResults] = params_;
Expand All @@ -611,28 +655,30 @@
// get point of row i
auto geopoint = getPoint(smallerResult, i, smallerResJoinCol);
Box bbox;
AnyGeometry geometry;
rtreeEntry entry{i, std::nullopt, std::nullopt, std::nullopt};

if (!geopoint) {
try {
std::string areastring = betweenQuotes(
ExportQueryExecutionTrees::idToStringAndType(
qec_->getIndex(), smallerResult->at(i, smallerResJoinCol), {})
.value()
.first);
bbox = calculateBoundingBoxOfArea(areastring);
} catch (...) {
printWarning();
if (!getAnyGeometry(smallerResult, i, smallerResJoinCol, geometry)) {
// nothing to do. When parsing a point or an area fails, a warning
// message gets printed at another place and the point/area just gets
// skipped
continue;
}
bbox = boost::apply_visitor(BoundingBoxVisitor(), geometry);
entry.geometry = std::move(geometry);
entry.boundingBox = bbox;

} else {
entry.geoPoint = geopoint.value();
// create a box with a side length of at most 1mm to approximate the point
bbox = Box(Point(geopoint.value().getLng(), geopoint.value().getLat()),
Point(geopoint.value().getLng() + 0.00000001,
geopoint.value().getLat() + 0.00000001));
}

// add every box together with the row number into the rtree
rtree.insert(std::make_pair(std::move(bbox), i));
rtree.insert(std::make_pair(std::move(bbox), std::move(entry)));
}

// query rtree with the other child
Expand All @@ -641,24 +687,26 @@
for (size_t i = 0; i < otherResult->numRows(); i++) {
auto geopoint = getPoint(otherResult, i, otherResJoinCol);
std::vector<Box> bbox;
rtreeEntry entry{i, std::nullopt, std::nullopt, std::nullopt};
entry.row = i;

if (!geopoint) {
try {
std::string areastring = betweenQuotes(
ExportQueryExecutionTrees::idToStringAndType(
qec_->getIndex(), otherResult->at(i, otherResJoinCol), {})
.value()
.first);
auto areaBox = calculateBoundingBoxOfArea(areastring);
auto midpoint = calculateMidpointOfBox(areaBox);
bbox = computeBoundingBox(
midpoint,
getMaxDistFromMidpointToAnyPointInsideTheBox(areaBox, midpoint));
} catch (...) {
printWarning();
AnyGeometry geometry;
if (!getAnyGeometry(otherResult, i, otherResJoinCol, geometry)) {
// nothing to do. When parsing a point or an area fails, a warning
// message gets printed at another place and the point/area just gets
// skipped
continue;
}
auto areaBox = boost::apply_visitor(BoundingBoxVisitor(), geometry);
entry.geometry = std::move(geometry);
auto midpoint = calculateMidpointOfBox(areaBox);
bbox = computeBoundingBox(
midpoint,
getMaxDistFromMidpointToAnyPointInsideTheBox(areaBox, midpoint));

} else {
entry.geoPoint = geopoint;
bbox = computeBoundingBox(
Point(geopoint.value().getLng(), geopoint.value().getLat()));
}
Expand All @@ -670,13 +718,12 @@
});

ql::ranges::for_each(results, [&](const Value& res) {
size_t rowLeft = res.second;
size_t rowLeft = res.second.row;
size_t rowRight = i;
if (!leftResSmaller) {
std::swap(rowLeft, rowRight);
}
auto distance = computeDist(idTableLeft, idTableRight, rowLeft, rowRight,
leftJoinCol, rightJoinCol);
auto distance = computeDist(res.second, entry);
AD_CORRECTNESS_CHECK(distance.getDatatype() == Datatype::Double);
if (distance.getDouble() * 1000 <= static_cast<double>(maxDist.value())) {
addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft,
Expand Down
Loading
Loading