-
Notifications
You must be signed in to change notification settings - Fork 473
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Fast isValidCell #496
base: master
Are you sure you want to change the base?
Changes from 23 commits
196b271
5125e7b
aa0e572
744c023
ea83ff9
1b63946
0ca2c28
e898d63
983cb3d
5bcc2ee
3e912d8
bef505f
63962ed
0f18b18
c7a32f2
c22fa9f
47b6584
2afecd6
dde10bb
9edb39a
5e18135
5d68649
65a5e89
5a70747
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -83,12 +83,14 @@ H3Error H3_EXPORT(h3ToString)(H3Index h, char *str, size_t sz) { | |
return E_SUCCESS; | ||
} | ||
|
||
/** | ||
* Returns whether or not an H3 index is a valid cell (hexagon or pentagon). | ||
* @param h The H3 index to validate. | ||
* @return 1 if the H3 index if valid, and 0 if it is not. | ||
*/ | ||
int H3_EXPORT(isValidCell)(H3Index h) { | ||
// Get Top t bits from h | ||
#define GT(h, t) ((h) >> (64 - (t))) | ||
|
||
static const bool isBaseCellPentagonArr[128] = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this an improvement over |
||
[4] = 1, [14] = 1, [24] = 1, [38] = 1, [49] = 1, [58] = 1, | ||
[63] = 1, [72] = 1, [83] = 1, [97] = 1, [107] = 1, [117] = 1}; | ||
|
||
int _isValidCell_old(H3Index h) { | ||
if (H3_GET_HIGH_BIT(h) != 0) return 0; | ||
|
||
if (H3_GET_MODE(h) != H3_CELL_MODE) return 0; | ||
|
@@ -129,6 +131,263 @@ int H3_EXPORT(isValidCell)(H3Index h) { | |
return 1; | ||
} | ||
|
||
int _isValidCell_pop(H3Index h) { | ||
// Implementation strategy: | ||
// | ||
// Walk from high to low bits, checking validity of | ||
// groups of bits as we go. After each check, shift bits off | ||
// to the left, so that the next relevant group is at the | ||
// highest bit location in the H3 Index, which we can | ||
// easily read off with the `GT` macro. This strategy helps | ||
// us avoid re-computing shifts and masks for each group. | ||
// | ||
// | Region | # bits | | ||
// |------------|--------| | ||
// | High | 1 | | ||
// | Mode | 4 | | ||
// | Reserved | 3 | | ||
// | Resolution | 4 | | ||
// | Base Cell | 7 | | ||
// | Digit 1 | 3 | | ||
// | Digit 2 | 3 | | ||
// | ... | ... | | ||
// | Digit 15 | 3 | | ||
// | ||
// Additionally, we try to group operations and avoid loops when possible. | ||
|
||
// The 1 high bit should be 0b0 | ||
// The 4 mode bits should be 0b0001 (H3_CELL_MODE) | ||
// The 3 reserved bits should be 0b000 | ||
// In total, the top 8 bits should be 0b00001000 | ||
if (GT(h, 8) != 0b00001000) return 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Binary There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alternative implementation: if (GT(h, 1) != 0) return 0;
h <<= 1;
if (GT(h, 4) != 1) return 0;
h <<= 4;
if (GT(h, 3) != 0) return 0;
h <<= 3; This alternative might be clearer, and Clang is even able to compile each version to the same instructions. However, GCC doesn't seem to produce the same for the longer version: https://godbolt.org/z/sjKed8jn1 (Even though the last shift in each example is optimized out to a no-op, still gives you the general idea.) |
||
h <<= 8; | ||
|
||
// Number of bits in each of the resolution, base cell, and digit regions. | ||
const int nBitsRes = 4; | ||
const int nBitsBaseCell = 7; | ||
const int nBitsDigit = H3_PER_DIGIT_OFFSET; // ... == 3 | ||
|
||
// No need to check resolution; any 4 bits give a valid resolution. | ||
const int res = GT(h, nBitsRes); | ||
h <<= nBitsRes; | ||
|
||
// Check that base cell number is valid. | ||
const int bc = GT(h, nBitsBaseCell); | ||
if (bc >= NUM_BASE_CELLS) return 0; | ||
h <<= nBitsBaseCell; | ||
|
||
// six shifts up at this point. can reduce with masking? is that faster? | ||
// make input const!? instead: mask-equals, mask-shift, mask-shift easy test | ||
// to see what might be faster, mask vs shift? | ||
|
||
// Now check that each resolution digit is valid. | ||
// Let `r` denote the resolution we're currently checking. | ||
int r = 1; | ||
|
||
// Pentagon cells start with a sequence of 0's (CENTER_DIGIT's). | ||
// The first nonzero digit can't be a 1 (i.e., "deleted subsequence", | ||
// PENTAGON_SKIPPED_DIGIT, or K_AXES_DIGIT). | ||
// Test for pentagon base cell first to avoid this loop if possible. | ||
if (isBaseCellPentagonArr[bc]) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using this lookup table seems to be a bit faster than calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alternative: int _isBaseCellPentagon(int baseCell) {
switch(baseCell) {
case 4:
case 14:
case 24:
case 38:
case 49:
case 58:
case 63:
case 72:
case 83:
case 97:
case 107:
case 117:
return 1;
default:
return 0;
}
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you can use |
||
for (; r <= res; r++) { | ||
int d = GT( | ||
h, nBitsDigit); // idea: could do david's masking thing here. | ||
// makes this loop not mutate anything... | ||
if (d == 0) { | ||
h <<= nBitsDigit; // nice because this just becomes `continue` | ||
} else if (d == 1) { | ||
return 0; | ||
} else { | ||
break; | ||
// But don't increment `r`, since we still need to | ||
// check that it isn't a 7. | ||
} | ||
} | ||
} | ||
|
||
// After (possibly) taking care of pentagon logic, check that | ||
// the remaining digits up to `res` are not 7 (INVALID_DIGIT). | ||
// Don't see a way to avoid this loop :( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could unroll it into groups of 4 or 8, possibly with a macro to simplify the code. This would only work for hex base cells though, or for pentagon base cells if you re-check all the digits you just checked. Might be worth trying. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure if this PR is still alive, but I've just stumbled on it. Last week I've implemented my own quick The search for the pattern By digging into the annals of the Old Gods, a.k.a. Tweaking the bitmasks to works on 3-bit instead of 8-bit does the trick and we end up with a code that would rougly look like #include<stdint.h>
#include<stdio.h>
#include<assert.h>
#define MAX_RESOLUTION 15
#define CELL_BITSIZE 3
int has_invalid_digit(uint64_t index) {
static const uint64_t LO_MAGIC = 0x49249249249ULL; // ...001 001 001...
static const uint64_t HI_MAGIC = 0x124924924924ULL; // ...100 100 100...
const uint64_t resolution = (index >> 52 & 0xF);
const uint64_t unused_count = MAX_RESOLUTION - resolution;
const uint64_t unused_bitsize = unused_count * CELL_BITSIZE;
const uint64_t digits_mask = (1ULL << (resolution * CELL_BITSIZE)) - 1;
const uint64_t digits = (index >> unused_bitsize) & digits_mask;
return ((~digits - LO_MAGIC) & (digits & HI_MAGIC)) != 0ULL;
}
int main(void) {
// Valid H3 index
// 0-0001-000-1100-0010101-110-101-110-001-100-000-101-001-100-110-110-101-111-111-111
assert(!has_invalid_digit(0x08c2bae305336bffULL));
// First digit invalid.
// 0-0001-000-1100-0010101-111-101-110-001-100-000-101-001-100-110-110-101-111-111-111
assert(has_invalid_digit(0x08c2bee305336bffULL));
// Digit in the middle invalid.
// 0-0001-000-1100-0010101-110-101-110-001-100-111-101-001-100-110-110-101-111-111-111
assert(has_invalid_digit(0x08c2bae33d336bffULL));
// Last digit invalid.
// 0-0001-000-1100-0010101-110-101-110-001-100-000-101-001-100-110-110-111-111-111-111
assert(has_invalid_digit(0x08c2bae305336fffULL));
return 0;
} So, in the end we can replace the loop by For sure, this would deserve some nice comments, because it's not obvious on a first read (I went heavy on the comments for my implementation xD). If something is not clear I can give a more detailed explanation (but that basically boils down on how Alan Mycroft's null-byte detection works). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wow, this is awesome! What a great find! I spent a little time reworking this PR to add your logic. It definitely helps on the benchmarks. It looks like you also came up with some improvements for the pentagon branch here: nmandery/h3ron#34 I'll have to parse that as well. Also happy if you want to make a pull against this PR! |
||
for (; r <= res; r++) { | ||
if (GT(h, nBitsDigit) == 7) return 0; | ||
h <<= nBitsDigit; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Assuming bitshifts are as expensive as they were when I was in college, it may make sense to just have an array of 16 masks and do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I ran some toy timing tests and didn't see much difference between masking vs shifting: https://gist.github.com/ajfriend/32571f0af1f3ea3133b6836fc861c730 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wouldn't be surprised if both code generate the same (or almost) assembly behind. Compiler are quite clever for this kind of simple expression nowadays, like you don't have to write |
||
} | ||
|
||
// if we make it const h, then we can move this code up front, to loops | ||
// happen last Now check that all the unused digits after `res` are set to 7 | ||
// (INVALID_DIGIT). Bit shift operations allow us to avoid looping through | ||
// digits; this saves time in benchmarks. | ||
int shift = (15 - res) * 3; | ||
uint64_t m = 0; | ||
m = ~m; | ||
m >>= shift; // can i do this by shifting left and avoid the next negation? | ||
m = ~m; | ||
if (h != m) return 0; | ||
|
||
// add the timing code to the main repo in a PR before landing this, so its | ||
// easy to get comparison times | ||
|
||
// If no flaws were identified above, then the index is a valid H3 cell. | ||
return 1; | ||
} | ||
|
||
int _isValidCell_const(const H3Index h) { | ||
// Implementation strategy: | ||
// | ||
// Walk from high to low bits, checking validity of | ||
// groups of bits as we go. After each check, shift bits off | ||
// to the left, so that the next relevant group is at the | ||
// highest bit location in the H3 Index, which we can | ||
// easily read off with the `GT` macro. This strategy helps | ||
// us avoid re-computing shifts and masks for each group. | ||
// | ||
// | Region | # bits | | ||
// |------------|--------| | ||
// | High | 1 | | ||
// | Mode | 4 | | ||
// | Reserved | 3 | | ||
// | Resolution | 4 | | ||
// | Base Cell | 7 | | ||
// | Digit 1 | 3 | | ||
// | Digit 2 | 3 | | ||
// | ... | ... | | ||
// | Digit 15 | 3 | | ||
// | ||
// Additionally, we try to group operations and avoid loops when possible. | ||
|
||
// The 1 high bit should be 0b0 | ||
// The 4 mode bits should be 0b0001 (H3_CELL_MODE) | ||
// The 3 reserved bits should be 0b000 | ||
// In total, the top 8 bits should be 0b00001000 | ||
if (GT(h, 8) != 0b00001000) return false; | ||
|
||
// No need to check resolution; any 4 bits give a valid resolution. | ||
const int res = H3_GET_RESOLUTION(h); | ||
|
||
// Check that base cell number is valid. | ||
const int bc = H3_GET_BASE_CELL(h); | ||
if (bc >= NUM_BASE_CELLS) return false; | ||
|
||
// Check that no digit from 1 to `res` is 7 (INVALID_DIGIT). | ||
/* | ||
|
||
MHI = 0b100100100100100100100100100100100100100100100; | ||
MLO = MHI >> 2; | ||
|
||
|
||
| d | d & MHI | ~d | ~d - MLO | d & MHI & (~d - MLO) | result | | ||
|-----|---------|-----|----------|----------------------|---------| | ||
| 000 | 000 | | | 000 | OK | | ||
| 001 | 000 | | | 000 | OK | | ||
| 010 | 000 | | | 000 | OK | | ||
| 011 | 000 | | | 000 | OK | | ||
| 100 | 100 | 011 | 010 | 000 | OK | | ||
| 101 | 100 | 010 | 001 | 000 | OK | | ||
| 110 | 100 | 001 | 000 | 000 | OK | | ||
| 111 | 100 | 000 | 111* | 100 | invalid | | ||
|
||
*: carry happened | ||
|
||
|
||
Note: only care about identifying the *lowest* 7. | ||
|
||
Examples with multiple digits: | ||
|
||
| d | d & MHI | ~d | ~d - MLO | d & MHI & (~d - MLO) | result | | ||
|---------|---------|---------|----------|----------------------|---------| | ||
| 111.111 | 100.100 | 000.000 | 110.111* | 100.100 | invalid | | ||
| 110.111 | 100.100 | 001.000 | 111.111* | 100.100 | invalid | | ||
| 110.110 | 100.100 | 001.001 | 000.000 | 000.000 | OK | | ||
|
||
*: carry happened | ||
|
||
In the second example with 110.111, we "misidentify" the 110 as a 7, due | ||
to a carry from the lower bits. But this is OK because we correctly | ||
identify the lowest (only, in this example) 7 just before it. | ||
|
||
We only have to worry about carries affecting higher bits in the case of | ||
a 7; all other digits (0--6) don't cause a carry when computing ~d - MLO. | ||
So even though a 7 can affect the results of higher bits, this is OK | ||
because we will always correctly identify the lowest 7. | ||
|
||
For further notes, see the discussion here: | ||
https://github.com/uber/h3/pull/496#discussion_r795851046 | ||
|
||
*/ | ||
{ | ||
// does setting static make this faster? | ||
const uint64_t MHI = 0b100100100100100100100100100100100100100100100; | ||
const uint64_t MLO = MHI >> 2; | ||
|
||
H3Index g = h; | ||
|
||
int shift = 3 * (15 - res); | ||
g >>= shift; | ||
g <<= shift; | ||
|
||
if (g & MHI & (~g - MLO)) return false; | ||
} | ||
|
||
// Check that all unused digits after `res` are set to 7 (INVALID_DIGIT). | ||
// Bit shift to avoid looping through digits. | ||
if (res < 15) { | ||
H3Index g = ~h; | ||
|
||
int shift = 19 + 3 * res; | ||
g <<= shift; | ||
g >>= shift; | ||
|
||
if (g) return false; | ||
} | ||
|
||
// One final validation just for pentagons: | ||
// Pentagon cells start with a sequence of 0's (CENTER_DIGIT's). | ||
// The first nonzero digit can't be a 1 (i.e., "deleted subsequence", | ||
// PENTAGON_SKIPPED_DIGIT, or K_AXES_DIGIT). | ||
if (isBaseCellPentagonArr[bc]) { | ||
H3Index g = h; | ||
g <<= 19; | ||
g >>= 19; // at this point, g < 2^45 - 1 | ||
Comment on lines
+349
to
+350
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not replace this with a simple There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, agreed! |
||
|
||
if (g == 0) return true; // all zeros (res 15 pentagon) | ||
|
||
// Converting g to a double essentially puts log2(g) into the | ||
// exponent bits of f (with an offset). | ||
double f = g; | ||
|
||
// reinterpret the double bits as uint64_t | ||
g = *(uint64_t *)&f; | ||
Comment on lines
+360
to
+361
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm unclear if this is safe - it may be necessary to use a different implementation on a platform like ARM (we can probably test this on Raspberry Pi). We should check if UBSan complains about this. |
||
|
||
// Take the 11 exponent bits and the 1 sign bit. | ||
// The sign bit is guaranteed to be 0 in our case. | ||
g = GT(g, 12); | ||
|
||
// Subtract the exponent bias. | ||
g -= 1023; | ||
|
||
// g now holds the index of (its previous) first nonzero bit. | ||
// The first nonzero digit is a 1 (and thus invalid) if the | ||
// first nonzero bit's position is divisible by 3. | ||
if (g % 3 == 0) return false; | ||
} | ||
|
||
// If no flaws were identified above, then the index is a valid H3 cell. | ||
return true; | ||
} | ||
|
||
/** | ||
* Determines whether an H3 index is a valid cell (hexagon or pentagon). | ||
* | ||
* @param h H3 index to test. | ||
* | ||
* @return 1 if the H3 index if valid, and 0 if it is not. | ||
*/ | ||
int H3_EXPORT(isValidCell)(H3Index h) { | ||
// return _isValidCell_old(h); | ||
// return _isValidCell_pop(h); | ||
return _isValidCell_const(h); | ||
} | ||
|
||
/** | ||
* Initializes an H3 index. | ||
* @param hp The H3 index to initialize. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: This looks like "greater than" to me -
GET_BITS
?