From 3736a249b2bca1fa28234d56eef02c6d553075b5 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 2 Aug 2024 16:23:36 -0700 Subject: [PATCH 1/3] Update UnderstandingKLLBounds.md --- docs/KLL/UnderstandingKLLBounds.md | 48 +++++++++++++----------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/docs/KLL/UnderstandingKLLBounds.md b/docs/KLL/UnderstandingKLLBounds.md index 06b90a6f..80871666 100644 --- a/docs/KLL/UnderstandingKLLBounds.md +++ b/docs/KLL/UnderstandingKLLBounds.md @@ -63,30 +63,20 @@ public class QuantileBoundsTest { println("r2: " + r2); println(""); - double r1LB = sk.getRankLowerBound(r1); - println("r1LB: " + r1LB); - double r1UB = sk.getRankUpperBound(r1); - println("r1UB: " + r1UB); - - double r2LB = sk.getRankLowerBound(r2); - println("r2LB: " + r2LB); - double r2UB = sk.getRankUpperBound(r2); - println("r2UB: " + r2UB); + println("r1LB(r1): " + sk.getRankLowerBound(r1)); + println("r1UB(r1): " + sk.getRankUpperBound(r1)); + println("r2LB(r2): " + sk.getRankLowerBound(r2)); + println("r2UB(r2): " + sk.getRankUpperBound(r2)); println(""); - double q1LB = sk.getQuantileLowerBound(r1); - println("q1LB(r1): " + q1LB); - double q1UB = sk.getQuantileUpperBound(r1); - println("q1UB(r1): " + q1UB); - - double q2LB = sk.getQuantileLowerBound(r2); - println("q2LB(r2): " + q2LB); - double q2UB = sk.getQuantileUpperBound(r2); - println("q2UB(r2): " + q2UB); + println("q1LB(r1): " + sk.getQuantileLowerBound(r1)); + println("q1UB(r1): " + sk.getQuantileUpperBound(r1)); + println("q2LB(r2): " + sk.getQuantileLowerBound(r2)); + println("q2UB(r2): " + sk.getQuantileUpperBound(r2)); println(""); } - static void println(Object o) { System.out.println(o.toString()); } + private static void println(Object o) { System.out.println(o.toString()); } } ``` @@ -101,10 +91,10 @@ q2: 620.0 r1: 0.5 r2: 0.52 -r1LB: 0.4932237572729862 -r1UB: 0.5067762427270138 -r2LB: 0.5132237572729862 -r2UB: 0.5267762427270138 +r1LB(r1): 0.4932237572729862 +r1UB(r1): 0.5067762427270138 +r2LB(r2): 0.5132237572729862 +r2UB(r2): 0.5267762427270138 q1LB(r1): 494.0 q1UB(r1): 608.0 @@ -119,19 +109,21 @@ The sketch is configured with a k=400, which results in a normalized rank error The input stream of 1000 values has a big discontinuity starting at *i* = 501. So the actual sequence of inputs is 1 to 500 and 601 to 1100. -QuantileBounds1.png +QuantileBounds1.png We choose two quantiles on either side of the discontinuity, 500 and 620, and get their respective ranks of 0.5 and 0.52. Note that because of the discontinuity the difference in the input quantiles is 120/1100 or ~10.9%, while the difference in their respective ranks is only 2%. -Next we compute the upper and lower rank bounds of the two resulting ranks of 0.5 and 0.52, which are given above. Note that the UB - LB of each rank is about .013 which is 2 X .0067. This means that the true rank of each quantile is within the UB - LB range of ranks with a confidence of 99%, which is about +/- 2.6 standard deviations from the mean. +Next we compute the rank upper bound (UB) and rank lower bound (LB) of the two resulting ranks of 0.5 and 0.52, which are given above. Note that the UB - LB of each rank is about .013 which is 2 X .0067. This means that the true rank of each quantile is within the UB - LB range of ranks with a confidence of 99%, which is about +/- 2.6 standard deviations from the mean. + +Then we compute the quantile UB and LB of the same two resulting ranks of 0.5 and 0.52. Note that the UB - LB quantile range of *r1* is 114/1100 or 10.4%, because in between the rank UB and LB is the discontinuity. These points are shown in the next plot -Then we compute the upper and lower quantile bounds of the same two resulting ranks of 0.5 and 0.52. Note that the UB - LB quantile range of *r1* is 114/1100 or 10.4%, because in between the rank UB and LB is the discontinuity. These points are shown in the next image. +[//]: # ( {{site.docs_img_dir}} ) -QuantileBounds2.png +QuantileBounds2.png This graphically illustrates why the mathematical guarantee of error applies only to the rank domain, because the input quantile domian could have huge discontinuities. Nonetheless, we **can** say that the true quantile does lie within that UB - LB quantile range with a confidence of 99%. But we cannot guarantee anything about the UB - LB quantile difference and relate that to a quantile accuracy compared to the total range of the input values. -Our Classic, KLL, and REQ quantiles sketches are input insensitive and do not know or care what the input distribution looks like. It does not have to be a smooth and well behaved function! This is not the case with other heuristic quantile algorithms, +Our Classic, KLL, and REQ quantiles sketches are input insensitive and do not know or care what the input distribution looks like. It does not have to be a smooth and well behaved function. This is not the case with other heuristic quantile algorithms, From a2032e9591bfa613b560e2f543484b60d00e6382 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 2 Aug 2024 16:34:20 -0700 Subject: [PATCH 2/3] Change one word. --- docs/KLL/UnderstandingKLLBounds.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/KLL/UnderstandingKLLBounds.md b/docs/KLL/UnderstandingKLLBounds.md index 80871666..c56e4bdc 100644 --- a/docs/KLL/UnderstandingKLLBounds.md +++ b/docs/KLL/UnderstandingKLLBounds.md @@ -113,7 +113,7 @@ The input stream of 1000 values has a big discontinuity starting at *i* = 501. S We choose two quantiles on either side of the discontinuity, 500 and 620, and get their respective ranks of 0.5 and 0.52. Note that because of the discontinuity the difference in the input quantiles is 120/1100 or ~10.9%, while the difference in their respective ranks is only 2%. -Next we compute the rank upper bound (UB) and rank lower bound (LB) of the two resulting ranks of 0.5 and 0.52, which are given above. Note that the UB - LB of each rank is about .013 which is 2 X .0067. This means that the true rank of each quantile is within the UB - LB range of ranks with a confidence of 99%, which is about +/- 2.6 standard deviations from the mean. +Next we compute the rank upper bound (UB) and rank lower bound (LB) of the two resulting ranks of 0.5 and 0.52, which are given above. Note that the UB - LB of each rank is about .013 which is 2 X .0067. This means that the true rank of each quantile is within the UB - LB range of ranks with a confidence of 99%, which is about +/- 2.6 standard deviations from the estimate. Then we compute the quantile UB and LB of the same two resulting ranks of 0.5 and 0.52. Note that the UB - LB quantile range of *r1* is 114/1100 or 10.4%, because in between the rank UB and LB is the discontinuity. These points are shown in the next plot From 213bddd835750b63b0935b32a219d4e32d2a2b0e Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 2 Aug 2024 17:08:41 -0700 Subject: [PATCH 3/3] Back to {{site.docs_img_dir}} --- docs/KLL/UnderstandingKLLBounds.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/KLL/UnderstandingKLLBounds.md b/docs/KLL/UnderstandingKLLBounds.md index c56e4bdc..1ce56033 100644 --- a/docs/KLL/UnderstandingKLLBounds.md +++ b/docs/KLL/UnderstandingKLLBounds.md @@ -109,7 +109,7 @@ The sketch is configured with a k=400, which results in a normalized rank error The input stream of 1000 values has a big discontinuity starting at *i* = 501. So the actual sequence of inputs is 1 to 500 and 601 to 1100. -QuantileBounds1.png +QuantileBounds1.png We choose two quantiles on either side of the discontinuity, 500 and 620, and get their respective ranks of 0.5 and 0.52. Note that because of the discontinuity the difference in the input quantiles is 120/1100 or ~10.9%, while the difference in their respective ranks is only 2%. @@ -117,9 +117,7 @@ Next we compute the rank upper bound (UB) and rank lower bound (LB) of the two r Then we compute the quantile UB and LB of the same two resulting ranks of 0.5 and 0.52. Note that the UB - LB quantile range of *r1* is 114/1100 or 10.4%, because in between the rank UB and LB is the discontinuity. These points are shown in the next plot -[//]: # ( {{site.docs_img_dir}} ) - -QuantileBounds2.png +QuantileBounds2.png This graphically illustrates why the mathematical guarantee of error applies only to the rank domain, because the input quantile domian could have huge discontinuities. Nonetheless, we **can** say that the true quantile does lie within that UB - LB quantile range with a confidence of 99%. But we cannot guarantee anything about the UB - LB quantile difference and relate that to a quantile accuracy compared to the total range of the input values.