From 276fe929a4e15caabb23d7e48b5d8512853f4bc6 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 20 Sep 2024 15:46:18 -0700 Subject: [PATCH] Fix Theta --- _includes/toc.html | 80 ------------------------------ docs/Theta/ThetaSketches.md | 10 ++-- src/main/resources/docgen/toc.json | 69 +------------------------- 3 files changed, 6 insertions(+), 153 deletions(-) diff --git a/_includes/toc.html b/_includes/toc.html index ea0e7a929..3ad798071 100644 --- a/_includes/toc.html +++ b/_includes/toc.html @@ -94,88 +94,8 @@
  • •HLL Sketch vs Druid HyperLogLogCollector
  • -
  • •Theta Sketches
  • - -

    - -

    -
    -
  • •Theta Sketch Framework
  • - -

    - -

    - - -

    - -

    - - -

    - -

    - - -

    - -

    - - -

    - -

    - - -

    - -

    - - -

    - -

    - -
    -
  • •Tuple Sketches
  • -

    diff --git a/docs/Theta/ThetaSketches.md b/docs/Theta/ThetaSketches.md index 27bfeb8f1..c64bf9233 100644 --- a/docs/Theta/ThetaSketches.md +++ b/docs/Theta/ThetaSketches.md @@ -61,7 +61,7 @@ layout: doc_page ## Theta Sketch Framework -Theta Sketches are a generalization of the well known Kth Minimum Value (KMV)1,2 +Theta Sketches are a generalization of the well known Kth Minimum Value (KMV) [^1],[^2] sketches in that KMV sketches are a form of Theta Sketch, but not all Theta Sketches are KMV. The Theta Sketch Framework (TSF) @@ -99,7 +99,7 @@ we are going to create a separate threshold variable and call it theta (&thet This effectively decouples #3 and #4 above from k. When the sketch is empty θ = 1.0. After the sketch has filled with k minimum values θ is still 1.0. When the next incoming unique value must be inserted into the sketch the (k+1)th -minimum value, is assigned to θ and removed from the cache.3 +minimum value, is assigned to θ and removed from the cache.[^3] Ultimately, it will be the size of S, |S|, that will determine the stored size of a sketch, which decouples #2 above from the value k. @@ -111,11 +111,11 @@ We will discuss the RSE in a later section. ThetaSketch1 -[1] Z. Bar-Yossef, T. Jayram, R. Kumar, D. Sivakumar, and L. Trevisan. Counting distinct elements in a data stream. In Randomization and Approximation Techniques in Computer Science, pages 1–10. Springer, 2002. +[^1]: Z. Bar-Yossef, T. Jayram, R. Kumar, D. Sivakumar, and L. Trevisan. Counting distinct elements in a data stream. In Randomization and Approximation Techniques in Computer Science, pages 1–10. Springer, 2002. -[2] See KMV Tutorial for a brief tutorial on KMV Sketches. +[^2]: See KMV Tutorial for a brief tutorial on KMV Sketches. -[3] This is a limited "KMV perspective" on how θ gets assigned. The attached paper +[^3]: This is a limited "KMV perspective" on how θ gets assigned. The attached paper Theta Sketch Framework presents multiple ways that θ can be assigned using the Theta Choosing Function (TCF). Different sketch algorithms have different TCFs. diff --git a/src/main/resources/docgen/toc.json b/src/main/resources/docgen/toc.json index 861339778..ef9b57f17 100644 --- a/src/main/resources/docgen/toc.json +++ b/src/main/resources/docgen/toc.json @@ -75,74 +75,7 @@ ] }, - { "class":"Dropdown", "desc" : "Theta Sketches", "array": - [ - { "class":"Doc", "desc" : "Theta Sketch Framework", "dir" : "Theta", "file": "ThetaSketchFramework" }, - - { "class":"Dropdown", "desc" : "Theta Examples", "array": - [ - {"class":"Doc", "desc" : "Concurrent Theta Sketch", "dir" : "Theta", "file": "ConcurrentThetaSketch" }, - {"class":"Doc", "desc" : "Theta Sketch Java Example", "dir" : "Theta", "file": "ThetaJavaExample" }, - {"class":"Doc", "desc" : "Theta Sketch Spark Example", "dir" : "Theta", "file": "ThetaSparkExample" }, - {"class":"Doc", "desc" : "Theta Sketch Pig UDFs", "dir" : "Theta", "file": "ThetaPigUDFs" }, - {"class":"Doc", "desc" : "Theta Sketch Hive UDFs", "dir" : "Theta", "file": "ThetaHiveUDFs" }, - ] - }, - - { "class":"Dropdown", "desc" : "KMV Tutorial", "array": - [ - {"class":"Doc", "desc" : "The Inverse Estimate", "dir" : "Theta", "file": "InverseEstimate" }, - {"class":"Doc", "desc" : "Empty Sketch", "dir" : "Theta", "file": "KMVempty" }, - {"class":"Doc", "desc" : "First Estimator", "dir" : "Theta", "file": "KMVfirstEst" }, - {"class":"Doc", "desc" : "Better Estimator", "dir" : "Theta", "file": "KMVbetterEst" }, - {"class":"Doc", "desc" : "Rejection Rules", "dir" : "Theta", "file": "KMVrejection" }, - {"class":"Doc", "desc" : "Update V(kth) Rule", "dir" : "Theta", "file": "KMVupdateVkth" }, - ] - }, - - { "class":"Dropdown", "desc" : "Set Operations and P-sampling", "array": - [ - {"class":"Doc", "desc" : "Set Operations", "dir" : "Theta", "file": "ThetaSketchSetOps" }, - {"class":"Doc", "desc" : "Model & Test Set Operations", "dir" : "Theta", "file": "ThetaSetOpsCornerCases" }, - {"class":"Doc", "desc" : "p-Sampling", "dir" : "Theta", "file": "ThetaPSampling" }, - ] - }, - - { "class":"Dropdown", "desc" : "Accuracy", "array": - [ - {"class":"Doc", "desc" : "Basic Accuracy", "dir" : "Theta", "file": "ThetaAccuracy" }, - {"class":"Doc", "desc" : "Accuracy Plots", "dir" : "Theta", "file": "ThetaAccuracyPlots" }, - {"class":"Doc", "desc" : "Relative Error Table", "dir" : "Theta", "file": "ThetaErrorTable" }, - {"class":"Doc", "desc" : "SetOp Accuracy", "dir" : "Theta", "file": "ThetaSketchSetOpsAccuracy" }, - {"class":"Doc", "desc" : "Unions With Different k", "dir" : "Theta", "file": "AccuracyOfDifferentKUnions" }, - ] - }, - - { "class":"Dropdown", "desc" : "Size", "array": - [ - {"class":"Doc", "desc" : "Theta Sketch Size", "dir" : "Theta", "file": "ThetaSize" }, - ] - }, - - { "class":"Dropdown", "desc" : "Speed", "array": - [ - {"class":"Doc", "desc" : "Update Speed", "dir" : "Theta", "file": "ThetaUpdateSpeed" }, - {"class":"Doc", "desc" : "Merge Speed", "dir" : "Theta", "file": "ThetaMergeSpeed" }, - ] - }, - - { "class":"Dropdown", "desc" : "Theta Sketch Theory", "array": - [ - {"class":"Doc", "desc" : "Theta Sketch Framework (PDF)", "dir" : "", "file": "ThetaSketchFramework", "pdf":"true" }, - {"class":"Doc", "desc" : "Theta Sketch Equations (PDF)", "dir" : "", "file": "ThetaSketchEquations", "pdf":"true" }, - {"class":"Doc", "desc" : "DataSketches (PDF)", "dir" : "", "file": "DataSketches", "pdf":"true" }, - {"class":"Doc", "desc" : "Confidence Intervals Notes", "dir" : "Theta", "file": "ThetaConfidenceIntervals" }, - {"class":"Doc", "desc" : "Merging Algorithm Notes", "dir" : "Theta", "file": "ThetaMergingAlgorithm" }, - {"class":"Doc", "desc" : "Theta References", "dir" : "Theta", "file": "ThetaReferences" }, - ] - }, - ] - }, + { "class":"Doc", "desc" : "Theta Sketches", "dir" : "Theta", "file": "ThetaSketches" }, { "class":"Doc", "desc" : "Tuple Sketches", "dir" : "Tuple", "file": "TupleSketches" }, ] },