Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(eap-items): Use the subscriptable bucket mapper for EAP items #6908

Merged
merged 5 commits into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions snuba/clickhouse/translators/snuba/mappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
Param,
String,
)
from snuba.utils.constants import ATTRIBUTE_BUCKETS
from snuba.utils.hashes import fnv_1a


Expand Down Expand Up @@ -239,6 +238,7 @@ class SubscriptableHashBucketMapper(SubscriptableReferenceMapper):
from_column_name: str
to_col_table: Optional[str]
to_col_name: str
num_attribute_buckets: int

def attempt_map(
self,
Expand All @@ -256,7 +256,7 @@ def attempt_map(
if not isinstance(key.value, str):
return None

bucket_idx = fnv_1a(key.value.encode("utf-8")) % ATTRIBUTE_BUCKETS
bucket_idx = fnv_1a(key.value.encode("utf-8")) % self.num_attribute_buckets
return arrayElement(
expression.alias,
ColumnExpr(None, self.to_col_table, f"{self.to_col_name}_{bucket_idx}"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,87 +16,8 @@ schema:
{ name: attributes_bool, type: Map, args: { key: { type: String }, value: { type: Bool } } },
{ name: attributes_int, type: Map, args: { key: { type: String }, value: { type: Int, args: { size: 64 } } } },

{ name: attributes_string_0, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_1, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_2, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_3, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_4, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_5, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_6, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_7, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_8, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_9, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_10, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_11, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_12, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_13, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_14, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_15, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_16, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_17, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_18, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_19, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_20, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_21, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_22, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_23, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_24, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_25, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_26, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_27, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_28, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_29, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_30, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_31, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_32, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_33, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_34, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_35, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_36, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_37, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_38, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_string_39, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_float_0, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_1, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_2, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_3, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_4, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_5, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_6, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_7, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_8, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_9, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_10, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_11, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_12, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_13, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_14, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_15, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_16, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_17, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_18, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_19, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_20, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_21, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_22, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_23, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_24, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_25, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_26, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_27, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_28, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_29, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_30, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_31, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_32, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_33, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_34, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_35, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_36, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_37, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_38, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: attributes_float_39, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },

{ name: attributes_string, type: Map, args: { key: { type: String }, value: { type: String } } },
{ name: attributes_float, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
{ name: _hash_map_string_0, type: Array, args: { inner_type: { type: UInt, args: { size: 64 } } } },
{ name: _hash_map_string_1, type: Array, args: { inner_type: { type: UInt, args: { size: 64 } } } },
{ name: _hash_map_string_2, type: Array, args: { inner_type: { type: UInt, args: { size: 64 } } } },
Expand Down Expand Up @@ -182,11 +103,33 @@ schema:
storages:
- storage: eap_items
is_writable: true
translation_mappers:
subscriptables:
- mapper: SubscriptableHashBucketMapper
args:
from_column_table: null
from_column_name: attributes_string
to_col_table: null
to_col_name: attributes_string
num_attribute_buckets: 40
- mapper: SubscriptableHashBucketMapper
args:
from_column_table: null
from_column_name: attributes_float
to_col_table: null
to_col_name: attributes_float
num_attribute_buckets: 40

storage_selector:
selector: DefaultQueryStorageSelector

query_processors: []
query_processors:
- processor: HashBucketFunctionTransformer
args:
hash_bucket_names:
- attributes_string
- attributes_float
num_attribute_buckets: 40

validators: []

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,14 @@ storages:
from_column_name: attr_str
to_col_table: null
to_col_name: attr_str
num_attribute_buckets: 20
- mapper: SubscriptableHashBucketMapper
args:
from_column_table: null
from_column_name: attr_num
to_col_table: null
to_col_name: attr_num
num_attribute_buckets: 20

storage_selector:
selector: DefaultQueryStorageSelector
Expand All @@ -78,6 +80,7 @@ query_processors:
hash_bucket_names:
- attr_str
- attr_num
num_attribute_buckets: 20

validate_data_model: do_nothing # in order to reference aliased columns, we shouldn't validate columns purely based on the entity schema
validators:
Expand Down
7 changes: 4 additions & 3 deletions snuba/query/processors/logical/hash_bucket_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from snuba.query.logical import Query
from snuba.query.processors.logical import LogicalQueryProcessor
from snuba.query.query_settings import QuerySettings
from snuba.utils.constants import ATTRIBUTE_BUCKETS
from snuba.utils.hashes import fnv_1a


Expand All @@ -25,8 +24,10 @@ class HashBucketFunctionTransformer(LogicalQueryProcessor):
# Store the configuration this transformer needs when rewriting queries.
#
# hash_bucket_names: base names of the bucketed columns this processor is
#   configured for (the YAML configs in this change pass e.g. attr_str /
#   attr_num, or attributes_string / attributes_float).
# num_attribute_buckets: bucket count for those columns. The transform helpers
#   use it as the range() bound when generating one expression per bucket and
#   as the modulus applied to fnv_1a(key) when selecting a single bucket.
#
# NOTE(review): nothing is validated here. A value of 0 would presumably only
#   surface later as a ZeroDivisionError in the modulo — confirm that every
#   config supplies a positive integer matching the storage's bucket count.
def __init__(
self,
hash_bucket_names: Sequence[str],
num_attribute_buckets: int,
):
self.hash_bucket_names = hash_bucket_names
# Kept per instance so each entity/storage can declare its own bucket count.
self.num_attribute_buckets = num_attribute_buckets

def process_query(self, query: Query, query_settings: QuerySettings) -> None:
def transform_map_keys_and_values_expression(exp: Expression) -> Expression:
Expand Down Expand Up @@ -61,7 +62,7 @@ def transform_map_keys_and_values_expression(exp: Expression) -> Expression:
),
),
)
for i in range(ATTRIBUTE_BUCKETS)
for i in range(self.num_attribute_buckets)
),
)

Expand All @@ -86,7 +87,7 @@ def transform_map_contains_expression(exp: Expression) -> Expression:
if not isinstance(key, Literal) or not isinstance(key.value, str):
return exp

bucket_idx = fnv_1a(key.value.encode("utf-8")) % ATTRIBUTE_BUCKETS
bucket_idx = fnv_1a(key.value.encode("utf-8")) % self.num_attribute_buckets
return FunctionCall(
alias=exp.alias,
function_name=exp.function_name,
Expand Down
9 changes: 4 additions & 5 deletions tests/clickhouse/translators/snuba/test_translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
Literal,
SubscriptableReference,
)
from snuba.utils import constants
from snuba.utils.hashes import fnv_1a


Expand Down Expand Up @@ -127,7 +126,9 @@ def test_tag_translation() -> None:


def test_hash_bucket_tag_translation() -> None:
translated = SubscriptableHashBucketMapper(None, "tags", None, "tags").attempt_map(
translated = SubscriptableHashBucketMapper(
None, "tags", None, "tags", 25
).attempt_map(
SubscriptableReference(
"tags[release]", Column(None, None, "tags"), Literal(None, "release")
),
Expand All @@ -138,9 +139,7 @@ def test_hash_bucket_tag_translation() -> None:
"tags[release]",
"arrayElement",
(
Column(
None, None, f"tags_{fnv_1a(b'release') % constants.ATTRIBUTE_BUCKETS}"
),
Column(None, None, f"tags_{fnv_1a(b'release') % 25}"),
Literal(None, "release"),
),
)
Expand Down
11 changes: 6 additions & 5 deletions tests/query/processors/test_hash_bucket_functions_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
HashBucketFunctionTransformer,
)
from snuba.query.query_settings import HTTPQuerySettings
from snuba.utils.constants import ATTRIBUTE_BUCKETS

test_data = [
(
Expand Down Expand Up @@ -51,7 +50,7 @@
FunctionCall(
None, "mapKeys", (Column(None, None, f"attr_str_{i}"),)
)
for i in range(ATTRIBUTE_BUCKETS)
for i in range(5)
),
),
),
Expand Down Expand Up @@ -108,7 +107,7 @@
"mapValues",
(Column(None, None, f"attr_str_{i}"),),
)
for i in range(ATTRIBUTE_BUCKETS)
for i in range(5)
),
),
),
Expand Down Expand Up @@ -175,7 +174,7 @@
"mapValues",
(Column(None, None, f"attr_str_{i}"),),
)
for i in range(ATTRIBUTE_BUCKETS)
for i in range(5)
),
)
),
Expand Down Expand Up @@ -220,7 +219,9 @@
@pytest.mark.parametrize("pre_format, expected_query", test_data)
def test_format_expressions(pre_format: Query, expected_query: Query) -> None:
copy = deepcopy(pre_format)
HashBucketFunctionTransformer("attr_str").process_query(copy, HTTPQuerySettings())
HashBucketFunctionTransformer("attr_str", num_attribute_buckets=5).process_query(
copy, HTTPQuerySettings()
)
assert copy.get_selected_columns() == expected_query.get_selected_columns()
assert copy.get_groupby() == expected_query.get_groupby()
assert copy.get_condition() == expected_query.get_condition()
Loading