MaterializeInc · antiguru · Jul 12, 2023 · Jun 28, 2023 · Jul 11, 2023 · Jun 28, 2023
diff --git a/clippy.toml b/clippy.toml
@@ -22,17 +22,22 @@ disallowed-methods = [
 
     { path = "aws_sdk_s3::Client::new", reason = "use the `mz_aws_s3_util::new_client` function instead" },
 
-    # Prevent access to Differential APIs that want to use the default trace or use a default name.
+    # Prevent access to Differential APIs that use the default trace or a default name, or for which we offer our own
+    # wrapper.
     { path = "differential_dataflow::Collection::consolidate", reason = "use the `differential_dataflow::Collection::consolidate_named` function instead" },
-    { path = "differential_dataflow::operators::arrange::arrangement::Arrange::arrange", reason = "use the `arrange_named` function instead" },
-    { path = "differential_dataflow::operators::arrange::arrangement::ArrangeByKey::arrange_by_key", reason = "use the `Arrange::arrange_named` function instead" },
-    { path = "differential_dataflow::operators::arrange::arrangement::ArrangeByKey::arrange_by_key_named", reason = "use the `Arrange::arrange_named` function instead" },
-    { path = "differential_dataflow::operators::arrange::arrangement::ArrangeBySelf::arrange_by_self", reason = "use the `Arrange::arrange_named` function instead" },
-    { path = "differential_dataflow::operators::arrange::arrangement::ArrangeBySelf::arrange_by_self_named", reason = "use the `Arrange::arrange_named` function instead" },
+    { path = "differential_dataflow::operators::arrange::arrangement::Arrange::arrange", reason = "use the `MzArrange::mz_arrange_named` function instead" },
+    { path = "differential_dataflow::operators::arrange::arrangement::Arrange::arrange_named", reason = "use the `MzArrange::mz_arrange_named` function instead" },
+    { path = "differential_dataflow::operators::arrange::arrangement::Arrange::arrange_core", reason = "use the `MzArrange::mz_arrange_core` function instead" },
+    { path = "differential_dataflow::operators::arrange::arrangement::ArrangeByKey::arrange_by_key", reason = "use the `MzArrange::mz_arrange_named` function instead" },
+    { path = "differential_dataflow::operators::arrange::arrangement::ArrangeByKey::arrange_by_key_named", reason = "use the `MzArrange::mz_arrange_named` function instead" },
+    { path = "differential_dataflow::operators::arrange::arrangement::ArrangeBySelf::arrange_by_self", reason = "use the `MzArrange::mz_arrange_named` function instead" },
+    { path = "differential_dataflow::operators::arrange::arrangement::ArrangeBySelf::arrange_by_self_named", reason = "use the `MzArrange::mz_arrange_named` function instead" },
     { path = "differential_dataflow::operators::reduce::Count::count", reason = "use the `differential_dataflow::operators::reduce::ReduceCore::reduce_abelian` function instead" },
     { path = "differential_dataflow::operators::reduce::Count::count_core", reason = "use the `differential_dataflow::operators::reduce::ReduceCore::reduce_abelian` function instead" },
     { path = "differential_dataflow::operators::reduce::Reduce::reduce", reason = "use the `differential_dataflow::operators::reduce::ReduceCore::reduce_abelian` function instead" },
     { path = "differential_dataflow::operators::reduce::Reduce::reduce_named", reason = "use the `differential_dataflow::operators::reduce::ReduceCore::reduce_abelian` function instead" },
+    { path = "differential_dataflow::operators::reduce::ReduceCore::reduce_abelian", reason = "use the `MzReduce::mz_reduce_abelian` function instead" },
+    { path = "differential_dataflow::operators::reduce::ReduceCore::reduce_core", reason = "use the `MzReduce::mz_reduce_abelian` function instead" },
     { path = "differential_dataflow::operators::reduce::Threshold::distinct", reason = "use the `differential_dataflow::operators::reduce::ReduceCore::reduce_abelian` function instead" },
     { path = "differential_dataflow::operators::reduce::Threshold::distinct_core", reason = "use the `differential_dataflow::operators::reduce::ReduceCore::reduce_abelian` function instead" },
     { path = "differential_dataflow::operators::reduce::Threshold::threshold", reason = "use the `differential_dataflow::operators::reduce::ReduceCore::reduce_abelian` function instead" },

@@ -371,14 +371,23 @@ The `mz_arrangement_sharing` view describes how many times each [arrangement] in
 
 The `mz_arrangement_sizes` view describes the size of each [arrangement] in the system.
 
+The `size`, `capacity`, and `allocations` values are approximations and may underestimate the actual memory usage.
+In particular, reductions can use more memory than is shown here.
+
 <!-- RELATION_SPEC mz_internal.mz_arrangement_sizes -->
-| Field          | Type        | Meaning                                                                                                                   |
-| -------------- |-------------| --------                                                                                                                  |
-| `operator_id`  | [`uint8`]   | The ID of the operator that created the arrangement. Corresponds to [`mz_dataflow_operators.id`](#mz_dataflow_operators). |
-| `records`      | [`numeric`] | The number of records in the arrangement.                                                                                 |
-| `batches`      | [`numeric`] | The number of batches in the arrangement.                                                                                 |
+| Field         | Type        | Meaning                                                                                                                   |
+|---------------|-------------| --------                                                                                                                  |
+| `operator_id` | [`uint8`]   | The ID of the operator that created the arrangement. Corresponds to [`mz_dataflow_operators.id`](#mz_dataflow_operators). |
+| `records`     | [`numeric`] | The number of records in the arrangement.                                                                                 |
+| `batches`     | [`numeric`] | The number of batches in the arrangement.                                                                                 |
+| `size`        | [`numeric`] | The utilized size in bytes of the arrangement.                                                                            |
+| `capacity`    | [`numeric`] | The capacity in bytes of the arrangement. Can be larger than the size.                                                    |
+| `allocations` | [`numeric`] | The number of separate memory allocations backing the arrangement.                                                        |
 
 <!-- RELATION_SPEC_UNDOCUMENTED mz_internal.mz_arrangement_sizes_per_worker -->
+<!-- RELATION_SPEC_UNDOCUMENTED mz_internal.mz_arrangement_heap_allocations_raw -->
+<!-- RELATION_SPEC_UNDOCUMENTED mz_internal.mz_arrangement_heap_capacity_raw -->
+<!-- RELATION_SPEC_UNDOCUMENTED mz_internal.mz_arrangement_heap_size_raw -->
 
 ### `mz_compute_delays_histogram`
 
@@ -484,6 +493,22 @@ The `mz_dataflow_addresses` view describes how the [dataflow] channels and opera
 
 <!-- RELATION_SPEC_UNDOCUMENTED mz_internal.mz_dataflow_addresses_per_worker -->
 
+### `mz_dataflow_arrangement_sizes`
+
+The `mz_dataflow_arrangement_sizes` view describes the number of records and batches, as well as the size, capacity,
+and allocations, of the arrangements contained in operators under each dataflow.
+
+<!-- RELATION_SPEC mz_internal.mz_dataflow_arrangement_sizes -->
+| Field         | Type        | Meaning                                                                      |
+|---------------|-------------|------------------------------------------------------------------------------|
+| `id`          | [`uint8`]   | The ID of the [dataflow]. Corresponds to [`mz_dataflows.id`](#mz_dataflows). |
+| `name`        | [`text`]    | The name of the object (e.g., index) maintained by the dataflow.             |
+| `records`     | [`numeric`] | The number of records in all arrangements in the dataflow.                   |
+| `batches`     | [`numeric`] | The number of batches in all arrangements in the dataflow.                   |
+| `size`        | [`numeric`] | The utilized size in bytes of the arrangements.                              |
+| `capacity`    | [`numeric`] | The capacity in bytes of the arrangements. Can be larger than the size.      |
+| `allocations` | [`numeric`] | The number of separate memory allocations backing the arrangements.          |
+
 ### `mz_dataflow_channels`
 
 The `mz_dataflow_channels` view describes the communication channels between [dataflow] operators.
@@ -553,18 +578,18 @@ The `mz_dataflow_operator_parents` view describes how [dataflow] operators are n
 
 <!-- RELATION_SPEC_UNDOCUMENTED mz_internal.mz_dataflow_operator_parents_per_worker -->
 
-### `mz_dataflow_arrangement_sizes`
+### `mz_dataflow_shutdown_durations_histogram`
 
-The `mz_dataflow_arrangement_sizes` view describes how many records and batches
-are contained in operators under each dataflow.
+The `mz_dataflow_shutdown_durations_histogram` view describes a histogram of the time in nanoseconds required to fully shut down dropped [dataflows][dataflow].
 
-<!-- RELATION_SPEC mz_internal.mz_dataflow_arrangement_sizes -->
-| Field     | Type        | Meaning                                                                      |
-|-----------|-------------|------------------------------------------------------------------------------|
-| `id`      | [`uint8`]   | The ID of the [dataflow]. Corresponds to [`mz_dataflows.id`](#mz_dataflows). |
-| `name`    | [`text`]    | The name of the object (e.g., index) maintained by the dataflow.             |
-| `records` | [`numeric`] | The number of records in all arrangements in the dataflow.                   |
-| `batches` | [`numeric`] | The number of batches in all arrangements in the dataflow.                   |
+<!-- RELATION_SPEC mz_internal.mz_dataflow_shutdown_durations_histogram -->
+| Field          | Type        | Meaning                                                |
+| -------------- |-------------| --------                                               |
+| `duration_ns`  | [`uint8`]   | The upper bound of the bucket in nanoseconds.          |
+| `count`        | [`numeric`] | The (noncumulative) count of dataflows in this bucket. |
+
+<!-- RELATION_SPEC_UNDOCUMENTED mz_internal.mz_dataflow_shutdown_durations_histogram_per_worker -->
+<!-- RELATION_SPEC_UNDOCUMENTED mz_internal.mz_dataflow_shutdown_durations_histogram_raw -->
 
 ### `mz_message_counts`
 
@@ -604,6 +629,9 @@ The `mz_records_per_dataflow` view describes the number of records in each [data
 | `id`         | [`uint8`]   | The ID of the dataflow. Corresponds to [`mz_dataflows.id`](#mz_dataflows). |
 | `name`       | [`text`]    | The internal name of the dataflow.                                         |
 | `records`    | [`numeric`] | The number of records in the dataflow.                                     |
+| `size`        | [`numeric`] | The utilized size in bytes of the arrangements.                            |
+| `capacity`    | [`numeric`] | The capacity in bytes of the arrangements. Can be larger than the size.    |
+| `allocations` | [`numeric`] | The number of separate memory allocations backing the arrangements.        |
 
 <!-- RELATION_SPEC_UNDOCUMENTED mz_internal.mz_records_per_dataflow_per_worker -->
 
@@ -618,6 +646,9 @@ The `mz_records_per_dataflow_operator` view describes the number of records in e
 | `name`         | [`text`]    | The internal name of the operator.                                                           |
 | `dataflow_id`  | [`uint8`]   | The ID of the dataflow. Corresponds to [`mz_dataflows.id`](#mz_dataflows).                   |
 | `records`      | [`numeric`] | The number of records in the operator.                                                       |
+| `size`        | [`numeric`] | The utilized size in bytes of the arrangement.                                               |
+| `capacity`    | [`numeric`] | The capacity in bytes of the arrangement. Can be larger than the size.                       |
+| `allocations` | [`numeric`] | The number of separate memory allocations backing the arrangement.                           |
 
 <!-- RELATION_SPEC_UNDOCUMENTED mz_internal.mz_records_per_dataflow_operator_per_worker -->
 

diff --git a/misc/python/materialize/mzcompose/services.py b/misc/python/materialize/mzcompose/services.py
@@ -49,6 +49,7 @@
     "persist_stats_filter_enabled": "true",
     "persist_stats_collection_enabled": "true",
     "persist_stats_audit_percent": "100",
+    "enable_arrangement_size_logging": "true",
     "enable_ld_rbac_checks": "true",
     "enable_rbac_checks": "true",
     "enable_monotonic_oneshot_selects": "true",