DEVPROD-11215 Create benchmarks for TS bucket-level optimizations #1262
Open
mattBoros wants to merge 9 commits into master from DEVPROD-11215
+337 −10
Commits (9):
- ecbb338 three new benchmarks for ts (mattBoros)
- 4137a3a Merge branch 'master' into DEVPROD-11215 (mattBoros)
- 4770ceb benchmark for in extended range (mattBoros)
- f83bf1b cleanup (mattBoros)
- e33bff0 auto-tasks-local (mattBoros)
- 6c5a95b generate docs (mattBoros)
- 6d291a2 update benchmarks (mattBoros)
- 9b100c3 remove extra benchmark (mattBoros)
- d0f9394 Change owner to QI (mattBoros)
New file (163 added lines):
SchemaVersion: 2018-07-01
Owner: Query Integration
Description: |
  This workload runs queries on time-series collections with data before the Unix epoch (extended range).
  Some optimizations can only be made on post-1970 data. This benchmark is intended to give us an idea
  of how much performance we lose on extended-range data.

Keywords:
  - timeseries
  - aggregate

GlobalDefaults:
  Database: &database test
  Collection: &collection Collection0
  DocumentCount: &documentCount 1e7
  Repeat: &repeat 200
  Threads: &threads 1
  MaxPhases: &maxPhases 6
  MetaCount: &metaCount 10

Clients:
  Default:
    QueryOptions:
      maxPoolSize: 400

Actors:
  # Clear any pre-existing collection state.
  - Name: ClearCollection
    Type: CrudActor
    Database: *database
    Threads: 1
    Phases:
      OnlyActiveInPhases:
        Active: [0]
        NopInPhasesUpTo: *maxPhases
        PhaseConfig:
          Repeat: 1
          Threads: 1
          Collection: *collection
          Operations:
            - OperationName: drop

  - Name: CreateTimeseriesCollection
    Type: RunCommand
    Threads: 1
    Phases:
      OnlyActiveInPhases:
        Active: [1]
        NopInPhasesUpTo: *maxPhases
        PhaseConfig:
          Repeat: 1
          Database: *database
          Operation:
            OperationMetricsName: CreateTimeseriesCollection
            OperationName: RunCommand
            OperationCommand:
              {
                create: *collection,
                timeseries:
                  {
                    timeField: "time",
                    metaField: "meta",
                    granularity: "seconds",
                  },
              }

  - Name: InsertData
    Type: Loader
    Threads: 1
    Phases:
      OnlyActiveInPhases:
        Active: [2]
        NopInPhasesUpTo: *maxPhases
        PhaseConfig:
          Repeat: 1
          Threads: 1
          Database: *database
          CollectionCount: 1
          DocumentCount: *documentCount
          BatchSize: 1000
          Document:
            time:
              ^IncDate:
                start: 1960-01-01
                # A 100ms step ensures a full bucket of 1000 documents under the "seconds" granularity.
                step: 100
            meta:
              ^Cycle:
                ofLength: *metaCount
                fromGenerator:
                  ^RandomString:
                    length: 6
                    alphabet: "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

  # Ensure all data is synced to disk before each query phase.
  - Name: Quiesce
    Type: QuiesceActor
    Threads: 1
    Database: *database
    Phases:
      OnlyActiveInPhases:
        Active: [3, 5]
        NopInPhasesUpTo: *maxPhases
        PhaseConfig:
          Repeat: 1
          Threads: 1

  # The control.min.time field can be used as an accurate bucket minimum if it's not an object or
  # an array.
  - Name: BlockProcessingExtendedRangeMinTime
    Type: CrudActor
    Database: *database
    Threads: *threads
    Phases:
      OnlyActiveInPhases:
        Active: [4]
        NopInPhasesUpTo: *maxPhases
        PhaseConfig:
          Repeat: *repeat
          Database: *database
          Collection: *collection
          Operations:
            - OperationMetricsName: TsBlockExtendedRangeMinTime
              OperationName: aggregate
              OperationCommand:
                Pipeline:
                  [
                    {$project: {time: 1, meta: 1}},
                    {$group: {_id: "$meta", gb: {$min: "$time"}}}
                  ]

  # The control.max.time field can only be used as an accurate bucket maximum if it's after 1970.
  - Name: BlockProcessingExtendedRangeMaxTime
    Type: CrudActor
    Database: *database
    Threads: *threads
    Phases:
      OnlyActiveInPhases:
        Active: [6]
        NopInPhasesUpTo: *maxPhases
        PhaseConfig:
          Repeat: *repeat
          Database: *database
          Collection: *collection
          Operations:
            - OperationMetricsName: TsBlockExtendedRangeMaxTime
              OperationName: aggregate
              OperationCommand:
                Pipeline:
                  [
                    {$project: {time: 1, meta: 1}},
                    {$group: {_id: "$meta", gb: {$max: "$time"}}}
                  ]

AutoRun:
  - When:
      mongodb_setup:
        $eq:
          - replica
          - replica-80-feature-flags
          - replica-all-feature-flags
      branch_name:
        $gte: v8.0
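A quick sanity check on the workload's data-generation parameters, as a stdlib-only Python sketch. The 3600-second figure is an assumption taken from MongoDB's documented mapping of the "seconds" granularity to bucketMaxSpanSeconds; everything else follows from the YAML above.

```python
from datetime import datetime, timezone

# The ^IncDate generator starts at 1960-01-01, which is before the Unix
# epoch, so the generated timestamps have negative epoch seconds. This is
# what makes the collection "extended range".
start = datetime(1960, 1, 1, tzinfo=timezone.utc)
epoch_seconds = start.timestamp()
print(epoch_seconds)  # -315619200.0

# With a 100ms step, 1000 consecutive documents span only 100 seconds,
# comfortably inside the 3600s bucketMaxSpanSeconds that (per the MongoDB
# docs) corresponds to the "seconds" granularity, so buckets can fill to
# capacity rather than being closed early for time-span reasons.
step_ms = 100
docs_per_bucket = 1000
span_seconds = step_ms * docs_per_bucket / 1000
print(span_seconds)  # 100.0
```

This is why the comment in the workload calls a 100ms step enough for a "full bucket of 1000 documents": the whole batch fits well inside one bucket's maximum time span.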
Neither extended-range nor non-extended-range collections will be eligible for the $group rewrite with $min on the timeField, because control.min.time is a rounded-down value, right? So we shouldn't be losing performance here between extended range and not. I'm not saying this isn't relevant, but it should be about the same for extended-range and normal time-series collections.
Yeah! That's true. I was thinking it's probably best to cover both $min and $max instead of just $max while we're writing these benchmarks. That way, if we run into a similar $min bug in the future, we'll already have this benchmark and its perf history.
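The rounding behavior discussed above can be illustrated with a small model. This is an illustrative sketch, not the server's code: it assumes, per MongoDB's documented granularity table, that "seconds" granularity rounds bucket minimums down to the nearest minute (bucketRoundingSeconds = 60); the `round_down_to_minute` helper is hypothetical.

```python
from datetime import datetime, timezone

def round_down_to_minute(ts: datetime) -> datetime:
    """Hypothetical model of bucket-minimum rounding: with "seconds"
    granularity, control.min.time is rounded down to the minute."""
    return ts.replace(second=0, microsecond=0)

# Suppose the earliest measurement actually inserted into a bucket is:
actual_min = datetime(1960, 1, 1, 12, 34, 56, tzinfo=timezone.utc)
control_min = round_down_to_minute(actual_min)

# The stored control.min.time is below the true minimum of the bucket, so a
# $min-on-timeField rewrite can't read it directly. That holds whether or
# not the data is extended range, which is the reviewer's point.
print(control_min)               # 1960-01-01 12:34:00+00:00
print(control_min < actual_min)  # True
```

By contrast, control.max.time is not rounded, which is why the $max query is the one where extended-range data can actually cost performance.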