dbt-labs · QMalcolm · Sep 26, 2024 · Sep 19, 2024 · Sep 20, 2024 · Sep 20, 2024
@@ -0,0 +1,6 @@
+kind: Features
+body: Enable `retry` support for microbatch models
+time: 2024-09-25T16:50:02.105069-05:00
+custom:
+  Author: QMalcolm MichelleArk
+  Issue: "10624"
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import List, Tuple
+
+from dbt_common.dataclass_schema import dbtClassMixin
+
+BatchType = Tuple[datetime, datetime]  # one batch as a (batch start, batch end) pair
+
+
+@dataclass
+class BatchResults(dbtClassMixin):  # batches grouped by outcome
+    successful: List[BatchType] = field(default_factory=list)  # fresh empty list per instance
+    failed: List[BatchType] = field(default_factory=list)  # fresh empty list per instance
+
+    def __add__(self, other: BatchResults) -> BatchResults:  # `a + b` merges two results
+        return BatchResults(  # a new object is built; neither operand is modified
+            successful=self.successful + other.successful,  # self's entries first, then other's
+            failed=self.failed + other.failed,  # same ordering for the failed list
+        )
@@ -55,6 +55,7 @@ class NodeStatus(StrEnum):
     Fail = "fail"
     Warn = "warn"
     Skipped = "skipped"
+    PartialSuccess = "partial success"
     Pass = "pass"
     RuntimeErr = "runtime error"
 
@@ -63,6 +64,7 @@ class RunStatus(StrEnum):
     Success = NodeStatus.Success
     Error = NodeStatus.Error
     Skipped = NodeStatus.Skipped
+    PartialSuccess = NodeStatus.PartialSuccess
 
 
 class TestStatus(StrEnum):

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import copy
 import threading
 from dataclasses import dataclass, field
@@ -17,6 +19,7 @@
     get_artifact_schema_version,
     schema_version,
 )
+from dbt.artifacts.schemas.batch_results import BatchResults
 from dbt.artifacts.schemas.results import (
     BaseResult,
     ExecutionResult,
@@ -34,6 +37,7 @@ class RunResult(NodeResult):
     agate_table: Optional["agate.Table"] = field(
         default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
     )
+    batch_results: Optional[BatchResults] = None
 
     @property
     def skipped(self):
@@ -51,6 +55,7 @@ def from_node(cls, node: ResultNode, status: RunStatus, message: Optional[str]):
             node=node,
             adapter_response={},
             failures=None,
+            batch_results=None,
         )
 
 
@@ -67,6 +72,7 @@ class RunResultOutput(BaseResult):
     compiled: Optional[bool]
     compiled_code: Optional[str]
     relation_name: Optional[str]
+    batch_results: Optional[BatchResults] = None
 
 
 def process_run_result(result: RunResult) -> RunResultOutput:
@@ -82,6 +88,7 @@ def process_run_result(result: RunResult) -> RunResultOutput:
         message=result.message,
         adapter_response=result.adapter_response,
         failures=result.failures,
+        batch_results=result.batch_results,
         compiled=result.node.compiled if compiled else None,  # type:ignore
         compiled_code=result.node.compiled_code if compiled else None,  # type:ignore
         relation_name=result.node.relation_name if compiled else None,  # type:ignore

@@ -60,6 +60,7 @@
 from dbt.artifacts.resources import SqlOperation as SqlOperationResource
 from dbt.artifacts.resources import TimeSpine
 from dbt.artifacts.resources import UnitTestDefinition as UnitTestDefinitionResource
+from dbt.artifacts.schemas.batch_results import BatchType
 from dbt.contracts.graph.model_config import UnitTestNodeConfig
 from dbt.contracts.graph.node_args import ModelNodeArgs
 from dbt.contracts.graph.unparsed import (
@@ -442,6 +443,8 @@ def resource_class(cls) -> Type[HookNodeResource]:
 
 @dataclass
 class ModelNode(ModelResource, CompiledNode):
+    batches: Optional[List[BatchType]] = None
+
     @classmethod
     def resource_class(cls) -> Type[ModelResource]:
         return ModelResource

@@ -1916,6 +1916,7 @@ message EndOfRunSummary {
     int32 num_errors = 1;
     int32 num_warnings = 2;
     bool keyboard_interrupt = 3;
+    int32 num_partial_success = 4;
 }
 
 message EndOfRunSummaryMsg {

@@ -1293,9 +1293,12 @@ def code(self) -> str:
         return "Q012"
 
     def message(self) -> str:
-        if self.status == "error":
+        if self.status == "error":  # or 'PARTIAL SUCCESS' in self.status:
             info = "ERROR creating"
             status = red(self.status.upper())
+        elif "PARTIAL SUCCESS" in self.status:
+            info = "PARTIALLY created"
+            status = yellow(self.status.upper())
         else:
             info = "OK created"
             status = green(self.status)
@@ -1860,10 +1863,16 @@ def code(self) -> str:
     def message(self) -> str:
         error_plural = pluralize(self.num_errors, "error")
         warn_plural = pluralize(self.num_warnings, "warning")
+        partial_success_plural = pluralize(self.num_partial_success, "partial success")
+
         if self.keyboard_interrupt:
             message = yellow("Exited because of keyboard interrupt")
         elif self.num_errors > 0:
-            message = red(f"Completed with {error_plural} and {warn_plural}:")
+            message = red(
+                f"Completed with {error_plural}, {partial_success_plural}, and {warn_plural}:"
+            )
+        elif self.num_partial_success > 0:
+            message = yellow(f"Completed with {partial_success_plural} and {warn_plural}")
         elif self.num_warnings > 0:
             message = yellow(f"Completed with {warn_plural}:")
         else:

@@ -1,9 +1,10 @@
 from datetime import datetime, timedelta
-from typing import List, Optional, Tuple
+from typing import List, Optional
 
 import pytz
 
 from dbt.artifacts.resources.types import BatchSize
+from dbt.artifacts.schemas.batch_results import BatchType
 from dbt.contracts.graph.nodes import ModelNode, NodeConfig
 from dbt.exceptions import DbtInternalError, DbtRuntimeError
 
@@ -68,7 +69,7 @@ def build_start_time(self, checkpoint: Optional[datetime]):
 
         return start
 
-    def build_batches(self, start: datetime, end: datetime) -> List[Tuple[datetime, datetime]]:
+    def build_batches(self, start: datetime, end: datetime) -> List[BatchType]:
         """
         Given a start and end datetime, builds a list of batches where each batch is
         the size of the model's batch_size.
@@ -79,7 +80,7 @@ def build_batches(self, start: datetime, end: datetime) -> List[Tuple[datetime,
             curr_batch_start, batch_size, 1
         )
 
-        batches: List[Tuple[datetime, datetime]] = [(curr_batch_start, curr_batch_end)]
+        batches: List[BatchType] = [(curr_batch_start, curr_batch_end)]
         while curr_batch_end <= end:
             curr_batch_start = curr_batch_end
             curr_batch_end = MicrobatchBuilder.offset_timestamp(curr_batch_start, batch_size, 1)

@@ -227,6 +227,7 @@ def _build_run_result(
         agate_table=None,
         adapter_response=None,
         failures=None,
+        batch_results=None,
     ):
         execution_time = time.time() - start_time
         thread_id = threading.current_thread().name
@@ -242,6 +243,7 @@ def _build_run_result(
             agate_table=agate_table,
             adapter_response=adapter_response,
             failures=failures,
+            batch_results=batch_results,
         )
 
     def error_result(self, node, message, start_time, timing_info):
@@ -272,6 +274,7 @@ def from_run_result(self, result, start_time, timing_info):
             agate_table=result.agate_table,
             adapter_response=result.adapter_response,
             failures=result.failures,
+            batch_results=result.batch_results,
         )
 
     def compile_and_execute(self, manifest: Manifest, ctx: ExecutionContext):

@@ -53,6 +53,7 @@ def execute(self, compiled_node, manifest):
             message="NO-OP",
             adapter_response={},
             failures=0,
+            batch_results=None,
             agate_table=None,
         )
 
@@ -65,7 +66,12 @@ class BuildTask(RunTask):
     I.E. a resource of type Model is handled by the ModelRunner which is
     imported as run_model_runner."""
 
-    MARK_DEPENDENT_ERRORS_STATUSES = [NodeStatus.Error, NodeStatus.Fail, NodeStatus.Skipped]
+    MARK_DEPENDENT_ERRORS_STATUSES = [
+        NodeStatus.Error,
+        NodeStatus.Fail,
+        NodeStatus.Skipped,
+        NodeStatus.PartialSuccess,
+    ]
 
     RUNNER_MAP = {
         NodeType.Model: run_model_runner,

@@ -43,6 +43,7 @@ def _build_run_model_result(self, model, context):
             message=message,
             adapter_response=adapter_response,
             failures=None,
+            batch_results=None,
         )
 
     def compile(self, manifest: Manifest):

@@ -35,6 +35,7 @@ def execute(self, compiled_node, manifest):
             message=None,
             adapter_response={},
             failures=None,
+            batch_results=None,
         )
 
     def compile(self, manifest: Manifest):

@@ -39,7 +39,7 @@ def get_counts(flat_nodes) -> str:
 
 
 def interpret_run_result(result) -> str:
-    if result.status in (NodeStatus.Error, NodeStatus.Fail):
+    if result.status in (NodeStatus.Error, NodeStatus.Fail, NodeStatus.PartialSuccess):
         return "error"
     elif result.status == NodeStatus.Skipped:
         return "skip"
@@ -136,7 +136,7 @@ def print_run_result_error(
 def print_run_end_messages(
     results, keyboard_interrupt: bool = False, groups: Optional[Dict[str, Group]] = None
 ) -> None:
-    errors, warnings = [], []
+    errors, warnings, partial_successes = [], [], []
     for r in results:
         if r.status in (NodeStatus.RuntimeErr, NodeStatus.Error, NodeStatus.Fail):
             errors.append(r)
@@ -146,12 +146,15 @@ def print_run_end_messages(
             errors.append(r)
         elif r.status == NodeStatus.Warn:
             warnings.append(r)
+        elif r.status == NodeStatus.PartialSuccess:
+            partial_successes.append(r)
 
     fire_event(Formatting(""))
     fire_event(
         EndOfRunSummary(
             num_errors=len(errors),
             num_warnings=len(warnings),
+            num_partial_success=len(partial_successes),
             keyboard_interrupt=keyboard_interrupt,
         )
     )

@@ -23,7 +23,13 @@
 from dbt.task.test import TestTask
 from dbt_common.exceptions import DbtRuntimeError
 
-RETRYABLE_STATUSES = {NodeStatus.Error, NodeStatus.Fail, NodeStatus.Skipped, NodeStatus.RuntimeErr}
+RETRYABLE_STATUSES = {
+    NodeStatus.Error,
+    NodeStatus.Fail,
+    NodeStatus.Skipped,
+    NodeStatus.RuntimeErr,
+    NodeStatus.PartialSuccess,  # NOTE(review): presumably makes partial successes retryable — confirm at use site
+}
 IGNORE_PARENT_FLAGS = {
     "log_path",
     "output_path",
@@ -123,6 +129,14 @@ def run(self):
             ]
         )
 
+        batch_map = {
+            result.unique_id: result.batch_results.failed
+            for result in self.previous_results.results
+            if result.status == NodeStatus.PartialSuccess
+            and result.batch_results is not None
+            and len(result.batch_results.failed) > 0
+        }
+
         class TaskWrapper(self.task_class):
             def get_graph_queue(self):
                 new_graph = self.graph.get_subset_graph(unique_ids)
@@ -138,6 +152,9 @@ def get_graph_queue(self):
             self.manifest,
         )
 
+        if self.task_class == RunTask:
+            task.batch_map = batch_map
+
         return_value = task.run()
         return return_value