From 5552ab2168261c693062baf9ecbd7d057f1828a9 Mon Sep 17 00:00:00 2001
From: Padraig Alton <padraig.alton@justpark.com>
Date: Fri, 9 Feb 2024 17:05:50 +0000
Subject: [PATCH 1/3] Add support for ephemeral models

---
 CHANGELOG.md         |  4 ++++
 dbt2looker/models.py |  5 +++--
 dbt2looker/parser.py | 10 +++++-----
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9943468..676d862 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 Recent and upcoming changes to dbt2looker
 
+## Unreleased
+### Added
+- support ephemeral models (#57)
+
 ## 0.11.0
 ### Added
 - support label and hidden fields (#49)
diff --git a/dbt2looker/models.py b/dbt2looker/models.py
index 3430eaf..9961182 100644
--- a/dbt2looker/models.py
+++ b/dbt2looker/models.py
@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import Union, Dict, List, Optional
+from typing import Any, Union, Dict, List, Optional
 try:
     from typing import Literal
 except ImportError:
@@ -144,6 +144,7 @@ class DbtModelColumn(BaseModel):
 class DbtNode(BaseModel):
     unique_id: str
     resource_type: str
+    config: Dict[str, Any]  # raw node config from the manifest; parse_models reads config['materialized']
 
 
 class Dbt2LookerExploreJoin(BaseModel):
@@ -224,4 +225,4 @@ def case_insensitive_column_names(cls, v: Dict[str, DbtCatalogNodeColumn]):
 
 
 class DbtCatalog(BaseModel):
-    nodes: Dict[str, DbtCatalogNode]
\ No newline at end of file
+    nodes: Dict[str, DbtCatalogNode]
diff --git a/dbt2looker/parser.py b/dbt2looker/parser.py
index ed310f3..03045c9 100644
--- a/dbt2looker/parser.py
+++ b/dbt2looker/parser.py
@@ -31,21 +31,21 @@ def tags_match(query_tag: str, model: models.DbtModel) -> bool:
 
 def parse_models(raw_manifest: dict, tag=None) -> List[models.DbtModel]:
     manifest = models.DbtManifest(**raw_manifest)
-    all_models: List[models.DbtModel] = [
+    materialized_models: List[models.DbtModel] = [  # NOTE(review): KeyError below if config lacks 'materialized'
         node
         for node in manifest.nodes.values()
-        if node.resource_type == 'model'
+        if node.resource_type == 'model' and node.config['materialized'] != 'ephemeral'
     ]
 
     # Empty model files have many missing parameters
-    for model in all_models:
+    for model in materialized_models:
         if not hasattr(model, 'name'):
             logging.error('Cannot parse model with id: "%s" - is the model file empty?', model.unique_id)
             raise SystemExit('Failed')
 
     if tag is None:
-        return all_models
-    return [model for model in all_models if tags_match(tag, model)]
+        return materialized_models
+    return [model for model in materialized_models if tags_match(tag, model)]
 
 
 def check_models_for_missing_column_types(dbt_typed_models: List[models.DbtModel]):

From 8aad35994af892777a60194ec7a88053c25c57bc Mon Sep 17 00:00:00 2001
From: Padraig Alton <padraig.alton@justpark.com>
Date: Fri, 9 Feb 2024 17:14:27 +0000
Subject: [PATCH 2/3] Reorder parse_models logic to ignore non-selected empty
 model files

---
 CHANGELOG.md         |  3 +++
 dbt2looker/parser.py | 11 +++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 676d862..7066f58 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,9 @@ Recent and upcoming changes to dbt2looker
 ### Added
 - support ephemeral models (#57)
 
+### Changed
+- only non-ephemeral models _selected by tag logic_ are checked to ensure the model files are not empty (instead of all models) (#57)
+
 ## 0.11.0
 ### Added
 - support label and hidden fields (#49)
diff --git a/dbt2looker/parser.py b/dbt2looker/parser.py
index 03045c9..f49477f 100644
--- a/dbt2looker/parser.py
+++ b/dbt2looker/parser.py
@@ -37,15 +37,18 @@ def parse_models(raw_manifest: dict, tag=None) -> List[models.DbtModel]:
         if node.resource_type == 'model' and node.config['materialized'] != 'ephemeral'
     ]
 
+    if tag is None:
+        selected_models = materialized_models
+    else:
+        selected_models = [model for model in materialized_models if tags_match(tag, model)]
+
     # Empty model files have many missing parameters
-    for model in materialized_models:
+    for model in selected_models:
         if not hasattr(model, 'name'):
             logging.error('Cannot parse model with id: "%s" - is the model file empty?', model.unique_id)
             raise SystemExit('Failed')
 
-    if tag is None:
-        return materialized_models
-    return [model for model in materialized_models if tags_match(tag, model)]
+    return selected_models
 
 
 def check_models_for_missing_column_types(dbt_typed_models: List[models.DbtModel]):

From 8c5a2c31ca3e5312eba64177b9144868c9215038 Mon Sep 17 00:00:00 2001
From: Padraig Alton <padraig.alton@justpark.com>
Date: Fri, 19 Jan 2024 13:28:11 +0000
Subject: [PATCH 3/3] Add helpful warnings for manifest/catalog discrepancy

---
 CHANGELOG.md         |  2 ++
 dbt2looker/parser.py | 45 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7066f58..9740204 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,8 @@ Recent and upcoming changes to dbt2looker
 ## Unreleased
 ### Added
 - support ephemeral models (#57)
+- warnings when a model's columns differ between the manifest (YML) and the catalog (#5)
+- more descriptive error message when a column's data type can't be inferred because the column is not in the catalog
 
 ### Changed
 - only non-ephemeral models _selected by tag logic_ are checked to ensure the model files are not empty (instead of all models) (#57)
diff --git a/dbt2looker/parser.py b/dbt2looker/parser.py
index f49477f..9e29d5b 100644
--- a/dbt2looker/parser.py
+++ b/dbt2looker/parser.py
@@ -57,6 +57,33 @@ def check_models_for_missing_column_types(dbt_typed_models: List[models.DbtModel
             logging.debug('Model %s has no typed columns, no dimensions will be generated. %s', model.unique_id, model)
 
 
+def compare_model_vs_node_columns(model: models.DbtModel, node: models.DbtCatalogNode):
+    """Log a warning for every column found in only one of the YML config and the catalog.
+
+    Both directions are always reported, so a model with columns missing from the
+    catalog still gets warnings for catalog columns that are undocumented in YML.
+    Nothing is logged when the two column sets are identical.
+    """
+    model_columns = set(model.columns.keys())  # as defined in YML config
+    catalogued_columns = set(node.columns.keys())  # as defined in SQL
+
+    # Each direction is checked independently: an empty difference just skips its loop,
+    # and sorted() keeps the order of the warnings stable between runs.
+    for undocumented_column in sorted(catalogued_columns - model_columns):
+        logging.warning(
+            f'Column {model.unique_id}.{undocumented_column} has not been documented in YML, '
+            'but is present in the catalog. You should add it to your YML config, '
+            'or (if it is not required) remove it from the model SQL file, run the model, '
+            'and run `dbt docs generate` again')
+
+    # columns documented in YML that the catalog does not contain
+    for missing_column in sorted(model_columns - catalogued_columns):
+        logging.warning(
+            f'Column {model.unique_id}.{missing_column} documented in YML, '
+            'but is not defined in the DBT catalog. Check the model SQL file '
+            'and ensure you have run the model and `dbt docs generate`')
+
+
 def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str] = None):
     catalog_nodes = parse_catalog_nodes(raw_catalog)
     dbt_models = parse_models(raw_manifest, tag=tag)
@@ -77,6 +104,11 @@ def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str]
             logging.warning(
                 f'Model {model.unique_id} not found in catalog. No looker view will be generated. '
                 f'Check if model has materialized in {adapter_type} at {model.relation_name}')
+        else:
+            # we know that the model is included in the catalog - extract it
+            corresponding_catalog_node = catalog_nodes[model.unique_id]
+            # issue warnings if the catalog columns (defined via SQL) don't match what's documented in YML
+            compare_model_vs_node_columns(model, corresponding_catalog_node)
 
     # Update dbt models with data types from catalog
     dbt_typed_models = [
@@ -95,7 +127,18 @@ def parse_typed_models(raw_manifest: dict, raw_catalog: dict, tag: Optional[str]
     return dbt_typed_models
 
 
+class ColumnNotInCatalogError(Exception):
+    """Raised when a model column has no entry in the catalog, so no data type can be found."""
+    def __init__(self, model_id: str, column_name: str):
+        problem = f'Column {column_name} not found in catalog for model {model_id}, cannot find a data type for Looker. '
+        advice = 'Is the column selected in the model SQL file, and have you run the model since adding the column to it?'
+        super().__init__(problem + advice)
+
+
 def get_column_type_from_catalog(catalog_nodes: Dict[str, models.DbtCatalogNode], model_id: str, column_name: str):
     node = catalog_nodes.get(model_id)
     column = None if node is None else node.columns.get(column_name)
-    return None if column is None else column.type
+    if column is not None:
+        return column.type
+    # no catalog entry: raise a descriptive error now instead of failing later when mapping to a Looker type
+    raise ColumnNotInCatalogError(model_id, column_name)