Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DONT MERGE]feat(wren-ai-service): generate semantics for alias #976

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deployment/kustomizations/base/cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ data:
- name: sql_regeneration
llm: litellm_llm.gpt-4o-mini-2024-07-18
engine: wren_ui
- name: semantics_description
- name: semantics_enrichment
llm: litellm_llm.gpt-4o-mini-2024-07-18
- name: relationship_recommendation
llm: litellm_llm.gpt-4o-mini-2024-07-18
Expand Down
2 changes: 1 addition & 1 deletion docker/config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ pipes:
- name: sql_regeneration
llm: litellm_llm.gpt-4o-mini-2024-07-18
engine: wren_ui
- name: semantics_description
- name: semantics_enrichment
llm: litellm_llm.gpt-4o-mini-2024-07-18
- name: relationship_recommendation
llm: litellm_llm.gpt-4o-mini-2024-07-18
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pipes:
- name: sql_regeneration
llm: litellm_llm.gemini/gemini-2.0-flash-exp
engine: wren_ui
- name: semantics_description
- name: semantics_enrichment
llm: litellm_llm.gemini/gemini-2.0-flash-exp
- name: relationship_recommendation
llm: litellm_llm.gemini/gemini-2.0-flash-exp
Expand Down
2 changes: 1 addition & 1 deletion wren-ai-service/docs/config_examples/config.groq.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ pipes:
- name: sql_regeneration
llm: litellm_llm.groq/llama-3.3-70b-specdec
engine: wren_ui
- name: semantics_description
- name: semantics_enrichment
llm: litellm_llm.groq/llama-3.3-70b-specdec
- name: relationship_recommendation
llm: litellm_llm.groq/llama-3.3-70b-specdec
Expand Down
2 changes: 1 addition & 1 deletion wren-ai-service/docs/config_examples/config.ollama.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pipes:
- name: sql_regeneration
llm: litellm_llm.openai/phi4:14b
engine: wren_ui
- name: semantics_description
- name: semantics_enrichment
llm: litellm_llm.openai/phi4:14b
- name: relationship_recommendation
llm: litellm_llm.openai/phi4:14b
Expand Down
16 changes: 8 additions & 8 deletions wren-ai-service/src/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from src.web.v1.services.chart_adjustment import ChartAdjustmentService
from src.web.v1.services.question_recommendation import QuestionRecommendation
from src.web.v1.services.relationship_recommendation import RelationshipRecommendation
from src.web.v1.services.semantics_description import SemanticsDescription
from src.web.v1.services.semantics_enrichment import SemanticsEnrichment
from src.web.v1.services.semantics_preparation import SemanticsPreparationService
from src.web.v1.services.sql_answer import SqlAnswerService
from src.web.v1.services.sql_expansion import SqlExpansionService
Expand All @@ -29,18 +29,18 @@
class ServiceContainer:
ask_service: AskService
ask_details_service: AskDetailsService
chart_service: ChartService
chart_adjustment_service: ChartAdjustmentService
question_recommendation: QuestionRecommendation
relationship_recommendation: RelationshipRecommendation
semantics_description: SemanticsDescription
semantics_enrichment: SemanticsEnrichment
semantics_preparation_service: SemanticsPreparationService
chart_service: ChartService
chart_adjustment_service: ChartAdjustmentService
sql_answer_service: SqlAnswerService
sql_expansion_service: SqlExpansionService
sql_explanation_service: SqlExplanationService
sql_regeneration_service: SqlRegenerationService
sql_pairs_preparation_service: SqlPairsPreparationService
sql_question_service: SqlQuestionService
sql_regeneration_service: SqlRegenerationService


@dataclass
Expand All @@ -58,10 +58,10 @@ def create_service_container(
"ttl": settings.query_cache_ttl,
}
return ServiceContainer(
semantics_description=SemanticsDescription(
semantics_enrichment=SemanticsEnrichment(
pipelines={
"semantics_description": generation.SemanticsDescription(
**pipe_components["semantics_description"],
"semantics_enrichment": generation.SemanticsEnrichment(
**pipe_components["semantics_enrichment"],
)
},
**query_cache,
Expand Down
2 changes: 1 addition & 1 deletion wren-ai-service/src/pipelines/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def dry_run_pipeline(pipeline_cls: BasicPipeline, pipeline_name: str, **kwargs):
from src.providers import generate_components
from src.utils import init_langfuse, setup_custom_logger

setup_custom_logger("wren-ai-service", level_str=settings.logging_level)
setup_custom_logger("wren-ai-service", level_str=settings.logging_level, is_dev=True)

pipe_components = generate_components(settings.components)
pipeline = pipeline_cls(**pipe_components[pipeline_name])
Expand Down
10 changes: 5 additions & 5 deletions wren-ai-service/src/pipelines/generation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .intent_classification import IntentClassification
from .question_recommendation import QuestionRecommendation
from .relationship_recommendation import RelationshipRecommendation
from .semantics_description import SemanticsDescription
from .semantics_enrichment import SemanticsEnrichment
from .sql_answer import SQLAnswer
from .sql_breakdown import SQLBreakdown
from .sql_correction import SQLCorrection
Expand All @@ -18,22 +18,22 @@
from .sql_summary import SQLSummary

__all__ = [
"SQLRegeneration",
"ChartGeneration",
"ChartAdjustment",
"ChartGeneration",
"DataAssistance",
"FollowUpSQLGeneration",
"IntentClassification",
"QuestionRecommendation",
"RelationshipRecommendation",
"SemanticsDescription",
"SemanticsEnrichment",
"SQLAnswer",
"SQLBreakdown",
"SQLCorrection",
"SQLExpansion",
"SQLExplanation",
"SQLGeneration",
"SQLGenerationReasoning",
"SQLSummary",
"SQLQuestion",
"SQLRegeneration",
"SQLSummary",
]
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

## Start of Pipeline
@observe(capture_input=False)
def picked_models(mdl: dict, selected_models: list[str]) -> list[dict]:
def picked_models(mdl: dict) -> list[dict]:
def relation_filter(column: dict) -> bool:
return "relationship" not in column

Expand All @@ -27,6 +27,7 @@ def column_formatter(columns: list[dict]) -> list[dict]:
"name": column["name"],
"type": column["type"],
"properties": {
"alias": column["properties"].get("displayName", ""),
"description": column["properties"].get("description", ""),
},
}
Expand All @@ -35,19 +36,17 @@ def column_formatter(columns: list[dict]) -> list[dict]:
]

def extract(model: dict) -> dict:
prop = model["properties"]
return {
"name": model["name"],
"columns": column_formatter(model["columns"]),
"properties": {
"description": model["properties"].get("description", ""),
"alias": prop.get("displayName", ""),
"description": prop.get("description", ""),
},
}

return [
extract(model)
for model in mdl.get("models", [])
if model.get("name", "") in selected_models
]
return [extract(model) for model in mdl.get("models", [])]


@observe(capture_input=False)
Expand Down Expand Up @@ -89,95 +88,60 @@ def wrapper(text: str) -> str:


## End of Pipeline
class ModelProperties(BaseModel):
class Properties(BaseModel):
alias: str
description: str


class ModelColumns(BaseModel):
name: str
properties: ModelProperties
properties: Properties


class SemanticModel(BaseModel):
name: str
columns: list[ModelColumns]
properties: ModelProperties
properties: Properties


class SemanticResult(BaseModel):
models: list[SemanticModel]


SEMANTICS_DESCRIPTION_MODEL_KWARGS = {
SEMANTICS_ENRICHMENT_KWARGS = {
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "semantic_description",
"name": "semantics_enrichment",
"schema": SemanticResult.model_json_schema(),
},
}
}

system_prompt = """
I have a data model represented in JSON format, with the following structure:

```
[
{'name': 'model', 'columns': [
{'name': 'column_1', 'type': 'type', 'properties': {}
},
{'name': 'column_2', 'type': 'type', 'properties': {}
},
{'name': 'column_3', 'type': 'type', 'properties': {}
}
], 'properties': {}
}
]
```

Your task is to update this JSON structure by adding a `description` field inside both the `properties` attribute of each `column` and the `model` itself.
Each `description` should be derived from a user-provided input that explains the purpose or context of the `model` and its respective columns.
Follow these steps:
1. **For the `model`**: Prompt the user to provide a brief description of the model's overall purpose or its context. Insert this description in the `properties` field of the `model`.
2. **For each `column`**: Ask the user to describe each column's role or significance. Each column's description should be added under its respective `properties` field in the format: `'description': 'user-provided text'`.
3. Ensure that the output is a well-formatted JSON structure, preserving the input's original format and adding the appropriate `description` fields.

### Output Format:

```
{
"models": [
{
"name": "model",
"columns": [
{
"name": "column_1",
"properties": {
"description": "<description for column_1>"
}
},
{
"name": "column_2",
"properties": {
"description": "<description for column_2>"
}
},
{
"name": "column_3",
"properties": {
"description": "<description for column_3>"
}
}
],
"properties": {
"description": "<description for model>"
}
}
]
}
```

Make sure that the descriptions are concise, informative, and contextually appropriate based on the input provided by the user.
You are a data model expert. Your task is to enrich a JSON data model with descriptive metadata.

Input Format:
[{
'name': 'model',
'columns': [{'name': 'column', 'type': 'type', 'properties': {'alias': 'alias', 'description': 'description'}}],
'properties': {'alias': 'alias', 'description': 'description'}
}]

For each model and column, you will:
1. Add a clear, concise alias that serves as a business-friendly name
2. Add a detailed description explaining its purpose and usage

Guidelines:
- Descriptions should be clear, concise and business-focused
- Aliases should be intuitive and user-friendly
- Use the user's context to inform the descriptions
- Maintain technical accuracy while being accessible to non-technical users
- IMPORTANT: Never modify the model/table and column names in the 'name' field as this will invalidate the data model
- Only update the 'alias' field to provide user-friendly display names
- When the user prompt includes operators to modify names, apply those modifications to the alias field only

Focus on providing business value through clear, accurate descriptions while maintaining JSON structure integrity.
"""

user_prompt_template = """
Expand All @@ -186,17 +150,17 @@ class SemanticResult(BaseModel):
Picked models: {{ picked_models }}
Localization Language: {{ language }}

Please provide a brief description for the model and each column based on the user's prompt.
Please provide a brief description and alias for the model and each column based on the user's prompt.
"""


class SemanticsDescription(BasicPipeline):
class SemanticsEnrichment(BasicPipeline):
def __init__(self, llm_provider: LLMProvider, **_):
self._components = {
"prompt_builder": PromptBuilder(template=user_prompt_template),
"generator": llm_provider.get_generator(
system_prompt=system_prompt,
generation_kwargs=SEMANTICS_DESCRIPTION_MODEL_KWARGS,
generation_kwargs=SEMANTICS_ENRICHMENT_KWARGS,
),
}
self._final = "normalize"
Expand All @@ -209,16 +173,13 @@ def __init__(self, llm_provider: LLMProvider, **_):
async def run(
self,
user_prompt: str,
selected_models: list[str],
mdl: dict,
language: str = "en",
) -> dict:
logger.info("Semantics Description Generation pipeline is running...")
return await self._pipe.execute(
[self._final],
inputs={
"user_prompt": user_prompt,
"selected_models": selected_models,
"mdl": mdl,
"language": language,
**self._components,
Expand All @@ -230,10 +191,9 @@ async def run(
from src.pipelines.common import dry_run_pipeline

dry_run_pipeline(
SemanticsDescription,
"semantics_description",
SemanticsEnrichment,
"semantics_enrichment",
user_prompt="Track student enrollments, grades, and GPA calculations to monitor academic performance and identify areas for student support",
selected_models=[],
mdl={},
language="en",
)
4 changes: 2 additions & 2 deletions wren-ai-service/src/pipelines/indexing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ async def run(

__all__ = [
"DBSchema",
"TableDescription",
"HistoricalQuestion",
"SqlPairsDeletion",
"SqlPairs",
"SqlPairsDeletion",
"TableDescription",
]
4 changes: 2 additions & 2 deletions wren-ai-service/src/web/v1/routers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
chart_adjustment,
question_recommendation,
relationship_recommendation,
semantics_description,
semantics_enrichment,
semantics_preparation,
sql_answers,
sql_expansions,
Expand All @@ -22,7 +22,7 @@
router.include_router(ask_details.router)
router.include_router(question_recommendation.router)
router.include_router(relationship_recommendation.router)
router.include_router(semantics_description.router)
router.include_router(semantics_enrichment.router)
router.include_router(semantics_preparation.router)
router.include_router(sql_answers.router)
router.include_router(sql_expansions.router)
Expand Down
Loading
Loading