Add grafana loki tool #227

Open · wants to merge 22 commits into master

Commits (22)
4f40249
feat: add loki toolset (WIP)
nherment Dec 9, 2024
1c3870c
chore: remove token
nherment Dec 9, 2024
9c6f6ee
feat: working loki toolset
nherment Dec 10, 2024
c18288a
Merge branch 'master' into add_grafana_loki_tool
nherment Dec 10, 2024
5712b81
feat: allow grafana loki search configuration
nherment Dec 10, 2024
243a1a1
feat: improved time search for grafana loki
nherment Dec 10, 2024
9c728fd
fix: incorrect file push
nherment Dec 10, 2024
28c5c7e
fix: incorrect args for one of the tools
nherment Dec 10, 2024
08d168a
feat: add documentation for Grafana Loki toolset
nherment Dec 10, 2024
07b5bf7
feat: improve documentation for Grafana Loki toolset
nherment Dec 10, 2024
ceed24c
feat: improve documentation for Grafana Loki toolset
nherment Dec 10, 2024
a5844b9
Merge remote-tracking branch 'origin/master' into add_grafana_loki_tool
nherment Dec 13, 2024
fa76554
feat: add necessaary fields
nherment Dec 13, 2024
8d2ae21
Merge branch 'master' into add_grafana_loki_tool
nherment Jan 6, 2025
6670410
Merge branch 'master' into add_grafana_loki_tool
nherment Jan 7, 2025
f8c330c
fix: add CORE tag to loki toolset
nherment Jan 7, 2025
109cbd5
Merge branch 'master' into add_grafana_loki_tool
nherment Jan 15, 2025
542440b
Tempo support (#249)
Avi-Robusta Jan 15, 2025
6906384
feat: merge some of the code btw loki and tempo
nherment Jan 15, 2025
fcef3be
doc: add tempo doc to readme
nherment Jan 15, 2025
27f8d84
test: update loki tests
nherment Jan 15, 2025
61f6ef8
feat: fix incorrect enabled/disabled code for loki and tempo
nherment Jan 16, 2025
56 changes: 51 additions & 5 deletions README.md
@@ -19,7 +19,7 @@ To this 👇

### Key Features
- **Automatic data collection:** HolmesGPT surfaces up the observability data you need to investigate
- **Secure:** *Read-only* access to your data - respects RBAC permissions
- **Runbook automation and knowledge sharing:** Tell Holmes how you investigate today and it will automate it
- **Extensible:** Add your own data sources (tools) and Holmes will use them to investigate
- **Data Privacy:** Bring your own API key for any AI provider (OpenAI, Azure, AWS Bedrock, etc)
@@ -491,7 +491,7 @@ To use Vertex AI with Gemini models, set the following environment variables:

```bash
export VERTEXAI_PROJECT="your-project-id"
export VERTEXAI_LOCATION="us-central1"
export GOOGLE_APPLICATION_CREDENTIALS="path/to/your/service_account_key.json"
```

@@ -538,9 +538,9 @@ If your llm provider url uses a certificate from a custom CA, in order to trust
<summary>Confluence</summary>
HolmesGPT can read runbooks from Confluence. To give it access, set the following environment variables:

* CONFLUENCE_BASE_URL - e.g. https://robusta-dev-test.atlassian.net
* CONFLUENCE_USER - e.g. [email protected]
* CONFLUENCE_API_KEY - [refer to Atlassian docs on generating API keys](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/)
* `CONFLUENCE_BASE_URL` - e.g. https://robusta-dev-test.atlassian.net
* `CONFLUENCE_USER` - e.g. [email protected]
* `CONFLUENCE_API_KEY` - [refer to Atlassian docs on generating API keys](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/)
</details>

<details>
@@ -563,13 +563,59 @@ HolmesGPT can consult webpages containing runbooks or other relevant information
HolmesGPT uses playwright to scrape webpages and requires playwright to be installed and working through `playwright install`.
</details>

<details>
<summary>
Using Grafana Loki
</summary>

HolmesGPT can consult logs from [Loki](https://grafana.com/oss/loki/) by proxying through a [Grafana](https://grafana.com/oss/grafana/) instance.

There are two parts to configuring the Grafana Loki toolset: access/authentication, and the search keys used to look up logs.

For access and authentication, add the following environment variables:

* `GRAFANA_URL` - e.g. https://my-org.grafana.net
* `GRAFANA_API_KEY` - e.g. glsa_bsm6ZS_sdfs25f

You can optionally override the search keys the toolset uses when querying logs.
To do so, append the following to your Holmes configuration file:

```yaml
grafana:
  url: https://my-org.grafana.net
  api_key: glsa_bsm6ZS_sdfs25f
  loki:
    pod_name_search_key: "pod"
    namespace_search_key: "namespace"
    node_name_search_key: "node"
```

> You only need to change these settings if the Loki labels for pod, namespace, and node in your logs differ from the defaults above.
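
For reference, here is a minimal sketch of how these settings map onto the configuration models introduced in this PR (`GrafanaConfig` and `GrafanaLokiConfig` in `holmes/plugins/toolsets/grafana/common.py`); the values are the same placeholders as above.

```python
# Illustrative sketch only: mirrors the YAML above using this PR's Pydantic models.
from holmes.plugins.toolsets.grafana.common import GrafanaConfig, GrafanaLokiConfig

config = GrafanaConfig(
    url="https://my-org.grafana.net",
    api_key="glsa_bsm6ZS_sdfs25f",  # defaults to the GRAFANA_API_KEY env var when omitted
    loki=GrafanaLokiConfig(
        pod_name_search_key="pod",
        namespace_search_key="namespace",
        node_name_search_key="node",
    ),
)
```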

The Loki toolset is configured using the same Grafana settings as the Grafana Tempo toolset.
</details>

<details>
<summary>
Using Grafana Tempo
</summary>

HolmesGPT can fetch trace information from Grafana Tempo to debug performance-related issues.

Tempo is configured using the same Grafana settings as the Grafana Loki toolset:

```yaml
grafana:
  url: https://my-org.grafana.net
```
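
As a rough sketch (based on `holmes/plugins/toolsets/__init__.py` in this PR), both toolsets are constructed from the same `GrafanaConfig` instance:

```python
# Illustrative sketch only: Loki and Tempo share one set of Grafana connection settings.
from holmes.plugins.toolsets.grafana.common import GrafanaConfig
from holmes.plugins.toolsets.grafana.toolset_grafana_loki import GrafanaLokiToolset
from holmes.plugins.toolsets.grafana.toolset_grafana_tempo import GrafanaTempoToolset

grafana_config = GrafanaConfig()  # falls back to the GRAFANA_URL / GRAFANA_API_KEY env vars
toolsets = [GrafanaLokiToolset(grafana_config), GrafanaTempoToolset(grafana_config)]
```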
</details>


<details>
<summary>
ArgoCD
</summary>

Holmes can use the `argocd` CLI to get details about the ArgoCD setup like the apps configuration and status, clusters and projects within ArgoCD.
To enable ArgoCD, set the `ARGOCD_AUTH_TOKEN` environment variable as described in the [argocd documentation](https://argo-cd.readthedocs.io/en/latest/user-guide/commands/argocd_account_generate-token/).

</details>

## More Use Cases
33 changes: 18 additions & 15 deletions holmes/config.py
@@ -13,7 +13,7 @@

from holmes.core.runbooks import RunbookManager
from holmes.core.supabase_dal import SupabaseDal
from holmes.core.tool_calling_llm import (IssueInvestigator,
ToolCallingLLM,
ToolExecutor)
from holmes.core.tools import ToolsetPattern, get_matching_toolsets, ToolsetStatusEnum, ToolsetTag
@@ -27,6 +27,7 @@
from holmes.plugins.sources.prometheus.plugin import AlertManagerSource
from holmes.plugins.toolsets import (load_builtin_toolsets,
load_toolsets_from_file)
from holmes.plugins.toolsets.grafana.common import GrafanaConfig
from holmes.utils.pydantic_utils import RobustaBaseConfig, load_model_from_file
from holmes.utils.definitions import CUSTOM_TOOLSET_LOCATION
from pydantic import ValidationError
@@ -77,7 +78,9 @@ class Config(RobustaBaseConfig):

custom_runbooks: List[FilePath] = []
custom_toolsets: List[FilePath] = []


grafana: GrafanaConfig = GrafanaConfig()

enabled_toolsets_names: List[str] = Field(default_factory=list)

@classmethod
@@ -109,7 +112,7 @@ def load_from_env(cls):
kwargs[field_name] = val
kwargs["cluster_name"] = Config.__get_cluster_name()
return cls(**kwargs)

@staticmethod
def __get_cluster_name() -> Optional[str]:
config_file_path = ROBUSTA_CONFIG_PATH
@@ -133,17 +136,17 @@ def create_console_tool_executor(
self, console: Console, allowed_toolsets: ToolsetPattern, dal:Optional[SupabaseDal]
) -> ToolExecutor:
"""
Creates ToolExecutor for the cli
"""
default_toolsets = [toolset for toolset in load_builtin_toolsets(dal) if any(tag in (ToolsetTag.CORE, ToolsetTag.CLI) for tag in toolset.tags)]
default_toolsets = [toolset for toolset in load_builtin_toolsets(dal, grafana_config=self.grafana) if any(tag in (ToolsetTag.CORE, ToolsetTag.CLI) for tag in toolset.tags)]

if allowed_toolsets == "*":
matching_toolsets = default_toolsets
else:
matching_toolsets = get_matching_toolsets(
default_toolsets, allowed_toolsets.split(",")
)

# Enable all matching toolsets that have CORE or CLI tag
for toolset in matching_toolsets:
toolset.enabled = True
@@ -155,7 +158,7 @@ def create_console_tool_executor(
toolsets_loaded_from_config,
matched_default_toolsets_by_name,
)

for toolset in filtered_toolsets_by_name.values():
if toolset.enabled:
toolset.check_prerequisites()
@@ -169,11 +172,11 @@ def create_console_tool_executor(
logging.info(f"Disabled toolset: {ts.name} from {ts.get_path()})")
elif ts.get_status() == ToolsetStatusEnum.FAILED:
logging.info(f"Failed loading toolset {ts.name} from {ts.get_path()}: ({ts.get_error()})")

for ts in default_toolsets:
if ts.name not in filtered_toolsets_by_name.keys():
logging.debug(f"Toolset {ts.name} from {ts.get_path()} was filtered out due to allowed_toolsets value")

enabled_tools = concat(*[ts.tools for ts in enabled_toolsets])
logging.debug(
f"Starting AI session with tools: {[t.name for t in enabled_tools]}"
@@ -184,10 +187,10 @@ def create_tool_executor(
self, console: Console, dal:Optional[SupabaseDal]
) -> ToolExecutor:
"""
Creates ToolExecutor for the server endpoints
"""

all_toolsets = load_builtin_toolsets(dal=dal)
all_toolsets = load_builtin_toolsets(dal=dal, grafana_config=self.grafana)

if os.path.isfile(CUSTOM_TOOLSET_LOCATION):
try:
@@ -201,7 +204,7 @@ def create_tool_executor(
f"Starting AI session with tools: {[t.name for t in enabled_tools]}"
)
return ToolExecutor(enabled_toolsets)

def create_console_toolcalling_llm(
self, console: Console, allowed_toolsets: ToolsetPattern, dal:Optional[SupabaseDal] = None
) -> ToolCallingLLM:
@@ -239,7 +242,7 @@ def create_issue_investigator(
self.max_steps,
self._get_llm()
)

def create_console_issue_investigator(
self,
console: Console,
9 changes: 9 additions & 0 deletions holmes/core/tools.py
@@ -234,6 +234,15 @@ class StaticPrerequisite(BaseModel):
enabled: bool
disabled_reason: str

class EnvironmentVariablePrerequisite(StaticPrerequisite):
    def __init__(self, env_var_name:str) -> None:
        env_var = os.environ.get(env_var_name)
        enabled = False
        disabled_reason = f'Missing environment variable "{env_var_name}"'
        if env_var:
            enabled = True
            disabled_reason = ""
        super().__init__(enabled=enabled, disabled_reason=disabled_reason)
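
A minimal usage sketch for the new prerequisite (illustrative, not part of this diff; it relies only on the `enabled` and `disabled_reason` fields shown above):

```python
# Illustrative sketch: the prerequisite is enabled only when the variable is set.
prereq = EnvironmentVariablePrerequisite("GRAFANA_URL")
if not prereq.enabled:
    print(prereq.disabled_reason)  # Missing environment variable "GRAFANA_URL"
```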

class ToolsetCommandPrerequisite(BaseModel):
command: str # must complete successfully (error code 0) for prereq to be satisfied
21 changes: 11 additions & 10 deletions holmes/plugins/toolsets/__init__.py
@@ -5,22 +5,21 @@

from holmes.core.supabase_dal import SupabaseDal
from holmes.plugins.toolsets.findings import FindingsToolset
from holmes.plugins.toolsets.grafana.common import GrafanaConfig
from holmes.plugins.toolsets.grafana.toolset_grafana_loki import GrafanaLokiToolset
from holmes.plugins.toolsets.grafana.toolset_grafana_tempo import GrafanaTempoToolset
from holmes.plugins.toolsets.internet import InternetToolset
from pydantic import BaseModel

from holmes.core.tools import Toolset, YAMLToolset
from typing import Dict
from pydantic import BaseModel
from typing import Optional
import yaml

THIS_DIR = os.path.abspath(os.path.dirname(__file__))


class ToolsetsYaml(BaseModel):
toolsets: Dict[str, YAMLToolset]


def load_toolsets_from_file(path: str, silent_fail: bool = False) -> List[YAMLToolset]:
file_toolsets = []
with open(path) as file:
@@ -31,20 +30,22 @@ def load_toolsets_from_file(path: str, silent_fail: bool = False) -> List[YAMLTo
toolset = YAMLToolset(**config, name=name)
toolset.set_path(path)
file_toolsets.append(YAMLToolset(**config, name=name))
except Exception as e:
except Exception:
if not silent_fail:
logging.error(f"Error happened while loading {name} toolset from {path}",
exc_info=True)

return file_toolsets


def load_python_toolsets(dal:Optional[SupabaseDal]) -> List[Toolset]:
def load_python_toolsets(dal:Optional[SupabaseDal], grafana_config:Optional[GrafanaConfig]) -> List[Toolset]:
logging.debug("loading python toolsets")
return [InternetToolset(), FindingsToolset(dal)]
if not grafana_config:
# passing an empty config simplifies the downstream code
grafana_config = GrafanaConfig()

return [InternetToolset(), FindingsToolset(dal), GrafanaLokiToolset(grafana_config), GrafanaTempoToolset(grafana_config)]

def load_builtin_toolsets(dal:Optional[SupabaseDal] = None) -> List[Toolset]:
def load_builtin_toolsets(dal:Optional[SupabaseDal] = None, grafana_config:Optional[GrafanaConfig] = GrafanaConfig()) -> List[Toolset]:
all_toolsets = []
logging.debug(f"loading toolsets from {THIS_DIR}")
for filename in os.listdir(THIS_DIR):
@@ -53,5 +54,5 @@ def load_builtin_toolsets(dal:Optional[SupabaseDal] = None) -> List[Toolset]:
path = os.path.join(THIS_DIR, filename)
all_toolsets.extend(load_toolsets_from_file(path))

all_toolsets.extend(load_python_toolsets(dal))
all_toolsets.extend(load_python_toolsets(dal, grafana_config))
return all_toolsets
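
A small sketch of how the updated loader is called with the new parameter (illustrative; it mirrors the `create_tool_executor` change in `holmes/config.py` above):

```python
# Illustrative sketch: builtin toolsets now receive the Grafana settings explicitly.
from holmes.plugins.toolsets import load_builtin_toolsets
from holmes.plugins.toolsets.grafana.common import GrafanaConfig

toolsets = load_builtin_toolsets(dal=None, grafana_config=GrafanaConfig())
```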
Empty file.
71 changes: 71 additions & 0 deletions holmes/plugins/toolsets/grafana/common.py
@@ -0,0 +1,71 @@

from typing import Dict, List, Optional, Union, Tuple
import uuid
import time
import os
from pydantic import BaseModel

from holmes.core.tools import StaticPrerequisite

GRAFANA_URL_ENV_NAME = "GRAFANA_URL"
GRAFANA_API_KEY_ENV_NAME = "GRAFANA_API_KEY"
ONE_HOUR_IN_SECONDS = 3600

class GrafanaLokiConfig(BaseModel):
    pod_name_search_key: str = "pod"
    namespace_search_key: str = "namespace"
    node_name_search_key: str = "node"

class GrafanaConfig(BaseModel):
    loki: GrafanaLokiConfig = GrafanaLokiConfig()
    api_key: str = os.environ.get(GRAFANA_API_KEY_ENV_NAME, "")
    url: str = os.environ.get(GRAFANA_URL_ENV_NAME, "")

def is_grafana_configured(config:GrafanaConfig) -> Tuple[bool, List[str]]:
    errors = []
    if not config.api_key:
        errors.append(f"api_key is missing from the grafana configuration. Either set the api_key for grafana or set the environment variable {GRAFANA_API_KEY_ENV_NAME}")
    if not config.url:
        errors.append(f"url is missing from the grafana configuration. Either set the url for grafana or set the environment variable {GRAFANA_URL_ENV_NAME}")

    return (len(errors) == 0, errors)

def headers(api_key:str):
    return {
        'Authorization': f'Bearer {api_key}',
        'Accept': 'application/json',
        'Content-Type': 'application/json'
    }

def process_timestamps(start_timestamp: Optional[Union[int, str]], end_timestamp: Optional[Union[int, str]]):
    if start_timestamp and isinstance(start_timestamp, str):
        start_timestamp = int(start_timestamp)
    if end_timestamp and isinstance(end_timestamp, str):
        end_timestamp = int(end_timestamp)

    if not end_timestamp:
        end_timestamp = int(time.time())
    if not start_timestamp:
        start_timestamp = end_timestamp - ONE_HOUR_IN_SECONDS
    if start_timestamp < 0:
        start_timestamp = end_timestamp + start_timestamp
    return (start_timestamp, end_timestamp)

def get_param_or_raise(dict:Dict, param:str) -> str:
    value = dict.get(param)
    if not value:
        raise Exception(f'Missing param "{param}"')
    return value

def get_datasource_id(dict:Dict, param:str) -> str:
    datasource_id=get_param_or_raise(dict, param)
    try:
        if uuid.UUID(datasource_id, version=4):
            return f"uid/{datasource_id}"
    except ValueError:
        pass
    return datasource_id

def get_grafana_toolset_prerequisite(config:GrafanaConfig) -> StaticPrerequisite:
    enabled, errors = is_grafana_configured(config)
    return StaticPrerequisite(enabled=enabled, disabled_reason=", ".join(errors))
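
A quick sketch of the defaulting behaviour of `process_timestamps` above (illustrative; negative start values are treated as offsets from the end timestamp):

```python
# Illustrative sketch of process_timestamps' defaults.
from holmes.plugins.toolsets.grafana.common import ONE_HOUR_IN_SECONDS, process_timestamps

start, end = process_timestamps(None, None)         # last hour, ending now
assert end - start == ONE_HOUR_IN_SECONDS

start, end = process_timestamps(-1800, 1700000000)  # the 30 minutes before the given end
assert (start, end) == (1700000000 - 1800, 1700000000)
```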