diff --git a/docs/custom_fences.py b/docs/custom_fences.py index b0882639f..88eb18fa5 100644 --- a/docs/custom_fences.py +++ b/docs/custom_fences.py @@ -14,6 +14,11 @@ def toolset_config_fence_format(source, language, css_class, options, md, **kwar """ Format YAML content into Holmes CLI, Holmes Helm Chart, and Robusta Helm Chart tabs for toolset configuration. This fence does NOT process Jinja2, so {{ env.VAR }} stays as-is. + + Supports additional content for each tab via special comments at the start of the YAML: + # __CLI_EXTRA__: Extra content to add to the CLI tab + # __HOLMES_HELM_EXTRA__: Extra content to add to the Holmes Helm tab + # __ROBUSTA_HELM_EXTRA__: Extra content to add to the Robusta Helm tab """ # Generate unique IDs for this tab group to prevent conflicts tab_group_id = str(uuid.uuid4()).replace("-", "_") @@ -22,11 +27,40 @@ def toolset_config_fence_format(source, language, css_class, options, md, **kwar tab_id_3 = f"__tabbed_{tab_group_id}_3" group_name = f"__tabbed_{tab_group_id}" - # Escape HTML in the source to prevent XSS - escaped_source = html.escape(source) + # Parse special directives from source + lines = source.strip().split("\n") + cli_extra = "" + holmes_helm_extra = "" + robusta_helm_extra = "" + filtered_lines = [] + + for line in lines: + # Check if line contains special directives and extract them + if "# __CLI_EXTRA__:" in line: + # Extract the content after the directive + parts = line.split("# __CLI_EXTRA__:") + if len(parts) > 1: + cli_extra = parts[1].strip() + # Don't add this line to filtered_lines + elif "# __HOLMES_HELM_EXTRA__:" in line: + parts = line.split("# __HOLMES_HELM_EXTRA__:") + if len(parts) > 1: + holmes_helm_extra = parts[1].strip() + # Don't add this line to filtered_lines + elif "# __ROBUSTA_HELM_EXTRA__:" in line: + parts = line.split("# __ROBUSTA_HELM_EXTRA__:") + if len(parts) > 1: + robusta_helm_extra = parts[1].strip() + # Don't add this line to filtered_lines + else: + # Regular line - add to 
filtered content + filtered_lines.append(line) + + # Join filtered lines back to get clean YAML + yaml_content = "\n".join(filtered_lines).strip() - # Strip any leading/trailing whitespace - yaml_content = source.strip() + # Escape HTML in the source to prevent XSS + escaped_source = html.escape(yaml_content) # Indent the yaml content for Robusta (add 2 spaces to each line under holmes:) robusta_yaml_lines = yaml_content.split("\n") @@ -34,6 +68,22 @@ def toolset_config_fence_format(source, language, css_class, options, md, **kwar " " + line if line else "" for line in robusta_yaml_lines ) + # Format extra content as HTML if present + if cli_extra: + # Support basic markdown-like formatting + if cli_extra.startswith("export ") or cli_extra.startswith("$"): + # Code block for environment variables + cli_extra = f'

💡 Alternative

Set the PROMETHEUS_URL environment variable instead of using the config file:

{html.escape(cli_extra)}
' + else: + # Regular text + cli_extra = f'

💡 Alternative

{html.escape(cli_extra)}

' + + if holmes_helm_extra: + holmes_helm_extra = f'

{html.escape(holmes_helm_extra)}

' + + if robusta_helm_extra: + robusta_helm_extra = f'

{html.escape(robusta_helm_extra)}

' + # Build the tabbed HTML structure for CLI, Holmes Helm, and Robusta tabs_html = f"""
@@ -49,10 +99,12 @@ def toolset_config_fence_format(source, language, css_class, options, md, **kwar

Add the following to ~/.holmes/config.yaml. Create the file if it doesn't exist:

{escaped_source}
+{cli_extra}

When using the standalone Holmes Helm Chart, update your values.yaml:

{escaped_source}
+{holmes_helm_extra}

Apply the configuration:

helm upgrade holmes holmes/holmes --values=values.yaml
@@ -60,6 +112,7 @@ def toolset_config_fence_format(source, language, css_class, options, md, **kwar

When using the Robusta Helm Chart (which includes HolmesGPT), update your generated_values.yaml:

holmes:
 {html.escape(robusta_yaml_indented)}
+{robusta_helm_extra}

Apply the configuration:

helm upgrade robusta robusta/robusta --values=generated_values.yaml --set clusterName=<YOUR_CLUSTER_NAME>
diff --git a/docs/data-sources/builtin-toolsets/coralogix-logs.md b/docs/data-sources/builtin-toolsets/coralogix-logs.md deleted file mode 100644 index a27310c49..000000000 --- a/docs/data-sources/builtin-toolsets/coralogix-logs.md +++ /dev/null @@ -1,114 +0,0 @@ -# Coralogix logs - -By enabling this toolset, HolmesGPT will fetch pod logs from [Coralogix](https://coralogix.com/). - ---8<-- "snippets/toolsets_that_provide_logging.md" - -## Prerequisites - -1. A [Coralogix API key](https://coralogix.com/docs/developer-portal/apis/data-query/direct-archive-query-http-api/#api-key) which is assigned the `DataQuerying` permission preset -2. A [Coralogix domain](https://coralogix.com/docs/user-guides/account-management/account-settings/coralogix-domain/). For example `eu2.coralogix.com` -3. Your team's [name or hostname](https://coralogix.com/docs/user-guides/account-management/organization-management/create-an-organization/#teams-in-coralogix). For example `your-company-name` - -You can deduce the `domain` and `team_hostname` configuration fields by looking at the URL you use to access the Coralogix UI. - -For example if you access Coralogix at `https://my-team.app.eu2.coralogix.com/` then the `team_hostname` is `my-team` and the Coralogix `domain` is `eu2.coralogix.com`. - -## Configuration - -```yaml-toolset-config -toolsets: - coralogix/logs: - enabled: true - config: - api_key: "" - domain: "eu2.coralogix.com" - team_hostname: "your-company-name" - - kubernetes/logs: - enabled: false # Disable default Kubernetes logging -``` - -## Custom Labels Configuration (Optional) - -By default, the Coralogix toolset expects logs to use standard Kubernetes field names. If your Coralogix deployment uses different field names for Kubernetes metadata, you can customize the label mappings. 
- -This is useful when: - -- Your log ingestion pipeline uses custom field names -- You have a non-standard Coralogix setup with different metadata fields -- Your Kubernetes logs are structured differently in Coralogix - -To find the correct field names, examine your logs in the Coralogix UI and identify how pod names, namespaces, log messages, and timestamps are labeled. - -### Example with Custom Labels - -```yaml-toolset-config -toolsets: - coralogix/logs: - enabled: true - config: - api_key: "" - domain: "eu2.coralogix.com" - team_hostname: "your-company-name" - labels: - namespace: "resource.attributes.k8s.pod.name" # Default - pod: "resource.attributes.k8s.namespace.name" # Default - log_message: "logRecord.body" # Default - timestamp: "logRecord.attributes.time" # Default - - kubernetes/logs: - enabled: false # Disable default Kubernetes logging -``` - -**Label Configuration Fields:** - -- `namespace`: Field path for Kubernetes namespace name -- `pod`: Field path for Kubernetes pod name -- `log_message`: Field path for the actual log message content -- `timestamp`: Field path for log timestamp - -All label fields are optional and will use the defaults shown above if not specified. 
- -## Logs Retrieval Strategy (Optional) - -Coralogix stores logs in two tiers with different performance characteristics: - -- **Frequent Search**: Fast queries with limited retention -- **Archive**: Slower queries but longer retention period - -You can configure how HolmesGPT retrieves logs using the `logs_retrieval_methodology` setting: - -### Available Strategies - -- `ARCHIVE_FALLBACK` (default): Try Frequent Search first, fallback to Archive if no results -- `FREQUENT_SEARCH_ONLY`: Only search Frequent Search tier -- `ARCHIVE_ONLY`: Only search Archive tier -- `BOTH_FREQUENT_SEARCH_AND_ARCHIVE`: Search both tiers and merge results -- `FREQUENT_SEARCH_FALLBACK`: Try Archive first, fallback to Frequent Search if no results - -### Example Configuration - -```yaml-toolset-config -toolsets: - coralogix/logs: - enabled: true - config: - api_key: "" - domain: "eu2.coralogix.com" - team_hostname: "your-company-name" - logs_retrieval_methodology: "ARCHIVE_FALLBACK" # Default -``` - -**Recommendations:** - -- Use `ARCHIVE_FALLBACK` for most cases (balances speed and coverage) -- Use `FREQUENT_SEARCH_ONLY` when you know Holmes does not need to access the log archive -- Use `ARCHIVE_ONLY` if the frequent search logs are always empty -- Use `BOTH_FREQUENT_SEARCH_AND_ARCHIVE` for comprehensive log coverage (slower) - -## Capabilities - -| Tool Name | Description | -|-----------|-------------| -| coralogix_fetch_logs | Fetch logs from Coralogix for specified pods and time ranges | diff --git a/docs/data-sources/builtin-toolsets/coralogix.md b/docs/data-sources/builtin-toolsets/coralogix.md new file mode 100644 index 000000000..fe9cdd6e3 --- /dev/null +++ b/docs/data-sources/builtin-toolsets/coralogix.md @@ -0,0 +1,222 @@ +# Coralogix + +Coralogix is a full-stack observability platform. HolmesGPT integrates with Coralogix to fetch both logs and metrics. 
+ +## Overview + +**[Logs Integration](#logs-configuration)**: Fetch and analyze pod logs from Coralogix's log management system. + +**[Metrics Integration](#metrics-configuration)**: Query metrics using Coralogix's PromQL-compatible endpoint. + +--8<-- "snippets/toolsets_that_provide_logging.md" + +## Capabilities + +| Toolset | Tool Name | Description | +|---------|-----------|-------------| +| coralogix/logs | fetch_pod_logs | Fetch logs from Coralogix for specified pods and time ranges | +| prometheus/metrics | execute_prometheus_instant_query | Execute instant PromQL queries against Coralogix | +| prometheus/metrics | execute_prometheus_range_query | Execute range PromQL queries against Coralogix | + +## Prerequisites + +1. **API Key**: A [Coralogix API key](https://coralogix.com/docs/developer-portal/apis/data-query/direct-archive-query-http-api/#api-key) with `DataQuerying` permission preset +2. **Domain**: Your [Coralogix domain](https://coralogix.com/docs/user-guides/account-management/account-settings/coralogix-domain/) (e.g., `eu2.coralogix.com`) +3. **Team Hostname**: Your team's [name or hostname](https://coralogix.com/docs/user-guides/account-management/organization-management/create-an-organization/#teams-in-coralogix) (e.g., `your-company-name`) + +**Finding Your Domain and Team**: You can deduce these from your Coralogix UI URL. 
For example, if you access Coralogix at `https://my-team.app.eu2.coralogix.com/`, then: +- `team_hostname` = `my-team` +- `domain` = `eu2.coralogix.com` + +## Logs Configuration + +```yaml-toolset-config +toolsets: + coralogix/logs: + enabled: true + config: + api_key: "" + domain: "eu2.coralogix.com" + team_hostname: "your-company-name" + + kubernetes/logs: + enabled: false # Disable default Kubernetes logging +``` + +### Advanced Settings + +#### Complete Configuration Example + +Here's a full example with all available settings: + +```yaml-toolset-config +toolsets: + coralogix/logs: + enabled: true + config: + api_key: "" + domain: "eu2.coralogix.com" + team_hostname: "your-company-name" + + # Custom field mappings (if your logs use non-standard field names) + labels: + namespace: "resource.attributes.k8s.namespace.name" # Default + pod: "resource.attributes.k8s.pod.name" # Default + log_message: "logRecord.body" # Default + timestamp: "logRecord.attributes.time" # Default + + # Logs retrieval strategy + logs_retrieval_methodology: "ARCHIVE_FALLBACK" # Default + + kubernetes/logs: + enabled: false # Disable default Kubernetes logging +``` + +#### Configuration Options + +| Option | Description | Default | Values | +|--------|-------------|---------|--------| +| `api_key` | Coralogix API key with DataQuerying permission | *Required* | String | +| `domain` | Your Coralogix domain (e.g., `eu2.coralogix.com`) | *Required* | String | +| `team_hostname` | Your team's name/hostname | *Required* | String | +| `logs_retrieval_methodology` | Strategy for querying log tiers | `ARCHIVE_FALLBACK` | See below | +| `labels.namespace` | Field path for Kubernetes namespace | `resource.attributes.k8s.namespace.name` | String | +| `labels.pod` | Field path for Kubernetes pod name | `resource.attributes.k8s.pod.name` | String | +| `labels.log_message` | Field path for log message content | `logRecord.body` | String | +| `labels.timestamp` | Field path for log timestamp | 
`logRecord.attributes.time` | String | + +#### Logs Retrieval Strategies + +Coralogix stores logs in two tiers: + +- **Frequent Search**: Fast queries with limited retention +- **Archive**: Slower queries with longer retention + +To configure the retrieval strategy, set the `logs_retrieval_methodology` option in your configuration: + +| Strategy | Description | +|----------|-------------| +| `ARCHIVE_FALLBACK` | **Recommended** - Try Frequent Search first, fallback to Archive if no results | +| `FREQUENT_SEARCH_ONLY` | Only search Frequent Search tier | +| `ARCHIVE_ONLY` | Only search Archive tier | +| `BOTH_FREQUENT_SEARCH_AND_ARCHIVE` | Search both tiers and merge results (slower) | +| `FREQUENT_SEARCH_FALLBACK` | Try Archive first, fallback to Frequent Search if no results | + +## Metrics Configuration + +Coralogix provides a PromQL-compatible endpoint for querying metrics. + +**Regional Endpoints** - Choose your region's PromQL endpoint: + +- **EU2 (Europe)**: `https://prom-api.eu2.coralogix.com` +- **US1 (USA)**: `https://prom-api.coralogix.com` +- **US2 (USA)**: `https://prom-api.cx498.coralogix.com` +- **AP1 (India)**: `https://prom-api.app.coralogix.in` +- **AP2 (Singapore)**: `https://prom-api.coralogixsg.com` + +```yaml-toolset-config +# __HOLMES_HELM_EXTRA__: For Kubernetes deployments, see Advanced Settings below for using environment variables instead of hardcoding the API key +toolsets: + prometheus/metrics: + enabled: true + config: + prometheus_url: "https://prom-api.eu2.coralogix.com" # Use your region + healthcheck: "/api/v1/query?query=up" # Required for Coralogix + headers: + token: "" + + # Coralogix-specific optimizations + fetch_metadata_with_series_api: true + fetch_labels_with_labels_api: true + metrics_labels_time_window_hrs: 72 +``` + +### Advanced Settings + +=== "HolmesGPT Helm Chart" + + #### Using Environment Variables + + Instead of hardcoding the API key in values.yaml, use a Kubernetes secret: + + ```bash + kubectl create secret 
generic coralogix-secrets \ + --from-literal=CORALOGIX_API_KEY='your-api-key' + ``` + + Then in your `values.yaml`: + + ```yaml + additionalEnvVars: + - name: CORALOGIX_API_KEY + valueFrom: + secretKeyRef: + name: coralogix-secrets + key: CORALOGIX_API_KEY + + toolsets: + coralogix/logs: + enabled: true + config: + api_key: "{{ env.CORALOGIX_API_KEY }}" + domain: "eu2.coralogix.com" + team_hostname: "your-company-name" + + prometheus/metrics: + enabled: true + config: + prometheus_url: "https://prom-api.eu2.coralogix.com" + healthcheck: "/api/v1/query?query=up" + headers: + token: "{{ env.CORALOGIX_API_KEY }}" + fetch_metadata_with_series_api: true + fetch_labels_with_labels_api: true + ``` + +=== "Robusta Helm Chart" + + #### Using Environment Variables + + For Robusta deployments, create a Kubernetes secret: + + ```bash + kubectl create secret generic coralogix-secrets \ + --from-literal=CORALOGIX_API_KEY='your-api-key' + ``` + + Then in your `generated_values.yaml`: + + ```yaml + holmes: + additionalEnvVars: + - name: CORALOGIX_API_KEY + valueFrom: + secretKeyRef: + name: coralogix-secrets + key: CORALOGIX_API_KEY + + toolsets: + coralogix/logs: + enabled: true + config: + api_key: "{{ env.CORALOGIX_API_KEY }}" + domain: "eu2.coralogix.com" + team_hostname: "your-company-name" + + prometheus/metrics: + enabled: true + config: + prometheus_url: "https://prom-api.eu2.coralogix.com" + healthcheck: "/api/v1/query?query=up" + headers: + token: "{{ env.CORALOGIX_API_KEY }}" + fetch_metadata_with_series_api: true + fetch_labels_with_labels_api: true + ``` + +#### Important Configuration Notes + +- **healthcheck**: Must be `/api/v1/query?query=up` (Coralogix doesn't support `-/healthy`) +- **fetch_metadata_with_series_api**: Set to `true` for better compatibility +- **fetch_labels_with_labels_api**: Set to `true` for improved performance +- **metrics_labels_time_window_hrs**: Increase to 72+ hours for better historical analysis diff --git 
a/docs/data-sources/builtin-toolsets/prometheus.md b/docs/data-sources/builtin-toolsets/prometheus.md index 20c1a2c9e..65fed0798 100644 --- a/docs/data-sources/builtin-toolsets/prometheus.md +++ b/docs/data-sources/builtin-toolsets/prometheus.md @@ -4,33 +4,60 @@ Connect HolmesGPT to Prometheus for metrics analysis and query generation. This ## Prerequisites -- A running and accessible Prometheus server -- Ensure HolmesGPT can connect to the Prometheus endpoint +- A running and accessible Prometheus server (or compatible service) +- Network access from HolmesGPT to the Prometheus endpoint + +### Supported Prometheus Providers + +HolmesGPT works with standard Prometheus and these managed services: + +- **[Coralogix](coralogix.md#metrics-configuration-prometheus)** - Full-stack observability platform +- **[Grafana Cloud (Mimir)](../prometheus-providers/grafana-cloud.md)** - Hosted Prometheus/Mimir service +- **[Amazon Managed Prometheus (AMP)](../prometheus-providers/amazon-managed-prometheus.md)** - AWS managed Prometheus service +- **VictoriaMetrics** - Prometheus-compatible monitoring solution ## Configuration ```yaml-toolset-config +# __CLI_EXTRA__: export PROMETHEUS_URL="http://your-prometheus:9090" && holmes ask "show me CPU usage" toolsets: - prometheus/metrics: - enabled: true - config: - prometheus_url: http://:9090 - - # Optional: - #headers: - # Authorization: "Basic " + prometheus/metrics: + enabled: true + config: + prometheus_url: http://:9090 + + # Optional authentication: + #headers: + # Authorization: "Basic " + + # Optional SSL/TLS settings: + #prometheus_ssl_enabled: true # Set to false to disable SSL verification (default: true) + + # Optional label filtering: + #additional_labels: # Add extra label selectors to all Prometheus queries + # cluster: "production" + # region: "us-west-2" ``` +### Validation -💡 **Alternative**: Set the `PROMETHEUS_URL` environment variable instead of using the config file. 
+=== "CLI" -## Validation + Test your connection: + ```bash + holmes ask "Show me the CPU usage for the last hour" + ``` -To test your connection, run: +=== "HolmesGPT Helm Chart" -```bash -holmes ask "Show me the CPU usage for the last hour" -``` + After deploying, test the API endpoint directly. See [HTTP API Reference](../../reference/http-api.md) for details. + +=== "Robusta Helm Chart" + + Open **Ask Holmes** in the Robusta SaaS platform and ask: + ``` + Show me the CPU usage for the last hour + ``` ## Troubleshooting @@ -65,14 +92,16 @@ This will print all possible Prometheus service URLs in your cluster. Pick the o - **Connection refused**: Check if the Prometheus URL is accessible from HolmesGPT. - **Authentication errors**: Verify the headers configuration for secured Prometheus endpoints. -- **No metrics returned**: Ensure that Prometheus is scraping your targets. +- **SSL certificate errors**: + - For self-signed certificates, set `prometheus_ssl_enabled: false` to disable verification + - Or provide a custom CA certificate via the `CERTIFICATE` environment variable (see [Custom SSL Certificates](../../ai-providers/openai-compatible.md#custom-ssl-certificates)) ## Advanced Configuration You can further customize the Prometheus toolset with the following options: -```yaml +```yaml-toolset-config toolsets: prometheus/metrics: enabled: true @@ -86,6 +115,10 @@ toolsets: fetch_labels_with_labels_api: false # Use labels API instead of series API (default: false) fetch_metadata_with_series_api: false # Use series API for metadata (default: false) tool_calls_return_data: true # If false, disables returning Prometheus data (default: true) + prometheus_ssl_enabled: true # Set to false to disable SSL verification (default: true) + additional_labels: # Add extra label selectors to all Prometheus queries (optional) + cluster: "production" + region: "us-west-2" ``` **Config option explanations:** @@ -98,6 +131,31 @@ toolsets: - `fetch_labels_with_labels_api`: Use the 
Prometheus labels API to fetch labels (can improve performance, but increases HTTP calls). - `fetch_metadata_with_series_api`: Use the series API for metadata (only set to true if the metadata API is disabled or not working). - `tool_calls_return_data`: If `false`, disables returning Prometheus data to HolmesGPT (useful if you hit token limits). +- `prometheus_ssl_enabled`: Enable/disable SSL certificate verification. Set to `false` for self-signed certificates (default: `true`). +- `additional_labels`: Dictionary of labels to add to all Prometheus queries. Useful for filtering metrics in multi-cluster or multi-tenant environments. + +## SSL/TLS Configuration + +### Self-Signed Certificates + +If your Prometheus instance uses self-signed certificates, you have two options: + +**Option 1: Disable SSL verification** (less secure, but simpler) +```yaml +prometheus/metrics: + config: + prometheus_ssl_enabled: false +``` + +**Option 2: Provide custom CA certificate** (more secure) +```yaml +# Set the CERTIFICATE environment variable with your base64-encoded CA certificate +additionalEnvVars: + - name: CERTIFICATE + value: "LS0tLS1CRUdJTi..." # Your base64-encoded CA certificate +``` + +The `CERTIFICATE` environment variable applies globally to all HTTPS connections made by Holmes, including Prometheus, AI providers, and other integrations. See [Custom SSL Certificates](../../ai-providers/openai-compatible.md#custom-ssl-certificates) for more details. ## Capabilities @@ -107,80 +165,3 @@ toolsets: | execute_prometheus_instant_query | Execute an instant PromQL query | | execute_prometheus_range_query | Execute a range PromQL query for time series data | | get_current_time | Get current timestamp for time-based queries | - ---- - -## Coralogix Prometheus Configuration - -To use a Coralogix PromQL endpoint with HolmesGPT: - -1. 
Go to [Coralogix Documentation](https://coralogix.com/docs/integrations/coralogix-endpoints/#promql) and choose the relevant PromQL endpoint for your region. -2. In Coralogix, create an API key with permissions to query metrics (Data Flow → API Keys). -3. Create a Kubernetes secret for the API key and expose it as an environment variable in your Helm values: - - ```yaml - holmes: - additionalEnvVars: - - name: CORALOGIX_API_KEY - valueFrom: - secretKeyRef: - name: coralogix-api-key - key: CORALOGIX_API_KEY - ``` - -4. Add the following under your toolsets in the Helm chart: - - ```yaml - holmes: - toolsets: - prometheus/metrics: - enabled: true - config: - healthcheck: "/api/v1/query?query=up" # This is important for Coralogix - prometheus_url: "https://prom-api.eu2.coralogix.com" # Use your region's endpoint - headers: - token: "{{ env.CORALOGIX_API_KEY }}" - metrics_labels_time_window_hrs: 72 - metrics_labels_cache_duration_hrs: 12 - fetch_labels_with_labels_api: true - tool_calls_return_data: true - fetch_metadata_with_series_api: true - ``` - ---- - -## Grafana Cloud (Mimir) Configuration - -To connect HolmesGPT to Grafana Cloud's Prometheus/Mimir endpoint: - -1. **Create a service account token in Grafana Cloud:** - - Navigate to "Administration → Service accounts" - - Create a new service account - - Generate a service account token (starts with `glsa_`) - -2. **Find your Prometheus datasource UID:** - ```bash - curl -H "Authorization: Bearer YOUR_GLSA_TOKEN" \ - "https://YOUR-INSTANCE.grafana.net/api/datasources" | \ - jq '.[] | select(.type=="prometheus") | {name, uid}' - ``` - -3. 
**Configure HolmesGPT:** - ```yaml - holmes: - toolsets: - prometheus/metrics: - enabled: true - config: - prometheus_url: https://YOUR-INSTANCE.grafana.net/api/datasources/proxy/uid/PROMETHEUS_DATASOURCE_UID - fetch_labels_with_labels_api: false # Important for Mimir - fetch_metadata_with_series_api: true # Important for Mimir - headers: - Authorization: Bearer YOUR_GLSA_TOKEN - ``` - -**Important notes:** - -- Use the proxy endpoint URL format `/api/datasources/proxy/uid/` - this handles authentication and routing to Mimir automatically -- Set `fetch_labels_with_labels_api: false` for optimal Mimir compatibility -- Set `fetch_metadata_with_series_api: true` for proper metadata retrieval diff --git a/docs/data-sources/prometheus-providers/amazon-managed-prometheus.md b/docs/data-sources/prometheus-providers/amazon-managed-prometheus.md new file mode 100644 index 000000000..4ea180289 --- /dev/null +++ b/docs/data-sources/prometheus-providers/amazon-managed-prometheus.md @@ -0,0 +1,155 @@ +# Amazon Managed Prometheus (AMP) + +Configure HolmesGPT to use Amazon Managed Prometheus for metrics analysis in AWS environments. 
+ +## Prerequisites + +- AWS account with AMP workspace +- IAM credentials or IRSA (IAM Roles for Service Accounts) configured +- AMP workspace endpoint URL + +## Configuration Options + +### Option 1: Using IRSA (Recommended for EKS) + +If running HolmesGPT in EKS with IRSA configured: + +```yaml-toolset-config +toolsets: + prometheus/metrics: + enabled: true + config: + prometheus_url: "https://aps-workspaces.us-west-2.amazonaws.com/workspaces/ws-xxxxx" + aws_region: "us-west-2" + # IRSA credentials will be automatically detected +``` + +### Option 2: Using IAM Credentials + +```yaml-toolset-config +toolsets: + prometheus/metrics: + enabled: true + config: + prometheus_url: "https://aps-workspaces.us-west-2.amazonaws.com/workspaces/ws-xxxxx" + aws_region: "us-west-2" + aws_access_key: "YOUR_ACCESS_KEY" # Consider using environment variables + aws_secret_access_key: "YOUR_SECRET_KEY" # Consider using environment variables +``` + +### Option 3: Using Environment Variables (Recommended) + +=== "CLI" + + Set AWS credentials as environment variables: + ```bash + export AWS_ACCESS_KEY_ID="your-access-key" + export AWS_SECRET_ACCESS_KEY="your-secret-key" + export AWS_REGION="us-west-2" + ``` + + Configure `~/.holmes/config.yaml`: + ```yaml + toolsets: + prometheus/metrics: + enabled: true + config: + prometheus_url: "https://aps-workspaces.us-west-2.amazonaws.com/workspaces/ws-xxxxx" + aws_region: "us-west-2" + ``` + +=== "Kubernetes (Helm)" + + Store credentials as a Kubernetes secret: + ```bash + kubectl create secret generic aws-credentials \ + --from-literal=AWS_ACCESS_KEY_ID='your-access-key' \ + --from-literal=AWS_SECRET_ACCESS_KEY='your-secret-key' + ``` + + Configure your Helm values: + ```yaml + additionalEnvVars: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-credentials + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-credentials + key: AWS_SECRET_ACCESS_KEY + + toolsets: + 
prometheus/metrics: + enabled: true + config: + prometheus_url: "https://aps-workspaces.us-west-2.amazonaws.com/workspaces/ws-xxxxx" + aws_region: "us-west-2" + ``` + +## Finding Your AMP Workspace URL + +1. Navigate to the Amazon Managed Service for Prometheus console +2. Select your workspace +3. Copy the **Workspace endpoint URL** +4. Your URL format should be: `https://aps-workspaces.REGION.amazonaws.com/workspaces/WORKSPACE_ID` + +## IAM Permissions Required + +Your IAM user or role needs these permissions: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "aps:QueryMetrics", + "aps:GetSeries", + "aps:GetLabels", + "aps:GetMetricMetadata" + ], + "Resource": "arn:aws:aps:REGION:ACCOUNT:workspace/WORKSPACE_ID" + } + ] +} +``` + +## Configuration Notes + +- **Authentication**: AMP uses AWS SigV4 authentication, which is handled automatically +- **SSL**: SSL verification is disabled by default for AMP (set by the AMPConfig class) +- **Healthcheck**: Automatically set to `api/v1/query?query=up` for AMP compatibility +- **IRSA**: If using IRSA, ensure your service account is properly annotated with the IAM role + +## Validation + +Test your configuration: + +```bash +holmes ask "What metrics are available in my AMP workspace?" 
+``` + +## Troubleshooting + +### Authentication Errors +- Verify IAM permissions are correct +- Check AWS credentials are properly set +- For IRSA, ensure the service account has the correct annotation + +### Connection Issues +- Verify the workspace URL is correct +- Check the AWS region matches your workspace location +- Ensure network connectivity from your cluster to AMP + +### No Metrics Found +- Confirm metrics are being ingested into AMP +- Check that Prometheus remote write is configured correctly +- Verify the time range of your queries + +## Additional Options + +For all available Prometheus configuration options, see the [main Prometheus documentation](../prometheus.md#advanced-configuration). diff --git a/docs/data-sources/prometheus-providers/grafana-cloud.md b/docs/data-sources/prometheus-providers/grafana-cloud.md new file mode 100644 index 000000000..2cc3524e6 --- /dev/null +++ b/docs/data-sources/prometheus-providers/grafana-cloud.md @@ -0,0 +1,118 @@ +# Grafana Cloud (Mimir) + +Configure HolmesGPT to use Grafana Cloud's Prometheus/Mimir endpoint for metrics analysis. + +## Prerequisites + +- Grafana Cloud account +- Service account with MetricsReader role +- Your Grafana Cloud stack information + +## Configuration Steps + +### 1. Create a Service Account Token + +1. Navigate to **Administration → Service accounts** in Grafana Cloud +2. Create a new service account with a descriptive name (e.g., `holmes-metrics-reader`) +3. Assign the **MetricsReader** role +4. Generate a new service account token +5. Copy the generated token (you won't be able to see it again) + +### 2. Find Your Prometheus Endpoint + +Your Prometheus endpoint URL format: +``` +https://.grafana.net/api/prom +``` + +You can find your stack name in your Grafana Cloud portal URL. + +### 3. 
Configure HolmesGPT + +=== "CLI" + + Create or edit `~/.holmes/config.yaml`: + + ```yaml + toolsets: + prometheus/metrics: + enabled: true + config: + prometheus_url: "https://your-stack.grafana.net/api/prom" + healthcheck: "/api/v1/query?query=up" # Required for Mimir + headers: + Authorization: "Bearer YOUR_SERVICE_ACCOUNT_TOKEN" + + # Mimir-specific settings + metrics_labels_time_window_hrs: 168 # 7 days + fetch_labels_with_labels_api: true + ``` + +=== "Kubernetes (Helm)" + + Store your token as a Kubernetes secret: + + ```bash + kubectl create secret generic grafana-cloud-token \ + --from-literal=GRAFANA_CLOUD_TOKEN='your-service-account-token' + ``` + + Then configure your Helm values: + + ```yaml + additionalEnvVars: + - name: GRAFANA_CLOUD_TOKEN + valueFrom: + secretKeyRef: + name: grafana-cloud-token + key: GRAFANA_CLOUD_TOKEN + + toolsets: + prometheus/metrics: + enabled: true + config: + prometheus_url: "https://your-stack.grafana.net/api/prom" + healthcheck: "/api/v1/query?query=up" # Required for Mimir + headers: + Authorization: "Bearer {{ env.GRAFANA_CLOUD_TOKEN }}" + + # Mimir-specific settings + metrics_labels_time_window_hrs: 168 # 7 days + fetch_labels_with_labels_api: true + ``` + +## Important Configuration Notes + +- **healthcheck**: Must be set to `/api/v1/query?query=up` (Mimir doesn't support `-/healthy`) +- **Authorization header**: Must use `Bearer` token format +- **metrics_labels_time_window_hrs**: Can be increased up to your data retention period +- **Rate limits**: Grafana Cloud has rate limits - HolmesGPT respects these automatically + +## Validation + +Test your configuration: + +```bash +holmes ask "Show me the current memory usage metrics" +``` + +## Troubleshooting + +### Authentication Errors +- Verify your service account token is correct +- Ensure the token has MetricsReader permissions +- Check that Authorization header uses `Bearer` prefix + +### Rate Limiting +If you encounter rate limit errors: +- Reduce 
the number of API calls by increasing `metrics_labels_cache_duration_hrs` to cache results longer +- Decrease `metrics_labels_time_window_hrs` to query less data + +### Connection Issues +- Verify your stack name in the URL is correct +- Ensure the `/api/prom` path is included in the URL +- Check network connectivity to Grafana Cloud + +## Additional Options + +For all available Prometheus configuration options, see the [main Prometheus documentation](../prometheus.md#advanced-configuration). diff --git a/mkdocs.yml b/mkdocs.yml index 3cdb5d087..06168eb0e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -172,7 +172,7 @@ nav: - Azure Kubernetes Service: data-sources/builtin-toolsets/aks.md - Azure SQL Database: data-sources/builtin-toolsets/azure-sql.md - Confluence: data-sources/builtin-toolsets/confluence.md - - Coralogix logs: data-sources/builtin-toolsets/coralogix-logs.md + - Coralogix: data-sources/builtin-toolsets/coralogix.md - DataDog: data-sources/builtin-toolsets/datadog.md - Datetime: data-sources/builtin-toolsets/datetime.md - Docker: data-sources/builtin-toolsets/docker.md @@ -189,6 +189,8 @@ nav: - OpenSearch logs: data-sources/builtin-toolsets/opensearch-logs.md - OpenSearch status: data-sources/builtin-toolsets/opensearch-status.md - Prometheus: data-sources/builtin-toolsets/prometheus.md + - Prometheus (Grafana Cloud): data-sources/prometheus-providers/grafana-cloud.md + - Prometheus (Amazon Managed): data-sources/prometheus-providers/amazon-managed-prometheus.md - RabbitMQ: data-sources/builtin-toolsets/rabbitmq.md - Robusta: data-sources/builtin-toolsets/robusta.md - ServiceNow: data-sources/builtin-toolsets/servicenow.md