Skip to content

Commit e40e44d

Browse files
committed
add fetch_filtered_json tool
1 parent 2fd3406 commit e40e44d

File tree

2 files changed

+176
-1
lines changed

2 files changed

+176
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ Each builtin server entry requires:
235235
- `todo`: Manage ephemeral todo lists for task tracking during sessions
236236
- No configuration options required (todos are stored in memory and reset on restart)
237237
- `http`: Fetch web content and convert to text, markdown, or HTML formats
238-
- Tools: `fetch` (fetch and convert web content), `fetch_summarize` (fetch and summarize web content using AI), `fetch_extract` (fetch and extract specific data using AI)
238+
- Tools: `fetch` (fetch and convert web content), `fetch_summarize` (fetch and summarize web content using AI), `fetch_extract` (fetch and extract specific data using AI), `fetch_filtered_json` (fetch JSON and filter using gjson path syntax)
239239
- No configuration options required
240240

241241
#### Builtin Server Examples

internal/builtin/http.go

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package builtin
22

33
import (
44
"context"
5+
"encoding/json"
56
"fmt"
67
"io"
78
"net/http"
@@ -15,6 +16,7 @@ import (
1516
"github.com/cloudwego/eino/schema"
1617
"github.com/mark3labs/mcp-go/mcp"
1718
"github.com/mark3labs/mcp-go/server"
19+
"github.com/tidwall/gjson"
1820
)
1921

2022
const (
@@ -83,6 +85,24 @@ func NewHTTPServer(llmModel model.ToolCallingChatModel) (*server.MCPServer, erro
8385
),
8486
)
8587
s.AddTool(extractTool, executeHTTPFetchExtract)
88+
89+
filterJSONTool := mcp.NewTool("fetch_filtered_json",
90+
mcp.WithDescription(httpFilterJSONDescription),
91+
mcp.WithString("url",
92+
mcp.Required(),
93+
mcp.Description("The URL to fetch JSON content from"),
94+
),
95+
mcp.WithString("path",
96+
mcp.Required(),
97+
mcp.Description("The gjson path expression to filter the JSON (e.g., 'users.#.name', 'data.items.0', 'results.#(age>25).name')"),
98+
),
99+
mcp.WithNumber("timeout",
100+
mcp.Description("Optional timeout in seconds (max 120)"),
101+
mcp.Min(0),
102+
mcp.Max(120),
103+
),
104+
)
105+
s.AddTool(filterJSONTool, executeHTTPFetchFilteredJSON)
86106
}
87107

88108
return s, nil
@@ -479,6 +499,140 @@ func httpExtractTextFromHTML(htmlContent string) (string, error) {
479499
return strings.Join(cleanLines, "\n"), nil
480500
}
481501

502+
// executeHTTPFetchFilteredJSON handles the fetch_filtered_json tool execution
503+
func executeHTTPFetchFilteredJSON(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
504+
// Extract parameters
505+
urlStr, err := request.RequireString("url")
506+
if err != nil {
507+
return mcp.NewToolResultError("url parameter is required and must be a string"), nil
508+
}
509+
510+
path, err := request.RequireString("path")
511+
if err != nil {
512+
return mcp.NewToolResultError("path parameter is required and must be a string"), nil
513+
}
514+
515+
// Parse timeout (optional)
516+
timeout := httpDefaultFetchTimeout
517+
if timeoutSec := request.GetFloat("timeout", 0); timeoutSec > 0 {
518+
timeoutDuration := time.Duration(timeoutSec) * time.Second
519+
if timeoutDuration > httpMaxFetchTimeout {
520+
timeout = httpMaxFetchTimeout
521+
} else {
522+
timeout = timeoutDuration
523+
}
524+
}
525+
526+
// Validate URL
527+
parsedURL, err := url.Parse(urlStr)
528+
if err != nil {
529+
return mcp.NewToolResultError(fmt.Sprintf("invalid URL: %v", err)), nil
530+
}
531+
532+
// Ensure URL has a scheme
533+
if parsedURL.Scheme == "" {
534+
urlStr = "https://" + urlStr
535+
parsedURL, err = url.Parse(urlStr)
536+
if err != nil {
537+
return mcp.NewToolResultError(fmt.Sprintf("invalid URL after adding https: %v", err)), nil
538+
}
539+
}
540+
541+
// Only allow HTTP and HTTPS
542+
if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" {
543+
return mcp.NewToolResultError("URL must use http:// or https://"), nil
544+
}
545+
546+
// Create HTTP client with timeout
547+
client := &http.Client{
548+
Timeout: timeout,
549+
}
550+
551+
// Create request with context
552+
req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
553+
if err != nil {
554+
return mcp.NewToolResultError(fmt.Sprintf("failed to create request: %v", err)), nil
555+
}
556+
557+
// Set headers to mimic a real browser and accept JSON
558+
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
559+
req.Header.Set("Accept", "application/json, text/plain, */*")
560+
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
561+
562+
// Make the request
563+
resp, err := client.Do(req)
564+
if err != nil {
565+
return mcp.NewToolResultError(fmt.Sprintf("request failed: %v", err)), nil
566+
}
567+
defer resp.Body.Close()
568+
569+
// Check status code
570+
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
571+
return mcp.NewToolResultError(fmt.Sprintf("request failed with status code: %d", resp.StatusCode)), nil
572+
}
573+
574+
// Check content length
575+
if resp.ContentLength > httpMaxResponseSize {
576+
return mcp.NewToolResultError("response too large (exceeds 5MB limit)"), nil
577+
}
578+
579+
// Read response body with size limit
580+
limitedReader := io.LimitReader(resp.Body, httpMaxResponseSize+1)
581+
bodyBytes, err := io.ReadAll(limitedReader)
582+
if err != nil {
583+
return mcp.NewToolResultError(fmt.Sprintf("failed to read response: %v", err)), nil
584+
}
585+
586+
// Check if we exceeded the size limit
587+
if len(bodyBytes) > httpMaxResponseSize {
588+
return mcp.NewToolResultError("response too large (exceeds 5MB limit)"), nil
589+
}
590+
591+
content := string(bodyBytes)
592+
593+
// Validate that the content is valid JSON
594+
if !json.Valid(bodyBytes) {
595+
return mcp.NewToolResultError("response is not valid JSON"), nil
596+
}
597+
598+
// Apply gjson path to filter the JSON
599+
result := gjson.Get(content, path)
600+
if !result.Exists() {
601+
return mcp.NewToolResultError(fmt.Sprintf("gjson path '%s' did not match any data", path)), nil
602+
}
603+
604+
// Get the filtered JSON as a string
605+
var filteredJSON string
606+
if result.IsArray() || result.IsObject() {
607+
filteredJSON = result.Raw
608+
} else {
609+
// For primitive values, wrap in quotes if it's a string
610+
if result.Type == gjson.String {
611+
filteredJSON = fmt.Sprintf(`"%s"`, result.Str)
612+
} else {
613+
filteredJSON = result.Raw
614+
}
615+
}
616+
617+
// Create result with metadata
618+
contentType := resp.Header.Get("Content-Type")
619+
if contentType == "" {
620+
contentType = "application/json"
621+
}
622+
623+
title := fmt.Sprintf("Filtered JSON from %s (path: %s)", urlStr, path)
624+
mcpResult := mcp.NewToolResultText(filteredJSON)
625+
mcpResult.Meta = map[string]any{
626+
"title": title,
627+
"url": urlStr,
628+
"contentType": contentType,
629+
"gjsonPath": path,
630+
"resultType": result.Type.String(),
631+
}
632+
633+
return mcpResult, nil
634+
}
635+
482636
// httpGetTextFromSamplingResult extracts text from sampling result
483637
func httpGetTextFromSamplingResult(result *mcp.CreateMessageResult) string {
484638
if textContent, ok := result.Content.(mcp.TextContent); ok {
@@ -532,3 +686,24 @@ Usage notes:
532686
- Instructions should be specific (e.g., "Extract all product names and prices", "Get the main article content", "Find all email addresses")
533687
- Returns "Information not found" if the requested data is not available
534688
- Ideal for structured data extraction, content parsing, and targeted information retrieval`
689+
690+
// httpFilterJSONDescription is the description text registered for the
// fetch_filtered_json tool. It is emitted verbatim to tool consumers, so any
// edit to the text below changes what callers see at runtime.
const httpFilterJSONDescription = `Fetches JSON content from a URL and applies gjson path filtering to extract specific data.
691+
692+
- Fetches JSON content from a specified URL using HTTP GET
693+
- Uses gjson path syntax to filter and extract specific parts of the JSON
694+
- Returns filtered JSON results based on the provided path expression
695+
- Supports all gjson features: wildcards, arrays, queries, modifiers, and more
696+
697+
Usage notes:
698+
- The URL must return valid JSON content
699+
- Uses gjson path syntax for filtering (see https://github.com/tidwall/gjson/blob/master/SYNTAX.md)
700+
- Common path examples:
701+
- "users.#.name" - Get all user names from an array
702+
- "data.items.0" - Get the first item from data.items array
703+
- "results.#(age>25).name" - Get names where age > 25
704+
- "friends.#(last==\"Murphy\")#.first" - Get first names of all Murphys
705+
- "@reverse" - Reverse an array
706+
- "users.#.{name,email}" - Create new objects with only name and email
707+
- Returns error if path doesn't match any data
708+
- Maximum response size is 5MB
709+
- Timeout can be specified in seconds (default 30s, max 120s)`

0 commit comments

Comments
 (0)