package builtin

// WebSearch tool -- multi-backend web search.
//
// Four backends are supported and auto-detected in priority order:
//  1. Google Custom Search API (GOOGLE_SEARCH_API_KEY + GOOGLE_SEARCH_CX)
//  2. Brave Search API (BRAVE_SEARCH_API_KEY)
//  3. Self-hosted SearXNG instance (SEARXNG_URL)
//  4. DuckDuckGo Instant Answer (free, limited) - the default fallback
//
// ELEVATED: the early CLI implementation supported a single search API only;
// this one is pluggable across backends: it works with zero configuration
// (DDG fallback) and upgrades to a paid API once the env vars are set.
// Rejected alternative: <support one API only, configuration mandatory> -
// the zero-config experience is lost, and working out of the box is a core value.
//
// Properties:
//   - ConcurrencySafe: true, ReadOnly: true
//   - No external dependencies (Go standard library only)
//   - Graceful degradation: an explicit error is returned when every backend fails

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"

	"git.flytoex.net/yuanwei/flyto-agent/pkg/permission"
	"git.flytoex.net/yuanwei/flyto-agent/pkg/tools"
)

// WebSearchTool is the web search tool. It owns the HTTP client that is
// handed to whichever backend serves a query.
type WebSearchTool struct {
	httpClient *http.Client
}

// NewWebSearchTool builds a WebSearch tool whose HTTP client gives up on a
// request after 15 seconds.
func NewWebSearchTool() *WebSearchTool {
	client := &http.Client{Timeout: 15 * time.Second}
	return &WebSearchTool{httpClient: client}
}

// webSearchInput holds the input parameters of the WebSearch tool.
type webSearchInput struct {
	Query      string `json:"query"`
	MaxResults int    `json:"max_results,omitempty"` // maximum number of results, default 10
}

// searchResult is one search hit, independent of the backend that produced it.
type searchResult struct {
	Title   string `json:"title"`
	URL     string `json:"url"`
	Snippet string `json:"snippet"`
}

// Name reports the tool name.
func (t *WebSearchTool) Name() string {
	return "WebSearch"
}

// Description reports the human-readable tool description. ctx is not used.
func (t *WebSearchTool) Description(ctx context.Context) string {
	const description = "Searches the web for information. " +
		"Returns search results including titles, URLs, and snippets. " +
		"Supports multiple backends: Google CSE, Brave, SearXNG, DuckDuckGo (fallback)."
	return description
}

// InputSchema returns the tool's JSON Schema input definition.
func (t *WebSearchTool) InputSchema() json.RawMessage {
	// Static schema: "query" (string) is required; "max_results" (integer)
	// is optional and documented with a default of 10.
	return json.RawMessage(`{ "type": "object", "properties": { "query": { "type": "string", "description": "The search query" }, "max_results": { "type": "integer", "description": "Maximum number of results to return (default 10)", "default": 10 } }, "required": ["query"] }`)
}

// Metadata returns the tool metadata: the tool is declared concurrency-safe,
// read-only and non-destructive, and is audited as an "invoke" operation.
func (t *WebSearchTool) Metadata() tools.Metadata {
	return tools.Metadata{
		ConcurrencySafe: true,
		ReadOnly:        true,
		Destructive:     false,
		SearchHint:      "web search google bing brave query internet",
		PermissionClass: permission.PermClassReadOnly,
		AuditOperation:  "invoke",
	}
}

// Execute runs a web search.
//
// CLEVER: backends cascade in configured priority order -- Google is used
// when its key is present, otherwise the next one. A backend that fails or
// returns nothing hands over to the next, with DDG as the last resort, so the
// tool also works with zero configuration.
//
// input is the JSON-encoded webSearchInput. max_results defaults to 10 when
// missing or non-positive and is capped at 50. progress is not used.
//
// A malformed input document is returned as a Go error; an empty query or a
// failure of every backend is returned as a Result with IsError set.
func (t *WebSearchTool) Execute(ctx context.Context, input json.RawMessage, progress tools.ProgressFunc) (*tools.Result, error) {
	var params webSearchInput
	if err := json.Unmarshal(input, &params); err != nil {
		return nil, fmt.Errorf("websearch: invalid input: %w", err)
	}

	// A query made only of whitespace is as useless to the backends as an
	// empty one, so reject both up front.
	if strings.TrimSpace(params.Query) == "" {
		return &tools.Result{
			Output:  "error: query is required",
			IsError: true,
		}, nil
	}

	maxResults := params.MaxResults
	if maxResults <= 0 {
		maxResults = 10
	}
	if maxResults > 50 {
		maxResults = 50 // upper bound protection
	}

	// Try each backend in priority order.
	var lastErr error
	for _, backend := range t.selectBackends() {
		results, err := backend(ctx, t.httpClient, params.Query, maxResults)
		switch {
		case err != nil:
			lastErr = err
		case len(results) == 0:
			lastErr = fmt.Errorf("no results")
		default:
			return &tools.Result{
				Output:  formatResults(params.Query, results),
				IsError: false,
			}, nil
		}

		// Once the caller has cancelled or the deadline has passed, every
		// remaining backend would fail the same way; stop cascading.
		if ctx.Err() != nil {
			break
		}
	}

	// Every backend failed.
	return &tools.Result{
		Output: fmt.Sprintf("WebSearch failed on all backends.\n\n"+
			"Last error: %v\n\n"+
			"Tried backends (in order):\n"+
			" - Google CSE (GOOGLE_SEARCH_API_KEY + GOOGLE_SEARCH_CX)\n"+
			" - Brave Search (BRAVE_SEARCH_API_KEY)\n"+
			" - SearXNG (SEARXNG_URL)\n"+
			" - DuckDuckGo (free fallback)\n\n"+
			"Query: %q",
			lastErr, params.Query),
		IsError: true,
	}, nil
}
// searchBackend 是搜索后端的函数签名. type searchBackend func(ctx context.Context, client *http.Client, query string, maxResults int) ([]searchResult, error) // selectBackends 根据环境变量配置返回可用的后端按优先级排列. // 精妙之处(CLEVER): 不缓存结果--每次调用都重新读 env var, // 允许运行时通过 engine.Config 的 secrets 机制动态注入 key. func (t *WebSearchTool) selectBackends() []searchBackend { var backends []searchBackend // 1. Google Custom Search(付费,质量最好) if os.Getenv("GOOGLE_SEARCH_API_KEY") != "" && os.Getenv("GOOGLE_SEARCH_CX") != "" { backends = append(backends, googleCSEBackend) } // 2. Brave Search(付费,隐私友好) if os.Getenv("BRAVE_SEARCH_API_KEY") != "" { backends = append(backends, braveBackend) } // 3. SearXNG(自托管,免费) if os.Getenv("SEARXNG_URL") != "" { backends = append(backends, searxngBackend) } // 4. DuckDuckGo Instant Answer(免费兜底,功能有限) backends = append(backends, duckDuckGoBackend) return backends } // ============================================================ // Backend 1: Google Custom Search API // ============================================================ // googleCSEResponse 是 Google CSE API 的响应格式(精简版). type googleCSEResponse struct { Items []struct { Title string `json:"title"` Link string `json:"link"` Snippet string `json:"snippet"` } `json:"items"` Error *struct { Code int `json:"code"` Message string `json:"message"` } `json:"error,omitempty"` } func googleCSEBackend(ctx context.Context, client *http.Client, query string, maxResults int) ([]searchResult, error) { apiKey := os.Getenv("GOOGLE_SEARCH_API_KEY") cx := os.Getenv("GOOGLE_SEARCH_CX") // Google CSE 每次最多返回 10 条 num := maxResults if num > 10 { num = 10 } params := url.Values{} params.Set("key", apiKey) params.Set("cx", cx) params.Set("q", query) params.Set("num", fmt.Sprintf("%d", num)) reqURL := "https://www.googleapis.com/customsearch/v1?" 
+ params.Encode() req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil) if err != nil { return nil, fmt.Errorf("google: build request: %w", err) } resp, err := client.Do(req) if err != nil { return nil, fmt.Errorf("google: http: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024)) // 1MB 上限 if err != nil { return nil, fmt.Errorf("google: read body: %w", err) } var result googleCSEResponse if err := json.Unmarshal(body, &result); err != nil { return nil, fmt.Errorf("google: parse json: %w", err) } if result.Error != nil { return nil, fmt.Errorf("google: API error %d: %s", result.Error.Code, result.Error.Message) } results := make([]searchResult, 0, len(result.Items)) for _, item := range result.Items { results = append(results, searchResult{ Title: item.Title, URL: item.Link, Snippet: item.Snippet, }) } return results, nil } // ============================================================ // Backend 2: Brave Search API // ============================================================ // braveResponse 是 Brave Search API 的响应格式(精简版). type braveResponse struct { Web struct { Results []struct { Title string `json:"title"` URL string `json:"url"` Description string `json:"description"` } `json:"results"` } `json:"web"` } func braveBackend(ctx context.Context, client *http.Client, query string, maxResults int) ([]searchResult, error) { apiKey := os.Getenv("BRAVE_SEARCH_API_KEY") params := url.Values{} params.Set("q", query) params.Set("count", fmt.Sprintf("%d", maxResults)) reqURL := "https://api.search.brave.com/res/v1/web/search?" 
+ params.Encode() req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil) if err != nil { return nil, fmt.Errorf("brave: build request: %w", err) } req.Header.Set("X-Subscription-Token", apiKey) req.Header.Set("Accept", "application/json") resp, err := client.Do(req) if err != nil { return nil, fmt.Errorf("brave: http: %w", err) } defer resp.Body.Close() if resp.StatusCode != 200 { return nil, fmt.Errorf("brave: HTTP %d", resp.StatusCode) } body, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024)) if err != nil { return nil, fmt.Errorf("brave: read body: %w", err) } var result braveResponse if err := json.Unmarshal(body, &result); err != nil { return nil, fmt.Errorf("brave: parse json: %w", err) } results := make([]searchResult, 0, len(result.Web.Results)) for _, item := range result.Web.Results { results = append(results, searchResult{ Title: item.Title, URL: item.URL, Snippet: item.Description, }) } return results, nil } // ============================================================ // Backend 3: SearXNG 自托管实例 // ============================================================ // searxngResponse 是 SearXNG JSON API 的响应格式(精简版). type searxngResponse struct { Results []struct { Title string `json:"title"` URL string `json:"url"` Content string `json:"content"` } `json:"results"` } func searxngBackend(ctx context.Context, client *http.Client, query string, maxResults int) ([]searchResult, error) { baseURL := strings.TrimRight(os.Getenv("SEARXNG_URL"), "/") params := url.Values{} params.Set("q", query) params.Set("format", "json") reqURL := baseURL + "/search?" 
+ params.Encode() req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil) if err != nil { return nil, fmt.Errorf("searxng: build request: %w", err) } req.Header.Set("Accept", "application/json") resp, err := client.Do(req) if err != nil { return nil, fmt.Errorf("searxng: http: %w", err) } defer resp.Body.Close() if resp.StatusCode != 200 { return nil, fmt.Errorf("searxng: HTTP %d", resp.StatusCode) } body, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024)) if err != nil { return nil, fmt.Errorf("searxng: read body: %w", err) } var result searxngResponse if err := json.Unmarshal(body, &result); err != nil { return nil, fmt.Errorf("searxng: parse json: %w", err) } limit := maxResults if len(result.Results) < limit { limit = len(result.Results) } results := make([]searchResult, 0, limit) for i := 0; i < limit; i++ { results = append(results, searchResult{ Title: result.Results[i].Title, URL: result.Results[i].URL, Snippet: result.Results[i].Content, }) } return results, nil } // ============================================================ // Backend 4: DuckDuckGo Instant Answer API(免费兜底) // ============================================================ // duckDuckGoResponse 是 DuckDuckGo Instant Answer API 的响应格式. // 历史包袱(LEGACY): DDG Instant Answer 只返回 Wikipedia-style 答案, // 不是完整的网络搜索结果.仅作为"零配置兜底",功能受限. // 理想情况下:未来可加 DDG HTML 爬取(但需要 HTML 解析器). type duckDuckGoResponse struct { AbstractText string `json:"AbstractText"` AbstractURL string `json:"AbstractURL"` AbstractSource string `json:"AbstractSource"` Heading string `json:"Heading"` RelatedTopics []struct { Text string `json:"Text"` FirstURL string `json:"FirstURL"` } `json:"RelatedTopics"` } func duckDuckGoBackend(ctx context.Context, client *http.Client, query string, maxResults int) ([]searchResult, error) { params := url.Values{} params.Set("q", query) params.Set("format", "json") params.Set("no_html", "1") params.Set("skip_disambig", "1") reqURL := "https://api.duckduckgo.com/?" 
+ params.Encode() req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil) if err != nil { return nil, fmt.Errorf("ddg: build request: %w", err) } req.Header.Set("Accept", "application/json") resp, err := client.Do(req) if err != nil { return nil, fmt.Errorf("ddg: http: %w", err) } defer resp.Body.Close() body, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024)) if err != nil { return nil, fmt.Errorf("ddg: read body: %w", err) } var result duckDuckGoResponse if err := json.Unmarshal(body, &result); err != nil { return nil, fmt.Errorf("ddg: parse json: %w", err) } var results []searchResult // 主要摘要(如果有) if result.AbstractText != "" { results = append(results, searchResult{ Title: result.Heading, URL: result.AbstractURL, Snippet: result.AbstractText, }) } // 相关主题 for _, topic := range result.RelatedTopics { if len(results) >= maxResults { break } if topic.FirstURL == "" { continue } // Text 通常格式是 "Title - Description" title, snippet := topic.Text, "" if idx := strings.Index(topic.Text, " - "); idx > 0 { title = topic.Text[:idx] snippet = topic.Text[idx+3:] } results = append(results, searchResult{ Title: title, URL: topic.FirstURL, Snippet: snippet, }) } if len(results) == 0 { return nil, fmt.Errorf("ddg: no results (query may be too specific for Instant Answer API)") } return results, nil } // ============================================================ // 格式化输出 // ============================================================ // formatResults 将搜索结果格式化为人类可读的文本. func formatResults(query string, results []searchResult) string { var sb strings.Builder fmt.Fprintf(&sb, "Search results for: %q\n", query) fmt.Fprintf(&sb, "Found %d result(s)\n\n", len(results)) for i, r := range results { fmt.Fprintf(&sb, "[%d] %s\n", i+1, r.Title) fmt.Fprintf(&sb, " %s\n", r.URL) if r.Snippet != "" { // 截断过长的 snippet snippet := r.Snippet if len(snippet) > 300 { snippet = snippet[:297] + "..." } fmt.Fprintf(&sb, " %s\n", snippet) } sb.WriteString("\n") } return sb.String() }