package builtin // Grep 工具 -- 正则表达式搜索文件内容(双引擎版). // // 这是 Agent 在代码库中搜索的核心能力:通过正则表达式模式 // 在文件内容中搜索匹配的行. // // 升华改进(ELEVATED): 双引擎架构 - 检测系统是否有 rg(ripgrep), // 有就用(性能最优,SIMD 加速),没有就用纯 Go regexp 兜底(零依赖). // 替代方案:只用纯 Go regexp(原始设计,零依赖但在大型代码库中慢 10-50 倍). // // 特性: // - Ripgrep 引擎:调用系统 rg 命令,支持 --type 文件类型过滤,多行匹配,PCRE2 // - 内置引擎:纯 Go regexp 实现,手动 .gitignore 处理 // - 支持 output_mode: content / files_with_matches / count // - 支持 -A/-B/-C 上下文行数(分别控制前后行数) // - 支持 case_insensitive 大小写不敏感搜索 // - 支持 glob 参数过滤文件(逗号分割,花括号内逗号保护) // - 支持 type 参数按语言类型过滤 // - 支持 multiline 跨行匹配 // - 支持 offset 跳过前 N 条 // - 尊重 .gitignore 规则 // - 自动检测并跳过二进制文件 // - 长行截断(超过 500 字符) // - 支持 head_limit(默认 250,0 = 无限制) // - 匹配文本用 >> << 标记高亮(内置引擎) // - VCS 目录排除(.git, .svn, .hg, .bzr, .jj, .sl) // - ConcurrencySafe: true,ReadOnly: true import ( "context" "encoding/json" "fmt" "os" "path/filepath" "regexp" "git.flytoex.net/yuanwei/flyto-agent/pkg/execenv" "git.flytoex.net/yuanwei/flyto-agent/pkg/permission" "git.flytoex.net/yuanwei/flyto-agent/pkg/tools" ) // GrepTool 是正则表达式搜索工具. type GrepTool struct { executor execenv.Executor } // NewGrepTool 创建一个 Grep 工具实例. executor 不能为 nil (方案 β 严格 DI). // M1: GrepTool 从无状态 struct 升级为持有 Executor, 用于 RipgrepEngine 子进程. func NewGrepTool(executor execenv.Executor) *GrepTool { if executor == nil { panic("builtin.NewGrepTool: executor is nil (方案 β 严格 DI)") } return &GrepTool{executor: executor} } // grepInput 是 Grep 工具的输入参数. // 升华改进(ELEVATED): 增加了 -A/-B/-C 分离控制,type 文件类型,multiline 多行匹配,offset 偏移. // 替代方案:只有 context 一个参数(原始设计,简单但无法精确控制前后行数). 
type grepInput struct {
	Pattern         string `json:"pattern"`
	Path            string `json:"path,omitempty"`
	Glob            string `json:"glob,omitempty"`
	OutputMode      string `json:"output_mode,omitempty"`
	Context         int    `json:"context,omitempty"`          // -C: N lines both before and after
	ContextBefore   int    `json:"-B,omitempty"`               // -B: N lines before
	ContextAfter    int    `json:"-A,omitempty"`               // -A: N lines after
	CaseInsensitive bool   `json:"case_insensitive,omitempty"` // -i
	HeadLimit       int    `json:"head_limit,omitempty"`       // output line cap (documented as default 250, 0 = unlimited; Execute maps any value <= 0 to 250)
	FileType        string `json:"type,omitempty"`             // file-type filter (e.g. go, js, py)
	Multiline       bool   `json:"multiline,omitempty"`        // cross-line matching
	Offset          int    `json:"offset,omitempty"`           // skip the first N entries
}

// Name returns the tool name.
func (t *GrepTool) Name() string { return "Grep" }

// Description returns the human-readable tool description. The ctx argument is
// not used.
func (t *GrepTool) Description(ctx context.Context) string {
	return "Searches file contents using regular expressions. " +
		"Supports full regex syntax, file filtering with glob patterns, " +
		"and multiple output modes (content, files_with_matches, count). " +
		"Supports file type filtering (--type), multiline matching, " +
		"and separate before/after context control (-A, -B, -C). " +
		"Respects .gitignore rules. Automatically skips binary files. " +
		"Uses ripgrep when available, falls back to pure Go. " +
		"Use head_limit to control output size (default 250, 0 for unlimited)."
}

// InputSchema returns the JSON Schema describing the tool input. Its property
// names correspond to the JSON tags on grepInput; only "pattern" is required.
func (t *GrepTool) InputSchema() json.RawMessage {
	return json.RawMessage(`{ "type": "object", "properties": { "pattern": { "type": "string", "description": "The regular expression pattern to search for in file contents" }, "path": { "type": "string", "description": "File or directory to search in. Defaults to the current working directory." }, "glob": { "type": "string", "description": "Glob pattern to filter files (e.g. \"*.js\", \"*.{ts,tsx}\"). Multiple patterns separated by commas."
}, "output_mode": { "type": "string", "description": "Output mode: \"content\" shows matching lines, \"files_with_matches\" shows only file paths (default), \"count\" shows match counts.", "enum": ["content", "files_with_matches", "count"] }, "context": { "type": "integer", "description": "Number of lines to show before and after each match (like grep -C)" }, "-B": { "type": "integer", "description": "Number of lines to show before each match (like grep -B)" }, "-A": { "type": "integer", "description": "Number of lines to show after each match (like grep -A)" }, "case_insensitive": { "type": "boolean", "description": "Case insensitive search" }, "head_limit": { "type": "integer", "description": "Maximum number of output lines/entries (default 250, 0 for unlimited)" }, "type": { "type": "string", "description": "File type to search (e.g. go, js, py, rust, java). Uses ripgrep type system." }, "multiline": { "type": "boolean", "description": "Enable multiline mode where . matches newlines and patterns can span lines" }, "offset": { "type": "integer", "description": "Skip first N entries before applying head_limit" } }, "required": ["pattern"] }`)
}

// Metadata returns the tool metadata: the tool is declared concurrency-safe,
// read-only and non-destructive, and is audited as a "read" operation.
func (t *GrepTool) Metadata() tools.Metadata {
	return tools.Metadata{
		ConcurrencySafe: true,
		ReadOnly:        true,
		Destructive:     false,
		SearchHint:      "grep search regex pattern content find",
		PermissionClass: permission.PermClassReadOnly,
		AuditOperation:  "read",
	}
}

// grepMatch represents a single match result.
// LEGACY: this type is kept for compatibility with old test references in
// grep_test.go. Internally the code has migrated to builtinMatch, but external
// packages may still depend on this type.
// NOTE(review): the type and its fields are unexported, so only code inside this
// package (including its tests) can reference it - confirm whether it is still needed.
type grepMatch struct {
	file    string
	line    int
	text    string
	context []string // context lines
}

// Execute runs the grep search.
func (t *GrepTool) Execute(ctx context.Context, input json.RawMessage, progress tools.ProgressFunc) (*tools.Result, error) { var params grepInput if err := json.Unmarshal(input, ¶ms); err != nil { return nil, fmt.Errorf("grep: invalid input: %w", err) } if params.Pattern == "" { return &tools.Result{ Output: "error: pattern is required", IsError: true, }, nil } // 默认输出模式 outputMode := params.OutputMode if outputMode == "" { outputMode = "files_with_matches" } // 升华改进(ELEVATED): head_limit 的 0=无限制巧妙设计. // JSON 中 int 零值也是 0,用 0 表示"无限制"而非"未设置". // 当 head_limit 未传或传 0 时默认 250;显式传 0 也是 250(Go 无法区分). // 如果确实需要无限制,传一个极大的数(如 999999). // 替代方案:使用 *int 指针类型区分 null 和 0(更精确但 JSON 解析复杂度增加). headLimit := params.HeadLimit if headLimit <= 0 { headLimit = 250 } // 确定搜索路径 searchPath := params.Path var err error if searchPath == "" { searchPath, err = os.Getwd() if err != nil { return &tools.Result{ Output: fmt.Sprintf("error getting working directory: %v", err), IsError: true, }, nil } } info, err := os.Stat(searchPath) if err != nil { return &tools.Result{ Output: fmt.Sprintf("error: path not found: %s", searchPath), IsError: true, }, nil } searchDir := searchPath isFile := !info.IsDir() if isFile { searchDir = filepath.Dir(searchPath) } // 构建引擎参数 grepParams := &GrepParams{ Pattern: params.Pattern, SearchPath: searchPath, SearchDir: searchDir, IsFile: isFile, Glob: params.Glob, OutputMode: outputMode, ContextBefore: params.ContextBefore, ContextAfter: params.ContextAfter, ContextBoth: params.Context, CaseInsensitive: params.CaseInsensitive, HeadLimit: headLimit, FileType: params.FileType, Multiline: params.Multiline, Offset: params.Offset, } // 选择引擎并执行搜索 engine := DetectGrepEngine(t.executor) // 精妙之处(CLEVER): 内置引擎需要先验证 regex 合法性(避免 rg 的错误信息不友好). // 如果是 ripgrep 引擎,rg 会自己报错,但为了统一错误格式, // 我们在这里提前校验一次. 
if _, regexErr := compileGrepPattern(params.Pattern, params.CaseInsensitive, params.Multiline); regexErr != nil { return &tools.Result{ Output: fmt.Sprintf("error: invalid regex pattern: %v", regexErr), IsError: true, }, nil } result, err := engine.Search(ctx, grepParams) if err != nil { return &tools.Result{ Output: fmt.Sprintf("error: %v", err), IsError: true, }, nil } if result.TotalMatches == 0 || result.Output == "" { return &tools.Result{ Output: fmt.Sprintf("No matches found for pattern: %s", params.Pattern), IsError: false, }, nil } return &tools.Result{ Output: result.Output, IsError: false, }, nil } // compileGrepPattern 编译 grep 正则表达式(用于提前校验). func compileGrepPattern(pattern string, caseInsensitive, multiline bool) (*regexp.Regexp, error) { regexPattern := pattern if caseInsensitive { regexPattern = "(?i)" + regexPattern } if multiline { regexPattern = "(?s)" + regexPattern } return regexp.Compile(regexPattern) } // truncateLine 截断超过指定长度的行. func truncateLine(line string, maxLen int) string { if len(line) <= maxLen { return line } return line[:maxLen] + "... [line truncated]" } // isBinaryExtension 判断是否是二进制文件扩展名. // 精妙之处(CLEVER): 用 map 而非 switch - O(1) 查找,且易于扩展. // 覆盖常见的可执行文件,图片,压缩包,文档,音视频,字体格式. func isBinaryExtension(ext string) bool { ext = toLowerASCII(ext) return binaryExtMap[ext] } // 升华改进(ELEVATED): 将扩展名映射提取为包级变量,避免每次调用都创建 map. // 替代方案:函数内部创建 map(原始设计,每次调用都分配内存,GC 压力大). 
var binaryExtMap = func() map[string]bool {
	// Extensions grouped by category; every entry maps to true.
	groups := [][]string{
		// executables, libraries and object/data files
		{".exe", ".dll", ".so", ".dylib", ".a", ".o", ".obj", ".bin", ".dat"},
		// images
		{".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp"},
		// archives
		{".zip", ".tar", ".gz", ".bz2", ".xz", ".7z", ".rar"},
		// documents
		{".pdf", ".doc", ".docx", ".xls", ".xlsx"},
		// compiled artifacts
		{".wasm", ".pyc", ".class"},
		// audio and video
		{".mp3", ".mp4", ".avi", ".mov", ".wav"},
		// fonts
		{".ttf", ".otf", ".woff", ".woff2", ".eot"},
	}

	table := make(map[string]bool)
	for _, group := range groups {
		for _, ext := range group {
			table[ext] = true
		}
	}
	return table
}()

// toLowerASCII returns s with the ASCII letters A-Z converted to a-z; all other
// bytes are left untouched. When s contains no uppercase ASCII letter it is
// returned as-is without allocating; otherwise a new string is built.
func toLowerASCII(s string) string {
	// Locate the first byte that needs converting, if any.
	firstUpper := -1
	for idx := 0; idx < len(s); idx++ {
		if ch := s[idx]; 'A' <= ch && ch <= 'Z' {
			firstUpper = idx
			break
		}
	}
	if firstUpper < 0 {
		return s
	}

	// Everything before firstUpper is already lowercase; convert the rest in place.
	lowered := []byte(s)
	for idx := firstUpper; idx < len(lowered); idx++ {
		if ch := lowered[idx]; 'A' <= ch && ch <= 'Z' {
			lowered[idx] = ch + ('a' - 'A')
		}
	}
	return string(lowered)
}