package builtin // FileEdit 工具 -- 精确字符串替换编辑文件. // // 这是 Agent 修改代码的核心能力:通过精确的 old_string -> new_string 替换 // 来编辑文件内容,避免整文件重写带来的风险. // // 架构决策:validate + execute 两步分离. // 反向思考:为什么不合并成单函数? // - 合并更简洁,但无法在权限检查前预演结果 // - 编排器需要在 Execute 前做权限/钩子检查,validate 的结果可以提供 // 足够信息(匹配数,行号)给权限系统做决策 // - 测试时 validate 可以独立测试,不需要真正写入文件 // - 两步分离允许 dry-run 模式,只验证不执行 // // 特性: // - old_string 必须在文件中唯一存在(否则报错要求更多上下文) // - 支持 replace_all 参数,替换所有匹配 // - 验证 old_string != new_string // - 找不到 old_string 时:搜索最相似的内容并给出智能提示 // - 多处匹配时:列出所有匹配位置的行号 // - 空白差异检测:tab/space,trailing spaces 差异给出明确提示 // - 换行符规范化:统一处理 \r\n 和 \n // - Curly Quote 智能匹配:模型输出直引号,文件用弯引号时自动适配 // - API 反消毒:还原被 API 消毒的特殊标记 // - 文件缓存集成:编辑后更新文件状态缓存 // - ConcurrencySafe: false,文件写入操作不可并发 import ( "context" "encoding/json" "fmt" "os" "path/filepath" "strings" "time" "unicode/utf8" "git.flytoex.net/yuanwei/flyto-agent/internal/syslib/diff" "git.flytoex.net/yuanwei/flyto-agent/pkg/permission" "git.flytoex.net/yuanwei/flyto-agent/pkg/security" "git.flytoex.net/yuanwei/flyto-agent/pkg/tools" ) // ───────────────────────────────────────────────────────────────────── // 反消毒表(Desanitization Table) // ───────────────────────────────────────────────────────────────────── // 精妙之处(CLEVER): API 层在送往模型前会消毒某些特殊标记(防止 prompt // injection),模型编辑时只能输出消毒后的版本.工具层必须反消毒回来, // 否则替换会失败(文件里是原始版本,模型给的是消毒版本). // // 反向思考:为什么不在 API 层处理? // - API 层不知道哪些 tool input 包含文件内容,只有 FileEdit 知道 // - 在 API 层处理会影响所有工具,带来不必要的副作用 // - 消毒是 API 的责任,反消毒是 FileEdit 的责任 -- 关注点分离 var desanitizations = map[string]string{ "": "", "\n\nH:": "\n\nHuman:", "\n\nA:": "\n\nAssistant:", "< META_START >": "", "< META_END >": "", } // ───────────────────────────────────────────────────────────────────── // Curly Quote(弯引号/智能引号)常量 // ───────────────────────────────────────────────────────────────────── // 精妙之处(CLEVER): LLM 只能输出 ASCII 直引号 " ' ,但源文件可能包含 // Unicode 弯引号 \u201c \u201d \u2018 \u2019.这组常量定义了所有需要 // 处理的引号映射关系. // // 反向思考:为什么不直接 normalize 文件内容? // - 那样会破坏文件中真实的 Unicode 引号(文学作品,国际化字符串) // - 正确做法是:匹配时 normalize,替换时保留原样式 const ( // 直引号(模型输出) straightDoubleQuote = '"' straightSingleQuote = '\'' // 弯引号(文件中可能存在) leftDoubleQuote = '\u201c' // " rightDoubleQuote = '\u201d' // " leftSingleQuote = '\u2018' // ' rightSingleQuote = '\u2019' // ' ) // ───────────────────────────────────────────────────────────────────── // 类型定义 // ───────────────────────────────────────────────────────────────────── // FileEditTool 是文件编辑工具. type FileEditTool struct { fileCache FileCacheRecorder // 文件缓存(可选,为 nil 时不记录) fileHistory FileHistoryRecorder // 文件历史(可选,为 nil 时不备份) cwd string // 工作目录(用于符号链接沙箱检查) messageID string // 当前消息 ID(由编排器设置,用于关联文件历史) guard security.SecretGuard // 秘密扫描(可选,为 nil 时不扫描) } // NewFileEditTool 创建一个 FileEdit 工具实例(无缓存). // 默认 cwd 为当前工作目录. func NewFileEditTool() *FileEditTool { cwd, _ := os.Getwd() return &FileEditTool{cwd: cwd} } // NewFileEditToolWithCwd 创建一个指定工作目录的 FileEdit 工具实例(无缓存). // 升华改进(ELEVATED): 显式注入 cwd,使工具可测试且不依赖进程全局状态. // 替代方案:每次 apply 时调用 os.Getwd()(简单但不可测试). func NewFileEditToolWithCwd(cwd string) *FileEditTool { return &FileEditTool{cwd: cwd} } // NewFileEditToolWithCache 创建一个带文件缓存的 FileEdit 工具实例. // 精妙之处(CLEVER): 与 FileReadToolWithCache 对称 -- 读时记录,编辑后也 // 更新缓存,保证缓存一致性.否则编辑后 FileRead 的缓存就是陈旧的. func NewFileEditToolWithCache(cache FileCacheRecorder) *FileEditTool { cwd, _ := os.Getwd() return &FileEditTool{fileCache: cache, cwd: cwd} } // NewFileEditToolFull 创建一个带文件缓存和指定工作目录的 FileEdit 工具实例. func NewFileEditToolFull(cache FileCacheRecorder, cwd string) *FileEditTool { return &FileEditTool{fileCache: cache, cwd: cwd} } // NewFileEditToolComplete 创建一个完整配置的 FileEdit 工具实例. // 升华改进(ELEVATED): 注入文件历史,编辑前自动备份. // 替代方案:在 Engine 层做备份(散落在编排逻辑中,不如工具内部自包含). func NewFileEditToolComplete(cache FileCacheRecorder, history FileHistoryRecorder, cwd string) *FileEditTool { return &FileEditTool{fileCache: cache, fileHistory: history, cwd: cwd} } // NewFileEditToolWithGuard 创建带秘密扫描的完整配置 FileEdit 工具实例. // guard 对 new_string 内容进行扫描,有秘密时拒绝写入. // 升华改进(ELEVATED): 编辑工具只注入 new_string 到文件,因此只扫描 new_string, // 不扫描 old_string(old_string 只是匹配条件,不写入文件). // 替代方案:<扫描整个 new_string + old_string> - 否决原因:old_string 是文件已有内容, // 不是 Agent 新引入的;扫描它会导致含秘密的文件无法被任何编辑操作修改(即使是删除秘密). func NewFileEditToolWithGuard(cache FileCacheRecorder, history FileHistoryRecorder, cwd string, guard security.SecretGuard) *FileEditTool { return &FileEditTool{fileCache: cache, fileHistory: history, cwd: cwd, guard: guard} } // SetMessageID 设置当前消息 ID(由编排器在每轮开始时调用). func (t *FileEditTool) SetMessageID(id string) { t.messageID = id } // fileEditInput 是 FileEdit 工具的输入参数. type fileEditInput struct { FilePath string `json:"file_path"` OldString string `json:"old_string"` NewString string `json:"new_string"` ReplaceAll bool `json:"replace_all,omitempty"` } // editValidation 是 validate 阶段的结果,传递给 execute 阶段. // 反向思考:为什么不用 any 或直接嵌到 Execute 里? // - 强类型结构体让两阶段的契约明确可测试 // - Execute 消费 validate 的结果时不需要做类型断言 // - 中间结果可序列化,支持分布式场景(未来扩展) type editValidation struct { // 输入(已预处理) filePath string oldString string // 反消毒 + CRLF 规范化后的 newString string // 反消毒 + CRLF 规范化 + 引号风格保留后的 replaceAll bool // 文件状态 content string // 文件内容(CRLF 已规范化为 LF) originalHasCRLF bool // 原文件是否使用 CRLF fileMode os.FileMode // 匹配结果 matchCount int // old_string 在文件中的出现次数 matchLines []int // 各匹配位置的起始行号 // 精妙之处(CLEVER): 标记是否通过引号规范化才匹配上的. // 如果是,new_string 也需要做引号风格保留(preserveQuoteStyle). matchedViaQuoteNormalization bool } // ───────────────────────────────────────────────────────────────────── // Tool 接口实现 // ───────────────────────────────────────────────────────────────────── // Name 返回工具名称. func (t *FileEditTool) Name() string { return "Edit" } // Description 返回工具描述. func (t *FileEditTool) Description(ctx context.Context) string { return "Performs exact string replacements in files. " + "The old_string must be unique in the file (unless replace_all is true). " + "If old_string is not unique, all matching line numbers are listed. " + "If old_string is not found, similar content is suggested. " + "Handles CRLF/LF normalization and curly quote matching automatically." } // InputSchema 返回工具的 JSON Schema 输入定义. func (t *FileEditTool) InputSchema() json.RawMessage { return json.RawMessage(`{ "type": "object", "properties": { "file_path": { "type": "string", "description": "The absolute path to the file to modify" }, "old_string": { "type": "string", "description": "The text to replace" }, "new_string": { "type": "string", "description": "The text to replace it with (must be different from old_string)" }, "replace_all": { "type": "boolean", "description": "Replace all occurrences of old_string (default false)", "default": false } }, "required": ["file_path", "old_string", "new_string"] }`) } // Metadata 返回工具元数据. func (t *FileEditTool) Metadata() tools.Metadata { return tools.Metadata{ ConcurrencySafe: false, ReadOnly: false, Destructive: false, Aliases: []string{"FileEdit"}, SearchHint: "edit file replace string modify", PermissionClass: permission.PermClassFile, AuditOperation: "edit", } } // ───────────────────────────────────────────────────────────────────── // Execute -- 编排 validate + execute 两阶段 // ───────────────────────────────────────────────────────────────────── // Execute 执行文件编辑操作. // 内部分为两步:validate(纯函数,不修改文件)+ execute(应用修改). func (t *FileEditTool) Execute(ctx context.Context, input json.RawMessage, progress tools.ProgressFunc) (*tools.Result, error) { var params fileEditInput if err := json.Unmarshal(input, ¶ms); err != nil { return nil, fmt.Errorf("fileedit: invalid input: %w", err) } // cwd resolution for symlink guard: SubAgent-injected override wins // (worktree isolation); empty means "use construction-time cwd". // Security: the override can only be set by pkg/tools.WithWorkdir, // whose ctx key is a private type — external code cannot forge a // cwd to escape the guard. See pkg/tools/workdir.go. // // symlink guard 的 cwd 解析: SubAgent ctx 注入的覆盖优先 (worktree // 隔离); 空值回退构造期 cwd. 安全: override 只能由 pkg/tools.WithWorkdir // 设置, 其 ctx key 是私有类型, 外部代码无法伪造 cwd 绕过 guard. // 见 pkg/tools/workdir.go. cwd := tools.WorkdirFromContext(ctx) if cwd == "" { cwd = t.cwd } // ── 阶段 0a: 秘密扫描(最优先,在一切副作用之前)── // 只扫描 new_string(即将写入的内容),不扫描 old_string. // 原因见 NewFileEditToolWithGuard 注释. if t.guard != nil && params.NewString != "" { matches, err := t.guard.Scan(params.FilePath, params.NewString) if err == security.ErrContentTooLarge { _ = err // 超限时放行(同 FileWriteTool 策略) } else if err != nil { return &tools.Result{ Output: fmt.Sprintf("error: secret scan failed: %v", err), IsError: true, }, nil } else if len(matches) > 0 { labels := strings.Join(security.MatchLabels(matches), ", ") return &tools.Result{ Output: fmt.Sprintf( "error: secret detected — %s — cannot edit %s\n"+ "Remove the sensitive content from new_string and try again.", labels, params.FilePath, ), IsError: true, }, nil } } // ── 阶段 0b: 文件历史备份 ── // 精妙之处(CLEVER): 在 validate 之前就备份--即使 validate 后续失败, // 备份操作是幂等的(同一消息+同一文件只备份一次),不会有副作用. // 在 validate 之后备份有风险:validate 读了文件内容,如果之间有并发修改, // 备份的就不是 validate 看到的版本了. if t.fileHistory != nil && params.FilePath != "" { _ = t.fileHistory.BeforeEdit(params.FilePath, t.messageID) } // ── 阶段 1: Validate ── validation, validErr := t.validate(params) if validErr != nil { return validErr, nil } // ── 阶段 2: Execute ── return t.apply(validation, cwd) } // ───────────────────────────────────────────────────────────────────── // 阶段 1: Validate -- 纯验证,不修改文件系统 // ───────────────────────────────────────────────────────────────────── // validate 验证编辑请求的合法性,返回 editValidation 或错误结果. // 这是纯函数(除了读文件),不修改任何状态. // // 精妙之处(CLEVER): 返回的是 (*tools.Result, nil) 而不是 (nil, error). // 原因:验证失败是业务错误(用户输入不对),不是系统错误. // Go error 应该保留给"不该发生的事"(磁盘坏了,内存不够). func (t *FileEditTool) validate(params fileEditInput) (*editValidation, *tools.Result) { // ── 基础参数校验 ── if params.FilePath == "" { return nil, &tools.Result{ Output: "error: file_path is required", IsError: true, } } if params.OldString == params.NewString { return nil, &tools.Result{ Output: "error: old_string and new_string must be different", IsError: true, } } // ── 读取文件 ── data, err := os.ReadFile(params.FilePath) if err != nil { if os.IsNotExist(err) { return nil, &tools.Result{ Output: fmt.Sprintf("error: file not found: %s", params.FilePath), IsError: true, } } if os.IsPermission(err) { return nil, &tools.Result{ Output: fmt.Sprintf("error: permission denied: %s", params.FilePath), IsError: true, } } return nil, &tools.Result{ Output: fmt.Sprintf("error: %v", err), IsError: true, } } content := string(data) // ── 换行符规范化 ── // 历史包袱(LEGACY): Windows 的 CRLF 和 Unix 的 LF 混用问题由来已久. // 必须在匹配前统一,否则模型给的 \n 和文件里的 \r\n 对不上. originalHasCRLF := strings.Contains(content, "\r\n") content = strings.ReplaceAll(content, "\r\n", "\n") oldString := strings.ReplaceAll(params.OldString, "\r\n", "\n") newString := strings.ReplaceAll(params.NewString, "\r\n", "\n") // ── 反消毒 ── // 精妙之处(CLEVER): 必须在匹配前做反消毒.模型看到的是消毒版, // 它的 old_string 里包含消毒标记,但文件里是原始标记. oldString = applyDesanitizations(oldString) newString = applyDesanitizations(newString) // ── 获取文件权限(用于后续写回) ── info, err := os.Stat(params.FilePath) if err != nil { return nil, &tools.Result{ Output: fmt.Sprintf("error: %v", err), IsError: true, } } // ── 查找 old_string ── actualOld, matchedViaQuotes := findActualString(content, oldString) if actualOld == "" { // 找不到 - 提供智能错误提示 hint := buildNotFoundHint(content, oldString, params.FilePath) return nil, &tools.Result{ Output: hint, IsError: true, } } // 精妙之处(CLEVER): 如果通过引号规范化匹配到了,new_string 也需要 // 做引号风格转换.确保替换后的内容保持文件原有的引号风格. if matchedViaQuotes { newString = preserveQuoteStyle(actualOld, oldString, newString) } count := strings.Count(content, actualOld) lineNums := findMatchLineNumbers(content, actualOld) // ── 唯一性检查 ── if !params.ReplaceAll && count > 1 { lineNumStrs := make([]string, len(lineNums)) for i, ln := range lineNums { lineNumStrs[i] = fmt.Sprintf("%d", ln) } return nil, &tools.Result{ Output: fmt.Sprintf("Found %d matches at lines %s in %s. Please provide more context to make it unique, or set replace_all to true.", count, strings.Join(lineNumStrs, ", "), params.FilePath), IsError: true, } } return &editValidation{ filePath: params.FilePath, oldString: actualOld, newString: newString, replaceAll: params.ReplaceAll, content: content, originalHasCRLF: originalHasCRLF, fileMode: info.Mode(), matchCount: count, matchLines: lineNums, matchedViaQuoteNormalization: matchedViaQuotes, }, nil } // ───────────────────────────────────────────────────────────────────── // 阶段 2: Apply -- 执行文件修改 // ───────────────────────────────────────────────────────────────────── // apply 根据 validate 阶段的结果执行文件修改. // 分离出来的好处:这一步的逻辑极简,出错面很小. func (t *FileEditTool) apply(v *editValidation, cwd string) (*tools.Result, error) { // ── 符号链接穿透写入 + 沙箱检查 ── // 升华改进(ELEVATED): 符号链接穿透写入.检测到符号链接后解析真实目标, // 验证目标在工作目录范围内(防止沙箱逃逸),然后写入真实目标. // 替代方案1:阻止编辑符号链接(更安全但 monorepo 场景会报错). // 替代方案2:无条件穿透(原始设计,简单但有安全风险). targetPath := v.filePath if linkInfo, err := os.Lstat(v.filePath); err == nil { if linkInfo.Mode()&os.ModeSymlink != 0 { realPath, err := filepath.EvalSymlinks(v.filePath) if err != nil { return &tools.Result{ Output: fmt.Sprintf("file is a symlink but cannot resolve target: %v", err), IsError: true, }, nil } // 沙箱检查:真实目标必须在 cwd 范围内 (SubAgent worktree 下 cwd // 由 ctx 覆盖, 见 Execute 开头的 WorkdirFromContext 解析). // Sandbox check: real target must be under cwd (SubAgent worktree // mode overrides cwd via ctx — see WorkdirFromContext resolution // at the top of Execute). if !isPathUnder(realPath, cwd) { return &tools.Result{ Output: fmt.Sprintf("symlink target %s is outside working directory %s", realPath, cwd), IsError: true, }, nil } // 穿透:写入真实目标 targetPath = realPath } } // ── 执行替换 ── var newContent string if v.replaceAll { newContent = strings.ReplaceAll(v.content, v.oldString, v.newString) } else { newContent = strings.Replace(v.content, v.oldString, v.newString, 1) } // ── 恢复 CRLF ── // 历史包袱(LEGACY): 如果原文件用 CRLF,替换后也要转回去. // 不能只转 newString 部分 -- 整个文件在 validate 阶段已被规范化为 LF. if v.originalHasCRLF { newContent = strings.ReplaceAll(newContent, "\n", "\r\n") } // ── 生成 Unified Diff 预览 ── // 升华改进(ELEVATED): 使用 Myers diff 算法生成多 hunk 结构化 diff, // 替代原来的简单首尾对比.支持文件中多处不连续变更的精确 diff 展示. // 替代方案:简单首尾匹配 diff(只能生成单 hunk,多处变更会把中间不相关行也包进去). hunks := diff.StructuredPatch(v.content, newContent, 3) diffText := diff.FormatUnified(v.filePath, hunks) // ── 原子写入三层防御 ── // 升华改进(ELEVATED): 原子写入三层防御,对齐生产级实现. // 替代方案:简单 os.WriteFile(无原子性保证,崩溃时可能损坏文件). newBytes := []byte(newContent) // 1. 临时文件名含 PID + 纳秒时间戳(防并发冲突) tmpPath := fmt.Sprintf("%s.tmp.%d.%d", targetPath, os.Getpid(), time.Now().UnixNano()) // 2. 写入临时文件 if err := os.WriteFile(tmpPath, newBytes, 0644); err != nil { return &tools.Result{ Output: fmt.Sprintf("error writing temp file: %v", err), IsError: true, }, nil } // 3. 保留原文件权限 if v.fileMode != 0 { if err := os.Chmod(tmpPath, v.fileMode); err != nil { // 非致命:权限设置失败不阻止写入,但记录到输出让用户知道 _ = err } } // 4. 原子 rename if err := os.Rename(tmpPath, targetPath); err != nil { // 清理临时文件 os.Remove(tmpPath) // 5. Fallback:直接写(Windows 跨卷 rename 会失败) // 历史包袱(LEGACY): Windows 上跨卷 rename 不支持原子操作, // 只能降级到直接写.这意味着 Windows 用户在极端情况下(写入过程中崩溃) // 可能遇到文件损坏.暂无更好方案. if err := os.WriteFile(targetPath, newBytes, v.fileMode); err != nil { return &tools.Result{ Output: fmt.Sprintf("error writing file (fallback after atomic rename failed): %v", err), IsError: true, }, nil } } // 6. 清理可能残留的旧临时文件(前次崩溃遗留) // 不在关键路径上,失败不影响主流程 cleanupStaleTmpFiles(filepath.Dir(targetPath)) // ── 更新文件缓存 ── // 精妙之处(CLEVER): 编辑后立即更新缓存,这样 FileRead 的缓存就不会 // 是陈旧的.不需要等下次 FileRead 时才发现 mtime 变了. if t.fileCache != nil { t.fileCache.Record(v.filePath, newBytes) } // ── 构建成功消息(含 diff) ── var msg string if v.replaceAll { msg = fmt.Sprintf("Successfully replaced %d occurrences in %s", v.matchCount, v.filePath) } else { msg = fmt.Sprintf("Successfully edited %s", v.filePath) } if diffText != "" { msg += "\n\n" + diffText } return &tools.Result{ Output: msg, IsError: false, Data: &FileEditResultData{Diff: diffText, FilePath: v.filePath}, }, nil } // FileEditResultData 携带编辑的结构化结果(消费层可用于 diff 展示). type FileEditResultData struct { Diff string // unified diff 文本 FilePath string // 编辑的文件路径 } // isPathUnder 判断 child 路径是否在 parent 目录之下. // 精妙之处(CLEVER): 使用 filepath.Rel 计算相对路径,如果结果以 ".." 开头 // 则说明 child 不在 parent 之下.这比字符串前缀匹配更健壮-- // 能正确处理 "/a/b/../c" 这样的路径. func isPathUnder(child, parent string) bool { if parent == "" { return false } rel, err := filepath.Rel(parent, child) if err != nil { return false } // 相对路径不应以 ".." 开头 return !strings.HasPrefix(rel, "..") && rel != ".." } // cleanupStaleTmpFiles 清理目录中可能残留的旧临时文件. // 精妙之处(CLEVER): 只清理超过 1 小时的 .tmp.* 文件,避免误删正在使用的临时文件. // 不在关键路径上,任何错误都静默忽略. func cleanupStaleTmpFiles(dir string) { entries, err := os.ReadDir(dir) if err != nil { return } cutoff := time.Now().Add(-1 * time.Hour) for _, entry := range entries { if entry.IsDir() { continue } name := entry.Name() if !strings.Contains(name, ".tmp.") { continue } info, err := entry.Info() if err != nil { continue } if info.ModTime().Before(cutoff) { os.Remove(filepath.Join(dir, name)) } } } // ───────────────────────────────────────────────────────────────────── // 反消毒(Desanitization) // ───────────────────────────────────────────────────────────────────── // applyDesanitizations 将字符串中被 API 消毒的标记还原为原始形式. // 历史包袱(LEGACY): 这个函数的存在本身就是历史包袱 -- 理想世界里 API // 层和工具层应该有统一的消毒/反消毒管道,而不是在工具层硬编码还原表. // 但现实中 API 层的消毒逻辑不受我们控制(第三方 API),所以只能在这里兜底. func applyDesanitizations(s string) string { for sanitized, original := range desanitizations { s = strings.ReplaceAll(s, sanitized, original) } return s } // ───────────────────────────────────────────────────────────────────── // Curly Quote 智能匹配(弯引号/智能引号) // ───────────────────────────────────────────────────────────────────── // findActualString 在文件内容中查找 old_string,支持引号规范化回退. // // 精妙之处(CLEVER): 两级匹配策略 -- // 1. 精确匹配:old_string 原样存在于 content 中 // 2. 引号规范化匹配:将 content 中的弯引号规范化为直引号后再匹配 // 如果匹配到了,返回文件中的原始文本(带弯引号的版本) // // 返回值: // - actualOld: 文件中实际匹配到的字符串(可能是弯引号版本) // - matchedViaQuotes: 是否通过引号规范化才匹配上的 // // 反向思考:为什么不直接把文件中的弯引号全替换成直引号? // - 那样会破坏文件原有的排版风格 // - 编辑器/构建工具可能依赖特定的引号字符 // - 正确做法是「用直引号匹配,但保留原始弯引号风格」 func findActualString(content, oldString string) (actualOld string, matchedViaQuotes bool) { // 策略 1:精确匹配 if strings.Contains(content, oldString) { return oldString, false } // 策略 2:引号规范化匹配 normalizedContent := normalizeQuotes(content) normalizedOld := normalizeQuotes(oldString) idx := strings.Index(normalizedContent, normalizedOld) if idx == -1 { return "", false } // 精妙之处(CLEVER): normalizedContent 和 content 的字节长度不同 // (弯引号是多字节 UTF-8),但我们需要从 content 中提取原始子串. // 关键洞察:normalizeQuotes 只会将多字节字符替换为单字节字符, // 所以通过 rune 级别的索引映射来定位原始位置. originalStart := runeOffsetToByteOffset(content, normalizedContent, idx) originalEnd := runeOffsetToByteOffset(content, normalizedContent, idx+len(normalizedOld)) if originalStart >= 0 && originalEnd >= 0 && originalEnd <= len(content) { return content[originalStart:originalEnd], true } return "", false } // runeOffsetToByteOffset 将规范化字符串中的字节偏移转换为原始字符串中的字节偏移. // // 精妙之处(CLEVER): 因为规范化可能改变字符的字节长度(弯引号 3 字节 → 直引号 1 字节), // 不能简单用字节偏移.需要同步遍历两个字符串的 rune,建立偏移映射. func runeOffsetToByteOffset(original, normalized string, normalizedByteOffset int) int { origIdx := 0 normIdx := 0 for normIdx < normalizedByteOffset && origIdx < len(original) { _, origSize := utf8.DecodeRuneInString(original[origIdx:]) _, normSize := utf8.DecodeRuneInString(normalized[normIdx:]) origIdx += origSize normIdx += normSize } if normIdx == normalizedByteOffset { return origIdx } return -1 } // normalizeQuotes 将所有弯引号规范化为对应的直引号. func normalizeQuotes(s string) string { var b strings.Builder b.Grow(len(s)) for _, r := range s { switch r { case leftDoubleQuote, rightDoubleQuote: b.WriteByte(straightDoubleQuote) case leftSingleQuote, rightSingleQuote: b.WriteByte(straightSingleQuote) default: b.WriteRune(r) } } return b.String() } // preserveQuoteStyle 将 new_string 中的直引号转换为文件中的弯引号风格. // // 精妙之处(CLEVER): 需要区分开引号和闭引号: // - 前面是空格/行首/左括号 → 开引号(左弯引号) // - 否则 → 闭引号(右弯引号) // - 特例:缩写中的撇号(don't 的 ' → 右单引号 \u2019) // // 实现策略:逐字符扫描 new_string,遇到直引号时根据上下文决定 // 替换为哪种弯引号. func preserveQuoteStyle(actualOld, normalizedOld, newString string) string { // 检查原始文本中是否真的包含弯引号 if !containsCurlyQuotes(actualOld) { return newString } var b strings.Builder b.Grow(len(newString) * 2) // 弯引号比直引号占更多字节 runes := []rune(newString) for i, r := range runes { switch r { case straightDoubleQuote: if isOpeningPosition(runes, i) { b.WriteRune(leftDoubleQuote) } else { b.WriteRune(rightDoubleQuote) } case straightSingleQuote: // 精妙之处(CLEVER): 缩写中的撇号(如 don't, it's, we'll) // 前后都有字母,应该用右单引号. if isApostrophe(runes, i) { b.WriteRune(rightSingleQuote) } else if isOpeningPosition(runes, i) { b.WriteRune(leftSingleQuote) } else { b.WriteRune(rightSingleQuote) } default: b.WriteRune(r) } } return b.String() } // containsCurlyQuotes 检查字符串是否包含弯引号. func containsCurlyQuotes(s string) bool { for _, r := range s { switch r { case leftDoubleQuote, rightDoubleQuote, leftSingleQuote, rightSingleQuote: return true } } return false } // isOpeningPosition 判断 runes[i] 所在位置是否是"开引号"位置. // 规则:位于字符串开头,前一个字符是空白/左括号时为开引号位置. func isOpeningPosition(runes []rune, i int) bool { if i == 0 { return true } prev := runes[i-1] return prev == ' ' || prev == '\t' || prev == '\n' || prev == '\r' || prev == '(' || prev == '[' || prev == '{' || prev == '<' } // isApostrophe 判断 runes[i] 处的单引号是否为缩写撇号. // 规则:前后都是字母时为撇号(如 don't, it's, we'll). func isApostrophe(runes []rune, i int) bool { if i <= 0 || i >= len(runes)-1 { return false } prev := runes[i-1] next := runes[i+1] return isLetter(prev) && isLetter(next) } // isLetter 判断 rune 是否为字母(ASCII 字母或 Unicode 字母). func isLetter(r rune) bool { return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= 0x00C0 && r <= 0x024F) // 扩展拉丁字母范围 } // ───────────────────────────────────────────────────────────────────── // 智能提示(Not Found Hint) // ───────────────────────────────────────────────────────────────────── // buildNotFoundHint 构建 old_string 找不到时的智能提示信息. // 包含:空白差异检测,最相似内容推荐. // // 反向思考:为什么不只返回 "not found"? // - 大部分 "not found" 是缩进不对或空白差异,模型需要知道具体原因 // - 提供相似内容可以让模型自我修正,减少重试次数 // - 成本:这个函数在失败路径才跑,不影响正常编辑的性能 func buildNotFoundHint(content, oldString, filePath string) string { var hints []string // 1. 检查空白差异:将所有空白规范化后再比较 if checkWhitespaceDifference(content, oldString) { hints = append(hints, "Whitespace mismatch detected: old_string differs from file content only in whitespace (tabs vs spaces, trailing spaces, etc.)") } // 2. 搜索最相似的内容片段 similar := findSimilarContent(content, oldString) if len(similar) > 0 { hints = append(hints, "Did you mean:") for _, s := range similar { // 截断过长的相似内容 preview := s.text if len(preview) > 200 { preview = preview[:200] + "..." } hints = append(hints, fmt.Sprintf(" line %d: %s", s.lineNum, preview)) } } msg := fmt.Sprintf("error: old_string not found in %s.", filePath) if len(hints) > 0 { msg += "\n" + strings.Join(hints, "\n") } else { msg += " Make sure the string matches exactly, including whitespace and indentation." } return msg } // checkWhitespaceDifference 检查 old_string 是否只因空白差异而匹配不上. // 将文件内容和 old_string 都做空白规范化,看是否能匹配. func checkWhitespaceDifference(content, oldString string) bool { // 规范化空白:tab -> space,去除行尾空白,合并连续空格 normalizeWS := func(s string) string { s = strings.ReplaceAll(s, "\t", " ") lines := strings.Split(s, "\n") for i, l := range lines { lines[i] = strings.TrimRight(l, " ") } s = strings.Join(lines, "\n") // 合并连续空格为单空格 for strings.Contains(s, " ") { s = strings.ReplaceAll(s, " ", " ") } return s } normContent := normalizeWS(content) normOld := normalizeWS(oldString) return strings.Contains(normContent, normOld) } // ───────────────────────────────────────────────────────────────────── // 相似内容搜索 // ───────────────────────────────────────────────────────────────────── // similarMatch 表示一个相似内容的匹配结果. type similarMatch struct { lineNum int text string score int // 相似度得分(越高越相似) } // findSimilarContent 在文件内容中搜索与 oldString 最相似的片段. // 使用多种策略:子串匹配,逐行 Levenshtein 距离. // // 反向思考:为什么不用更高级的算法(如 diff-match-patch)? // - 这只是错误提示,不需要完美匹配 // - 简单策略覆盖 90% 的常见情况(缩进错误,少打/多打字符) // - 复杂算法增加依赖和认知负担,收益不对称 func findSimilarContent(content, oldString string) []similarMatch { lines := strings.Split(content, "\n") oldLines := strings.Split(oldString, "\n") var results []similarMatch // 策略 1:搜索 old_string 的第一行(trimmed)在文件中的出现位置 if len(oldLines) > 0 { firstLine := strings.TrimSpace(oldLines[0]) if len(firstLine) > 10 { // 至少 10 个字符才有搜索意义 for i, line := range lines { if strings.Contains(strings.TrimSpace(line), firstLine) { // 提取匹配位置附近的内容作为预览 endLine := i + len(oldLines) if endLine > len(lines) { endLine = len(lines) } preview := strings.Join(lines[i:endLine], "\n") results = append(results, similarMatch{ lineNum: i + 1, text: preview, score: 100, }) if len(results) >= 3 { break } } } } } // 如果策略 1 找到了结果,直接返回 if len(results) > 0 { return results } // 策略 2:对单行 old_string,计算每行的编辑距离 if len(oldLines) == 1 && len(oldString) < 500 { type scored struct { lineNum int text string dist int } var candidates []scored for i, line := range lines { if len(line) == 0 { continue } dist := levenshtein(oldString, line) // 编辑距离小于 old_string 长度的 40%,认为相似 threshold := len(oldString) * 40 / 100 if threshold < 5 { threshold = 5 } if dist <= threshold { candidates = append(candidates, scored{ lineNum: i + 1, text: line, dist: dist, }) } } // 取最相似的 3 个 for i := 0; i < len(candidates) && i < 3; i++ { // 选择剩余候选中距离最小的 best := 0 for j := range candidates { if candidates[j].dist < candidates[best].dist { best = j } } results = append(results, similarMatch{ lineNum: candidates[best].lineNum, text: candidates[best].text, score: 100 - candidates[best].dist, }) // 移除已选的(与末尾交换) candidates[best] = candidates[len(candidates)-1] candidates = candidates[:len(candidates)-1] if len(candidates) == 0 { break } } } // 策略 3:搜索 old_string 中较长的子串(取中间行) if len(results) == 0 && len(oldLines) > 1 { midIdx := len(oldLines) / 2 midLine := strings.TrimSpace(oldLines[midIdx]) if len(midLine) > 10 { for i, line := range lines { if strings.Contains(strings.TrimSpace(line), midLine) { // 估算起始行 startLine := i - midIdx if startLine < 0 { startLine = 0 } endLine := startLine + len(oldLines) if endLine > len(lines) { endLine = len(lines) } preview := strings.Join(lines[startLine:endLine], "\n") results = append(results, similarMatch{ lineNum: startLine + 1, text: preview, score: 50, }) if len(results) >= 3 { break } } } } } return results } // ───────────────────────────────────────────────────────────────────── // 基础工具函数 // ───────────────────────────────────────────────────────────────────── // levenshtein 计算两个字符串之间的编辑距离(Levenshtein 距离). // 为了性能,限制最大处理长度为 500 个字符. // // 历史包袱(LEGACY): 按字节计算而非 rune,对纯 ASCII 没问题, // 但对中文/日文等多字节字符不够精确.不过对于"相似度提示"场景足够用了. func levenshtein(a, b string) int { if len(a) > 500 { a = a[:500] } if len(b) > 500 { b = b[:500] } la := len(a) lb := len(b) if la == 0 { return lb } if lb == 0 { return la } // 精妙之处(CLEVER): 使用两行滚动数组,空间复杂度 O(min(m,n)) // 而非 O(m*n) 的完整矩阵.对 500 字符以内足够高效. prev := make([]int, lb+1) curr := make([]int, lb+1) for j := 0; j <= lb; j++ { prev[j] = j } for i := 1; i <= la; i++ { curr[0] = i for j := 1; j <= lb; j++ { cost := 1 if a[i-1] == b[j-1] { cost = 0 } ins := curr[j-1] + 1 del := prev[j] + 1 sub := prev[j-1] + cost curr[j] = minInt(ins, minInt(del, sub)) } prev, curr = curr, prev } return prev[lb] } // minInt 返回两个整数中的较小值. // 历史包袱(LEGACY): Go 1.21+ 有 min() 内置函数,但为了兼容旧版本保留. func minInt(a, b int) int { if a < b { return a } return b } // findMatchLineNumbers 找出 old_string 在文件内容中所有匹配位置的起始行号. func findMatchLineNumbers(content, oldString string) []int { var lineNums []int searchFrom := 0 for { idx := strings.Index(content[searchFrom:], oldString) if idx == -1 { break } absIdx := searchFrom + idx // 计算行号:数 absIdx 之前的换行符数量 lineNum := strings.Count(content[:absIdx], "\n") + 1 lineNums = append(lineNums, lineNum) searchFrom = absIdx + len(oldString) } return lineNums } // ───────────────────────────────────────────────────────────────────── // ToolCapability 协议实现 // ───────────────────────────────────────────────────────────────────── // Capability 声明 FileEdit 工具的安全能力. // 实现 tools.CapabilityProvider 接口. // // 升华改进(ELEVATED): FileEdit 是 Level 2 工具(DryRun + Reversible), // Agent 可以先预览 diff 再决定是否执行,执行后可以回滚到原文件. // 替代方案:不声明能力(Agent 无法区分 FileEdit 和 rm -rf 的安全等级). func (t *FileEditTool) Capability() tools.ToolCapability { return tools.ToolCapability{ DryRun: true, Reversible: true, UndoMethod: "tool", UndoToolName: "Write", AffectedResources: []string{"file"}, } } // DryRun 模拟执行文件编辑,返回 diff 预览但不实际修改文件. // 实现 tools.DryRunnable 接口. // // 精妙之处(CLEVER): 复用 validate 阶段的逻辑--validate 本身就不修改文件, // 只需要在 validate 成功后计算 diff 即可. func (t *FileEditTool) DryRun(ctx context.Context, input json.RawMessage) (*tools.DryRunResult, error) { var params fileEditInput if err := json.Unmarshal(input, ¶ms); err != nil { return nil, fmt.Errorf("fileedit: invalid input: %w", err) } validation, validErr := t.validate(params) if validErr != nil { return &tools.DryRunResult{ WouldAffect: params.FilePath, Preview: validErr.Output, }, nil } // 计算 diff 但不写入 var newContent string if validation.replaceAll { newContent = strings.ReplaceAll(validation.content, validation.oldString, validation.newString) } else { newContent = strings.Replace(validation.content, validation.oldString, validation.newString, 1) } hunks := diff.StructuredPatch(validation.content, newContent, 3) diffText := diff.FormatUnified(validation.filePath, hunks) return &tools.DryRunResult{ WouldAffect: params.FilePath, Preview: diffText, EstimatedImpact: map[string]any{ "match_count": validation.matchCount, "match_lines": validation.matchLines, "file_mode": validation.fileMode.String(), }, }, nil } // GenerateUndo 基于编辑结果生成撤销信息. // 实现 tools.Reversible 接口. // // 精妙之处(CLEVER): 撤销 FileEdit 就是用 FileWrite 写回原内容. // 不需要反向 diff--直接读备份恢复即可.这里生成的 UndoInfo // 指向 "Write" 工具,带上原文件内容作为输入. func (t *FileEditTool) GenerateUndo(ctx context.Context, input json.RawMessage, result *tools.Result) (*tools.UndoInfo, error) { var params fileEditInput if err := json.Unmarshal(input, ¶ms); err != nil { return nil, fmt.Errorf("fileedit: invalid input for undo: %w", err) } // 读取当前文件内容(编辑前的版本已经被 FileHistory 备份了, // 这里生成的 UndoInfo 是"调用 Write 写回原内容"的指令). // 注意:此时文件已经被编辑了,所以我们需要从编辑前的版本恢复. // 但 GenerateUndo 在编辑之后调用,文件内容已变. // 升华改进(ELEVATED): 这里只生成"撤销指令"的描述, // 实际回滚由 FileHistory + OperationLog 协同完成. return &tools.UndoInfo{ ToolName: "Write", Input: map[string]any{ "file_path": params.FilePath, "content": "[restored from file history backup]", }, Description: fmt.Sprintf("恢复 %s 到编辑前的版本", params.FilePath), }, nil }