package memory // MemoryExtractor 从对话中提取值得记住的信息. // // 升华改进(ELEVATED): 接口只定义策略,执行由 ForkedAgent(SubAgent fork 模式)负责. // 配方和厨房分离:Extractor 说"提取什么",ForkedAgent 负责"怎么跑". // 替代方案:Extractor 自己持有 API client 直接执行(变成小 Engine,职责重叠). import ( "fmt" "strings" ) // MemoryExtractor 从对话中提取值得记住的信息. // // 升华改进(ELEVATED): 接口只定义策略,执行由 Engine 的 SubAgent fork 模式负责. // 这样 Extractor 完全不知道 API,模型,token 这些执行细节, // 只专注于"提取什么"和"什么时候提取". // 替代方案:Extractor 自己持有 API client 直接调用模型(职责越界,变成小 Engine). // // Shape: synchronous callback. Engine calls ShouldExtract / Extract at // turn boundaries; the extractor decides if extraction is warranted and // produces candidate memories. // // 形态: 同步回调. 引擎在 turn 边界同步调 ShouldExtract / Extract; extractor // 判断是否该抽取并产出候选记忆. type MemoryExtractor interface { // Name 返回提取器的名称标识. Name() string // ShouldExtract 判断是否应触发提取. // turnCount: 当前对话已完成的轮数 // lastExtractTurn: 上次提取时的轮数(0 = 从未提取) ShouldExtract(turnCount int, lastExtractTurn int) bool // BuildPrompt 构建提取提示词. // existingMemories: 当前已有的记忆条目(避免重复提取) // newMessageCount: 自上次提取以来的新消息数(SubAgent 凭此精准定位分析范围) // 返回发送给提取子 agent 的完整 prompt. // // 升华改进(ELEVATED): 相比早期方案 Go 签名 BuildPrompt(existingMemories []*Entry), // 加入 newMessageCount--早期实现 buildExtractAutoOnlyPrompt(newMessageCount, existingMemories) // 会在 prompt 中注入 "Analyze the most recent ~N messages",让 SubAgent 只看最近的 // 消息而非全部历史,避免重复提取旧内容.我们把这个参数提升到接口层, // 所有场景(编程/仓储/金融)都能利用精准定位. // 替代方案:由 Engine 在调用后包装一句话(职责扩散,场景 prompt 无法定制定位粒度). BuildPrompt(existingMemories []*Entry, newMessageCount int) string // AllowedTools 返回提取代理允许使用的工具名列表. // 提取子 agent 只能使用这些工具,防止越权操作. AllowedTools() []string // MaxTurns 返回提取代理的最大轮数. MaxTurns() int } // DefaultCodeExtractor 编程场景的默认记忆提取器. // // 提取策略: // - 每 5 轮对话检查一次(避免频繁提取浪费 API 调用) // - 关注:项目结构,代码规范,技术决策,用户偏好 // - 输出格式:YAML frontmatter + markdown 正文 // - 最多 5 轮(提取任务通常 1-2 轮就够了) type DefaultCodeExtractor struct{} func (e *DefaultCodeExtractor) Name() string { return "code" } // ShouldExtract 判断是否应触发提取. // 精妙之处(CLEVER): 每 5 轮检查一次,而非每轮都检查. // 频繁提取浪费 API 调用且收益递减(短间隔内的对话内容往往在同一个主题上). // 5 轮是经验值:大约对应一个完整的"提问→探索→实现→验证"子任务周期. func (e *DefaultCodeExtractor) ShouldExtract(turnCount, lastExtractTurn int) bool { if turnCount <= 0 { return false } // 至少完成 5 轮对话才开始提取 if turnCount < 5 { return false } // 自上次提取以来至少过了 5 轮 return turnCount-lastExtractTurn >= 5 } // BuildPrompt 构建编程场景的记忆提取提示词. // // 升华改进(ELEVATED): 相比早期方案 Go 实现,补入了两项早期实现 的关键设计: // 1. "most recent ~N messages" 精准定位--SubAgent 只看新消息,不重复分析旧内容; // 2. 并行读写策略--turn 1 所有 Read 并行,turn 2 所有 Write/Edit 并行, // 节省 2x token 往返(早期实现 buildExtractAutoOnlyPrompt 第 39 行明确写出). // // 替代方案:不传 newMessageCount,只说 "review conversation history"-- // SubAgent 可能重复分析已提取内容,浪费 token 且写重复记忆. func (e *DefaultCodeExtractor) BuildPrompt(existingMemories []*Entry, newMessageCount int) string { var sb strings.Builder // 精妙之处(CLEVER): newMessageCount > 0 时精准指向最近 N 条消息; // = 0 时退回到"分析全部历史"--第一次提取或计数失效时的安全兜底. targetRange := "the full conversation history" if newMessageCount > 0 { targetRange = fmt.Sprintf("the most recent ~%d messages", newMessageCount) } sb.WriteString(fmt.Sprintf( "You are now acting as the memory extraction subagent. Analyze %s above and use them to update persistent memory.\n\n", targetRange, )) sb.WriteString("## Turn budget strategy\n\n") sb.WriteString("You have a limited turn budget. Edit requires a prior Read of the same file, so the efficient strategy is:\n") sb.WriteString("- **Turn 1**: issue all Read calls in parallel for every file you might update\n") sb.WriteString("- **Turn 2**: issue all Write/Edit calls in parallel\n") sb.WriteString("Do not interleave reads and writes across multiple turns.\n\n") sb.WriteString(fmt.Sprintf( "You MUST only use content from %s to update memories. "+ "Do not investigate or verify content further — no grepping source files, no reading code to confirm patterns, no git commands.\n\n", targetRange, )) sb.WriteString("## What to extract\n\n") sb.WriteString("Focus on information that would be useful for future sessions:\n\n") sb.WriteString("1. **Project Structure**: directory layout, key files, build systems, frameworks\n") sb.WriteString("2. **Code Conventions**: naming conventions, patterns, preferred libraries\n") sb.WriteString("3. **Technical Decisions**: architecture choices, design rationale, trade-offs\n") sb.WriteString("4. **User Preferences**: coding style, workflow habits, tool preferences\n") sb.WriteString("5. **Lessons Learned**: bugs encountered, solutions found, performance insights\n\n") sb.WriteString("## Output format\n\n") sb.WriteString("For each piece of information worth remembering, save it as a memory file with YAML frontmatter:\n\n") sb.WriteString("```markdown\n---\nname: descriptive-name\ntype: project|user|feedback|reference\ndescription: Brief one-line description\n---\n\nDetailed content here in markdown format.\n```\n\n") sb.WriteString("## Guidelines\n\n") sb.WriteString("- Do NOT duplicate information already in existing memories\n") sb.WriteString("- Prefer updating existing memories over creating new ones\n") sb.WriteString("- Keep descriptions concise (one line)\n") sb.WriteString("- Keep content focused and actionable\n") sb.WriteString("- Use kebab-case for names (e.g., \"project-structure\", \"code-conventions\")\n") // 列出现有记忆(避免重复提取) if len(existingMemories) > 0 { sb.WriteString("\n## Existing memories (do NOT duplicate)\n\n") sb.WriteString("Check this list before writing — update an existing file rather than creating a duplicate.\n\n") for _, m := range existingMemories { sb.WriteString(fmt.Sprintf("- **%s** (%s): %s\n", m.Name, m.Type, m.Description)) } } return sb.String() } // AllowedTools 返回编程场景提取器允许使用的工具. // // 升华改进(ELEVATED): 只允许文件系统工具 + 搜索工具,不允许 Bash,Agent 等. // 提取器的职责是"读取和记录",不应该有执行代码或创建子 agent 的能力. // Edit/Write 只用于写 memory 目录下的文件(由 SubAgentConfig.MemoryDirRestrict 进一步限制). // 替代方案:允许所有工具(权限过宽,提取器可能意外修改代码文件). func (e *DefaultCodeExtractor) AllowedTools() []string { return []string{"Read", "Grep", "Glob", "Edit", "Write"} } // MaxTurns 返回最大轮数. // 提取任务通常 1-2 轮就完成,5 轮是安全上限. func (e *DefaultCodeExtractor) MaxTurns() int { return 5 }