// evolve_test.go -- 自进化系统的单元测试. // // 覆盖场景: // - NewEvolver 创建 // - EvolutionStore 存储操作 // - Propose 提交提案(含审批流程) // - ToolBuilder 验证和持久化 // - SkillLearner 保存和加载 // - History 历史查询 package evolve import ( "context" "encoding/json" "errors" "os" "path/filepath" "strings" "testing" "git.flytoex.net/yuanwei/flyto-agent/pkg/execenv" ) // TestEvolutionStore 测试存储基本操作 func TestEvolutionStore(t *testing.T) { dir := t.TempDir() store, err := NewEvolutionStore(dir) if err != nil { t.Fatalf("创建存储失败: %v", err) } // 保存提案 proposal := &EvolutionProposal{ ID: "test-1", Type: EvolveNewTool, Title: "Test Tool", Description: "A test tool", Status: StatusPending, } if err := store.SaveProposal(proposal); err != nil { t.Fatalf("保存提案失败: %v", err) } // 列出提案 proposals, err := store.ListProposals() if err != nil { t.Fatalf("列出提案失败: %v", err) } if len(proposals) != 1 { t.Fatalf("期望 1 个提案, 实际 %d", len(proposals)) } if proposals[0].ID != "test-1" { t.Errorf("ID 不匹配: %q", proposals[0].ID) } } // TestNewEvolver 测试创建进化器 func TestNewEvolver(t *testing.T) { dir := t.TempDir() // 缺少 StoreDir 应报错 _, err := NewEvolver(&Config{}) if err == nil { t.Error("缺少 StoreDir 应报错") } // 正常创建 e, err := NewEvolver(&Config{ StoreDir: filepath.Join(dir, "evolve"), MaxToolsPerSession: 5, MaxSkillsPerSession: 10, }) if err != nil { t.Fatalf("创建失败: %v", err) } if e.ToolBuilder() == nil { t.Error("ToolBuilder 不应为 nil") } if e.SkillLearner() == nil { t.Error("SkillLearner 不应为 nil") } } // TestEvolver_Propose_Rejected 测试提案被拒绝 func TestEvolver_Propose_Rejected(t *testing.T) { dir := t.TempDir() e, _ := NewEvolver(&Config{ StoreDir: filepath.Join(dir, "evolve"), // 不设置 ApprovalFunc,默认拒绝 }) proposal := &EvolutionProposal{ Type: EvolveNewTool, Title: "Test", Description: "Test", } err := e.Propose(context.Background(), proposal) if err != nil { t.Fatalf("提交提案不应返回错误: %v", err) } if proposal.Status != StatusRejected { t.Errorf("无审批函数应被拒绝, 实际状态: %q", proposal.Status) } } // TestEvolver_Propose_Approved 测试提案被批准 func TestEvolver_Propose_Approved(t *testing.T) { dir := t.TempDir() // 创建一个自动批准的审批函数 autoApprove := func(ctx context.Context, p *EvolutionProposal) (bool, error) { return true, nil } e, _ := NewEvolver(&Config{ StoreDir: filepath.Join(dir, "evolve"), ApprovalFunc: autoApprove, }) // 提交一个技能学习提案 skillDef := SkillDefinition{ Name: "test-skill", Description: "A test skill", WhenToUse: "when testing", Prompt: "do the testing", } proposal := &EvolutionProposal{ Type: EvolveNewSkill, Title: "Learn test skill", Content: skillDef, } err := e.Propose(context.Background(), proposal) if err != nil { t.Fatalf("提案失败: %v", err) } if proposal.Status != StatusApplied { t.Errorf("应被批准并应用, 实际状态: %q", proposal.Status) } } // TestToolBuilder_Validate 测试工具定义验证 func TestToolBuilder_Validate(t *testing.T) { dir := t.TempDir() store, _ := NewEvolutionStore(dir) tb := NewToolBuilder(store, 5) tests := []struct { name string def *ToolDefinition wantErr bool }{ {"空名称", &ToolDefinition{Description: "d", ExecutionType: ExecScript, Script: "echo"}, true}, {"名称含空格", &ToolDefinition{Name: "my tool", Description: "d", ExecutionType: ExecScript, Script: "echo"}, true}, {"空描述", &ToolDefinition{Name: "tool1", ExecutionType: ExecScript, Script: "echo"}, true}, {"内置工具名", &ToolDefinition{Name: "Bash", Description: "d", ExecutionType: ExecScript, Script: "echo"}, true}, {"script 类型缺脚本", &ToolDefinition{Name: "tool1", Description: "d", ExecutionType: ExecScript}, true}, {"command 类型缺命令", &ToolDefinition{Name: "tool1", Description: "d", ExecutionType: ExecCommand}, true}, {"未知执行类型", &ToolDefinition{Name: "tool1", Description: "d", ExecutionType: "unknown"}, true}, {"正常 script", &ToolDefinition{Name: "tool1", Description: "d", ExecutionType: ExecScript, Script: "echo hi"}, false}, {"正常 command", &ToolDefinition{Name: "tool1", Description: "d", ExecutionType: ExecCommand, Command: "echo {{.name}}"}, false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { err := tb.validate(tt.def) if tt.wantErr && err == nil { t.Error("应报错") } if !tt.wantErr && err != nil { t.Errorf("不应报错: %v", err) } }) } } // TestToolBuilder_SaveAndLoad 测试工具保存和加载 func TestToolBuilder_SaveAndLoad(t *testing.T) { dir := t.TempDir() store, _ := NewEvolutionStore(dir) tb := NewToolBuilder(store, 5) def := &ToolDefinition{ Name: "my-tool", Description: "A custom tool", ExecutionType: ExecScript, Script: "echo hello", Version: 1, } if err := tb.save(def); err != nil { t.Fatalf("保存失败: %v", err) } // 加载 defs, err := tb.LoadAll() if err != nil { t.Fatalf("加载失败: %v", err) } if len(defs) != 1 { t.Fatalf("期望 1 个定义, 实际 %d", len(defs)) } if defs[0].Name != "my-tool" { t.Errorf("Name 不匹配: %q", defs[0].Name) } } // TestToolBuilder_SessionLimit 测试会话限制 func TestToolBuilder_SessionLimit(t *testing.T) { dir := t.TempDir() store, _ := NewEvolutionStore(dir) tb := NewToolBuilder(store, 1) // 限制为 1 // 第一次应成功 tb.created = 1 // 模拟已创建 1 个 err := tb.Apply(context.Background(), &EvolutionProposal{ Content: ToolDefinition{Name: "tool2", Description: "d", ExecutionType: ExecScript, Script: "echo"}, }) if err == nil { t.Error("超出会话限制应报错") } if !strings.Contains(err.Error(), "session limit") { t.Errorf("错误信息应包含 session limit: %v", err) } } // TestSkillLearner_SaveAndLoad 测试技能保存和加载 func TestSkillLearner_SaveAndLoad(t *testing.T) { dir := t.TempDir() store, _ := NewEvolutionStore(dir) sl := NewSkillLearner(store, 10) def := &SkillDefinition{ Name: "debug-skill", Description: "How to debug", WhenToUse: "when errors occur", Prompt: "First check logs, then...", Version: 1, } if err := sl.save(def); err != nil { t.Fatalf("保存失败: %v", err) } skills, errs := sl.LoadAll() if len(errs) > 0 { t.Fatalf("加载失败: %v", errors.Join(errs...)) } if len(skills) != 1 { t.Fatalf("期望 1 个技能, 实际 %d", len(skills)) } if skills[0].Name != "debug-skill" { t.Errorf("Name 不匹配: %q", skills[0].Name) } } // TestRuntimeTool 测试运行时工具执行 func TestRuntimeTool(t *testing.T) { def := &ToolDefinition{ Name: "hello-tool", Description: "Says hello", ExecutionType: ExecScript, Script: "echo hello from tool", InputSchema: json.RawMessage(`{"type":"object"}`), } rt := NewRuntimeTool(def, t.TempDir(), execenv.DefaultExecutor{}) if rt.Name() != "hello-tool" { t.Errorf("Name: %q", rt.Name()) } result, err := rt.Execute(context.Background(), json.RawMessage(`{}`), nil) if err != nil { t.Fatalf("执行失败: %v", err) } if result.IsError { t.Errorf("不应标记为错误: %s", result.Output) } if !strings.Contains(result.Output, "hello from tool") { t.Errorf("输出不匹配: %q", result.Output) } } // TestRuntimeTool_CommandTemplate 测试命令模板执行 // 精妙之处(CLEVER): 参数通过环境变量 $TOOL_INPUT_NAME 传递,不做字符串替换, // 防止命令注入.命令模板中直接用 shell 变量引用参数. func TestRuntimeTool_CommandTemplate(t *testing.T) { def := &ToolDefinition{ Name: "greet-tool", Description: "Greets someone", ExecutionType: ExecCommand, Command: `echo "Hello $TOOL_INPUT_NAME"`, InputSchema: json.RawMessage(`{"type":"object"}`), } rt := NewRuntimeTool(def, t.TempDir(), execenv.DefaultExecutor{}) result, err := rt.Execute(context.Background(), json.RawMessage(`{"name":"World"}`), nil) if err != nil { t.Fatalf("执行失败: %v", err) } if !strings.Contains(result.Output, "Hello World") { t.Errorf("环境变量传参失败: %q", result.Output) } // 精妙之处(CLEVER): 验证命令注入被防御. // 参数 "World; echo INJECTED" 如果被字符串拼接会执行两条命令, // 但通过环境变量传递时,双引号保护使其成为一个完整字符串, // 输出只有一行 "Hello World; echo INJECTED",而不是两行. malicious, err := rt.Execute(context.Background(), json.RawMessage(`{"name":"World; echo INJECTED"}`), nil) if err != nil { t.Fatalf("执行失败: %v", err) } lines := strings.Split(strings.TrimSpace(malicious.Output), "\n") if len(lines) != 1 { t.Errorf("命令注入防御失败!期望 1 行输出,实际 %d 行: %q", len(lines), malicious.Output) } } // TestEvolver_History 测试历史查询 func TestEvolver_History(t *testing.T) { dir := t.TempDir() e, _ := NewEvolver(&Config{StoreDir: filepath.Join(dir, "evolve")}) // 提交几个提案 for i := 0; i < 3; i++ { e.Propose(context.Background(), &EvolutionProposal{ Type: EvolveNewSkill, Title: "test", }) } history, err := e.History() if err != nil { t.Fatalf("查询历史失败: %v", err) } if len(history) != 3 { t.Errorf("期望 3 条历史, 实际 %d", len(history)) } } // TestEvolutionStore_EmptyList 测试空列表 func TestEvolutionStore_EmptyList(t *testing.T) { dir := t.TempDir() store, _ := NewEvolutionStore(dir) // 清空 proposals 目录 os.RemoveAll(filepath.Join(dir, "proposals")) os.MkdirAll(filepath.Join(dir, "proposals"), 0755) proposals, err := store.ListProposals() if err != nil { t.Fatalf("列出失败: %v", err) } if len(proposals) != 0 { t.Errorf("空目录应返回 0 个提案, 实际 %d", len(proposals)) } } // --------------------------------------------------------------------------- // AutoApproveReadOnly wire regression guards // --------------------------------------------------------------------------- // recordingEvolveObserver captures events for assertion. // // recordingEvolveObserver 捕获事件供断言. type recordingEvolveObserver struct { events []struct { name string data map[string]any } } func (r *recordingEvolveObserver) Event(name string, data map[string]any) { r.events = append(r.events, struct { name string data map[string]any }{name: name, data: data}) } func (r *recordingEvolveObserver) Error(err error, ctx map[string]any) {} func (r *recordingEvolveObserver) hasEvent(name string) bool { for _, e := range r.events { if e.name == name { return true } } return false } // TestEvolver_Propose_AutoApproveReadOnly_ApprovesSkillWithoutApprovalFunc // locks the wire: AutoApproveReadOnly=true lets an EvolveNewSkill proposal // reach StatusApproved *without* an ApprovalFunc -- the previous behavior // (pre-wire) would reject (nil ApprovalFunc = reject) even with the flag // set, because Propose never read AutoApproveReadOnly. // // TestEvolver_Propose_AutoApproveReadOnly_ApprovesSkillWithoutApprovalFunc // 锁 wire: AutoApproveReadOnly=true 让 EvolveNewSkill 提案**无** // ApprovalFunc 也到达 StatusApproved -- wire 前行为 (nil ApprovalFunc= // 拒绝) 即便设此标志也会拒绝, 因为 Propose 从未读 AutoApproveReadOnly. func TestEvolver_Propose_AutoApproveReadOnly_ApprovesSkillWithoutApprovalFunc(t *testing.T) { dir := t.TempDir() obs := &recordingEvolveObserver{} e, _ := NewEvolver(&Config{ StoreDir: filepath.Join(dir, "evolve"), AutoApproveReadOnly: true, Observer: obs, // ApprovalFunc: nil on purpose — prove the flag bypasses it. // ApprovalFunc 刻意为 nil -- 证明标志绕过它. }) proposal := &EvolutionProposal{ Type: EvolveNewSkill, Title: "skill-auto-approved", Content: SkillDefinition{ Name: "auto-test", Description: "auto approve test", WhenToUse: "always", Prompt: "do it", }, } if err := e.Propose(context.Background(), proposal); err != nil { t.Fatalf("Propose returned error: %v", err) } // Must be approved-and-applied, not rejected. // 必须是 approved-and-applied, 不是 rejected. if proposal.Status == StatusRejected { t.Errorf("proposal status = %q, want non-rejected (auto-approval path)", proposal.Status) } if !obs.hasEvent("evolution_auto_approved") { t.Errorf("expected evolution_auto_approved event; events=%+v", obs.events) } } // TestEvolver_Propose_AutoApproveReadOnly_DoesNotBypassForNonReadOnlyTypes // locks the safety boundary: AutoApproveReadOnly=true must NOT short-circuit // EvolveNewTool / EvolveOptimize / EvolveSelfAdjust, because their content // executes code or mutates runtime. A regression that widens // isReadOnlyEvolution would fail here. // // TestEvolver_Propose_AutoApproveReadOnly_DoesNotBypassForNonReadOnlyTypes // 锁安全边界: AutoApproveReadOnly=true 不得 short-circuit EvolveNewTool / // EvolveOptimize / EvolveSelfAdjust, 因其内容执行代码或改运行时. 若有 // regression 放宽 isReadOnlyEvolution, 此 test 会失败. func TestEvolver_Propose_AutoApproveReadOnly_DoesNotBypassForNonReadOnlyTypes(t *testing.T) { for _, evolType := range []EvolutionType{EvolveNewTool, EvolveOptimize, EvolveSelfAdjust} { t.Run(string(evolType), func(t *testing.T) { dir := t.TempDir() obs := &recordingEvolveObserver{} e, _ := NewEvolver(&Config{ StoreDir: filepath.Join(dir, "evolve"), AutoApproveReadOnly: true, Observer: obs, // nil ApprovalFunc → reject by default (safe). // nil ApprovalFunc → 默认拒绝 (安全). }) proposal := &EvolutionProposal{ Type: evolType, Title: "should-not-auto-approve", } _ = e.Propose(context.Background(), proposal) if proposal.Status != StatusRejected { t.Errorf("%s: status = %q, want StatusRejected (AutoApproveReadOnly must not bypass non-read-only types)", evolType, proposal.Status) } if obs.hasEvent("evolution_auto_approved") { t.Errorf("%s: evolution_auto_approved event must not fire for non-read-only evolution", evolType) } }) } } // TestEvolver_Propose_AutoApproveReadOnly_FalseKeepsHumanGate locks the // backward-compat promise: AutoApproveReadOnly=false (the default) means // every proposal — including read-only skills — still goes through // ApprovalFunc. Regressions that invert the flag's default semantics // would fail here. // // TestEvolver_Propose_AutoApproveReadOnly_FalseKeepsHumanGate 锁向后 // 兼容: AutoApproveReadOnly=false (默认) 时, 每个提案 -- 包括只读技能 // -- 仍走 ApprovalFunc. 若有 regression 倒转此标志默认语义, 此 test 失败. func TestEvolver_Propose_AutoApproveReadOnly_FalseKeepsHumanGate(t *testing.T) { dir := t.TempDir() obs := &recordingEvolveObserver{} approvalCalled := false approvalFn := func(ctx context.Context, p *EvolutionProposal) (bool, error) { approvalCalled = true return true, nil } e, _ := NewEvolver(&Config{ StoreDir: filepath.Join(dir, "evolve"), AutoApproveReadOnly: false, // default value, explicit for clarity ApprovalFunc: approvalFn, Observer: obs, }) proposal := &EvolutionProposal{ Type: EvolveNewSkill, Title: "still-requires-human", Content: SkillDefinition{ Name: "gated", Description: "must pass through approvalFunc", WhenToUse: "always", Prompt: "do it", }, } if err := e.Propose(context.Background(), proposal); err != nil { t.Fatalf("Propose returned error: %v", err) } if !approvalCalled { t.Error("ApprovalFunc must still be called when AutoApproveReadOnly=false") } if obs.hasEvent("evolution_auto_approved") { t.Error("evolution_auto_approved must not fire when flag is false") } }