package engine import ( "strings" "testing" "git.flytoex.net/yuanwei/flyto-agent/pkg/query" ) // ============================================================ // ToolResultPairingNormalizer 测试 // ============================================================ // --- Case 1: tool_use 无 tool_result → 注入合成 tool_result --- func TestToolResultPairing_Case1_MissingToolResult(t *testing.T) { obs := &MockObserver{} n := &ToolResultPairingNormalizer{Observer: obs} messages := []query.Message{ { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentText, Text: "hello"}, }, }, { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, {Type: query.ContentToolUse, ID: "tu_002", Name: "grep"}, }, }, { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_001", Text: "ok"}, // tu_002 缺失 }, }, } result := n.Normalize(messages) // 应该注入一条合成的 tool_result 消息 if len(result) != 4 { t.Fatalf("expected 4 messages (3 original + 1 synthetic), got %d", len(result)) } // 最后一条应该是合成的 tool_result lastMsg := result[3] if lastMsg.Role != query.RoleUser { t.Error("synthetic tool_result should be in a user message") } if len(lastMsg.Content) != 1 { t.Fatalf("expected 1 content block in synthetic message, got %d", len(lastMsg.Content)) } if lastMsg.Content[0].Type != query.ContentToolResult { t.Error("synthetic content should be tool_result type") } if lastMsg.Content[0].ToolUseID != "tu_002" { t.Errorf("expected synthetic tool_result for tu_002, got %s", lastMsg.Content[0].ToolUseID) } if lastMsg.Content[0].Text != syntheticToolResultText { t.Error("synthetic tool_result should use standard text") } if !lastMsg.Content[0].IsError { t.Error("synthetic tool_result should be marked as error") } // 验证 metadata if lastMsg.Metadata == nil || lastMsg.Metadata["synthetic"] != true { t.Error("synthetic message should have synthetic=true metadata") } // 验证 observer 事件 if obs.EventCount("tool_result_pairing_repaired") != 1 { t.Error("expected 1 tool_result_pairing_repaired event") } } func TestToolResultPairing_Case1_MultipleMissingToolResults(t *testing.T) { obs := &MockObserver{} n := &ToolResultPairingNormalizer{Observer: obs} messages := []query.Message{ { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, {Type: query.ContentToolUse, ID: "tu_002", Name: "grep"}, {Type: query.ContentToolUse, ID: "tu_003", Name: "glob"}, }, }, // 没有任何 tool_result } result := n.Normalize(messages) // 应该追加一条合成消息,包含 3 个 tool_result if len(result) != 2 { t.Fatalf("expected 2 messages (1 original + 1 synthetic), got %d", len(result)) } lastMsg := result[1] if len(lastMsg.Content) != 3 { t.Fatalf("expected 3 synthetic tool_results, got %d", len(lastMsg.Content)) } // 验证 observer 事件中的修复数量 evt := obs.LastEvent("tool_result_pairing_repaired") if evt == nil { t.Fatal("expected tool_result_pairing_repaired event") } repairCount, ok := evt.Data["repair_count"].(int) if !ok { t.Fatal("repair_count should be int") } if repairCount != 3 { t.Errorf("expected 3 repairs, got %d", repairCount) } } // --- Case 2: tool_result 无 tool_use → 委托给 OrphanToolResultRemover --- func TestToolResultPairing_Case2_OrphanToolResult(t *testing.T) { obs := &MockObserver{} n := &ToolResultPairingNormalizer{Observer: obs} messages := []query.Message{ { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "nonexistent", Text: "orphan"}, }, }, } result := n.Normalize(messages) // case 2 委托给 OrphanToolResultRemover,本步骤只统计不处理 // 消息仍在(因为本步骤不删除孤立 tool_result) if len(result) != 1 { t.Fatalf("expected 1 message (case 2 delegated), got %d", len(result)) } // 但应该记录诊断事件 if obs.EventCount("tool_result_pairing_repaired") != 1 { t.Error("expected repaired event for orphan detection") } } // --- Case 3: 重复 tool_use ID → 去重 --- func TestToolResultPairing_Case3_DuplicateToolUseID(t *testing.T) { obs := &MockObserver{} n := &ToolResultPairingNormalizer{Observer: obs} messages := []query.Message{ { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, // 重复 }, }, { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_001", Text: "ok"}, }, }, } result := n.Normalize(messages) // assistant 消息应该只保留一个 tool_use if len(result[0].Content) != 1 { t.Errorf("expected 1 tool_use (deduped), got %d", len(result[0].Content)) } // 验证 observer 事件 evt := obs.LastEvent("tool_result_pairing_repaired") if evt == nil { t.Fatal("expected repair event") } repairs, ok := evt.Data["repairs"].([]string) if !ok { t.Fatal("repairs should be []string") } found := false for _, r := range repairs { if strings.HasPrefix(r, "duplicate_tool_use:") { found = true break } } if !found { t.Error("expected duplicate_tool_use repair record") } } // --- Case 4: 重复 tool_result ID → 去重 --- func TestToolResultPairing_Case4_DuplicateToolResultID(t *testing.T) { obs := &MockObserver{} n := &ToolResultPairingNormalizer{Observer: obs} messages := []query.Message{ { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, }, }, { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_001", Text: "first"}, {Type: query.ContentToolResult, ToolUseID: "tu_001", Text: "duplicate"}, // 重复 }, }, } result := n.Normalize(messages) // user 消息应该只保留一个 tool_result if len(result[1].Content) != 1 { t.Errorf("expected 1 tool_result (deduped), got %d", len(result[1].Content)) } if result[1].Content[0].Text != "first" { t.Error("first tool_result should be kept") } } // --- 混合 case 测试 --- func TestToolResultPairing_MixedCases(t *testing.T) { obs := &MockObserver{} n := &ToolResultPairingNormalizer{Observer: obs} messages := []query.Message{ { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, // case 3: 重复 tool_use {Type: query.ContentToolUse, ID: "tu_002", Name: "grep"}, // case 1: 无 tool_result }, }, { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_001", Text: "ok"}, {Type: query.ContentToolResult, ToolUseID: "tu_001", Text: "dup"}, // case 4: 重复 tool_result {Type: query.ContentToolResult, ToolUseID: "tu_999", Text: "orphan"}, // case 2: 孤立 }, }, } result := n.Normalize(messages) // 验证去重后的 assistant 消息 assistantToolUses := 0 for _, c := range result[0].Content { if c.Type == query.ContentToolUse { assistantToolUses++ } } if assistantToolUses != 2 { t.Errorf("expected 2 unique tool_uses, got %d", assistantToolUses) } // 验证去重后的 user 消息(tu_001 保留 1 个,tu_999 保留 1 个) userToolResults := 0 for _, c := range result[1].Content { if c.Type == query.ContentToolResult { userToolResults++ } } if userToolResults != 2 { t.Errorf("expected 2 unique tool_results in user message, got %d", userToolResults) } // 验证 tu_002 的合成 tool_result(最后一条消息) lastMsg := result[len(result)-1] hasSynthetic := false for _, c := range lastMsg.Content { if c.Type == query.ContentToolResult && c.ToolUseID == "tu_002" { hasSynthetic = true if c.Text != syntheticToolResultText { t.Error("synthetic tool_result should use standard text") } } } if !hasSynthetic { t.Error("expected synthetic tool_result for tu_002") } } // --- 无修复 case --- func TestToolResultPairing_NoRepairNeeded(t *testing.T) { obs := &MockObserver{} n := &ToolResultPairingNormalizer{Observer: obs} messages := []query.Message{ { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentText, Text: "hello"}, }, }, { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, }, }, { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_001", Text: "ok"}, }, }, { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentText, Text: "done"}, }, }, } result := n.Normalize(messages) if len(result) != 4 { t.Fatalf("expected 4 messages (no changes), got %d", len(result)) } // 无修复不应该记录事件 if len(obs.Events) != 0 { t.Errorf("expected 0 events when no repair needed, got %d", len(obs.Events)) } } func TestToolResultPairing_EmptyMessages(t *testing.T) { n := &ToolResultPairingNormalizer{} result := n.Normalize(nil) if result != nil { t.Error("nil input should return nil") } result = n.Normalize([]query.Message{}) if len(result) != 0 { t.Error("empty input should return empty") } } // --- 严格模式测试 --- func TestToolResultPairing_StrictMode_Panics(t *testing.T) { obs := &MockObserver{} strict := &StrictMode{ToolResultPairing: true} n := &ToolResultPairingNormalizer{Observer: obs, StrictMode: strict} messages := []query.Message{ { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, }, }, // 缺少 tool_result → 触发修复 → 严格模式 panic } defer func() { r := recover() if r == nil { t.Fatal("expected panic in strict mode") } msg := r.(string) if !strings.Contains(msg, "strict mode violation") { t.Errorf("expected strict mode violation in panic message, got: %s", msg) } }() n.Normalize(messages) } func TestToolResultPairing_StrictMode_Nil(t *testing.T) { obs := &MockObserver{} n := &ToolResultPairingNormalizer{Observer: obs, StrictMode: nil} messages := []query.Message{ { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, }, }, } // nil StrictMode 不应该 panic result := n.Normalize(messages) if len(result) == 0 { t.Error("should produce output even without strict mode") } } // --- Observer nil 安全测试 --- func TestToolResultPairing_NilObserver(t *testing.T) { n := &ToolResultPairingNormalizer{Observer: nil} messages := []query.Message{ { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, }, }, } // nil Observer 不应该 panic(内部会用 NoopObserver) result := n.Normalize(messages) if len(result) == 0 { t.Error("should produce output even without observer") } } // --- Name 和 Priority 测试 --- func TestToolResultPairing_NameAndPriority(t *testing.T) { n := &ToolResultPairingNormalizer{} if n.Name() != "tool_result_pairing" { t.Errorf("expected name 'tool_result_pairing', got '%s'", n.Name()) } if n.Priority() != 8 { t.Errorf("expected priority 8, got %d", n.Priority()) } } // --- 诊断快照测试 --- func TestBuildDiagnosticDetail_Empty(t *testing.T) { result := buildDiagnosticDetail(nil) if result != "" { t.Errorf("expected empty string for nil messages, got '%s'", result) } } func TestBuildDiagnosticDetail_MixedMessages(t *testing.T) { messages := []query.Message{ { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentText, Text: "hello"}, }, }, { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, {Type: query.ContentToolUse, ID: "tu_002", Name: "grep"}, }, }, { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_001", Text: "ok"}, }, }, } result := buildDiagnosticDetail(messages) if !strings.Contains(result, "[0] user(text)") { t.Errorf("expected user text info, got: %s", result) } if !strings.Contains(result, "tool_use=[tu_001,tu_002]") { t.Errorf("expected tool_use IDs, got: %s", result) } if !strings.Contains(result, "tool_result=[tu_001]") { t.Errorf("expected tool_result IDs, got: %s", result) } } func TestBuildDiagnosticDetail_NoSensitiveData(t *testing.T) { messages := []query.Message{ { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentText, Text: "my secret API key is sk-12345"}, }, }, } result := buildDiagnosticDetail(messages) // 诊断信息不应包含消息内容 if strings.Contains(result, "sk-12345") { t.Error("diagnostic should not contain message content (sensitive data)") } if strings.Contains(result, "secret") { t.Error("diagnostic should not contain message content") } } // --- tool_use 无 ID 的边界 case --- func TestToolResultPairing_ToolUseWithoutID(t *testing.T) { obs := &MockObserver{} n := &ToolResultPairingNormalizer{Observer: obs} messages := []query.Message{ { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "", Name: "bash"}, // 无 ID }, }, } // 无 ID 的 tool_use 应该被保留,不触发修复 result := n.Normalize(messages) if len(result) != 1 { t.Fatalf("expected 1 message, got %d", len(result)) } if len(result[0].Content) != 1 { t.Error("tool_use without ID should be preserved") } } // --- 消息去重后变空的 case --- func TestToolResultPairing_MessageBecomesEmptyAfterDedup(t *testing.T) { obs := &MockObserver{} n := &ToolResultPairingNormalizer{Observer: obs} messages := []query.Message{ { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, }, }, { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, // 重复,去重后消息为空 }, }, { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_001", Text: "ok"}, }, }, } result := n.Normalize(messages) // 去重后第二条 assistant 消息应该被移除(因为内容为空) if len(result) != 2 { t.Fatalf("expected 2 messages after dedup, got %d", len(result)) } } // --- 与 DefaultNormalizePipeline 集成测试 --- func TestToolResultPairing_InDefaultPipeline(t *testing.T) { obs := &MockObserver{} pipeline := DefaultNormalizePipelineWithObserver(obs, nil) messages := []query.Message{ { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentText, Text: "hello"}, }, }, { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_001", Name: "bash"}, {Type: query.ContentToolUse, ID: "tu_002", Name: "grep"}, }, }, { Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_001", Text: "ok"}, // tu_002 缺失 → case 1 注入合成 tool_result {Type: query.ContentToolResult, ToolUseID: "tu_999", Text: "orphan"}, // tu_999 孤立 → case 2 由 OrphanToolResultRemover 处理 }, }, { Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentText, Text: "done"}, }, }, } result := pipeline.Run(messages) // 验证最终消息序列合法: // 1. tu_002 有合成的 tool_result // 2. tu_999 的孤立 tool_result 被 OrphanToolResultRemover 移除 hasTu002Result := false hasTu999Result := false for _, msg := range result { for _, c := range msg.Content { if c.Type == query.ContentToolResult { if c.ToolUseID == "tu_002" { hasTu002Result = true } if c.ToolUseID == "tu_999" { hasTu999Result = true } } } } if !hasTu002Result { t.Error("tu_002 should have synthetic tool_result") } if hasTu999Result { t.Error("tu_999 orphan tool_result should be removed by OrphanToolResultRemover") } // 验证 observer 记录了修复事件 if obs.EventCount("tool_result_pairing_repaired") != 1 { t.Errorf("expected 1 repair event, got %d", obs.EventCount("tool_result_pairing_repaired")) } } // --- L1171: 幂等性回归测试 --- // // TestToolResultPairing_Idempotent 锁定 ToolResultPairingNormalizer 对已修复消息 // 再次 Normalize 的结果必须与一次 Normalize 相同 (幂等). // // 背景: L1171 TODO 原本建议"应检查 Metadata['synthetic'] 跳过", 但 audit 发现这是 // false positive--代码已经通过"基于 ID 的去重集合"结构性保证了幂等性: // // Run 1 输入: [tool_use A, tool_use B, tool_result A] // Run 1 扫描: toolUseIDs={A,B}, toolResultIDs={A}, missing=[B] // Run 1 追加: synthetic tool_result B // Run 1 输出: [原 3 条, synthetic tool_result B] ← 4 条 // // Run 2 输入: Run 1 的 4 条 // Run 2 扫描: toolUseIDs={A,B}, toolResultIDs={A,B} ← B 因 synthetic 进入集合 // Run 2 missing: [] ← 无需注入 // Run 2 输出: 与输入完全一致 ← 幂等 // // 本测试存在的意义: **锁定不变量防未来回归**.如果有人重构阶段 3 误改幂等性 // (比如改成从 result 重新扫描 toolResultIDs 但漏掉 synthetic 追加行), 本测试会立刻 fail. // // 精妙之处(CLEVER): 不只是"跑两遍断言相等", 而是逐 case 覆盖 4 种 repair 路径都幂等-- // 单轮和组合场景都必须幂等, 否则 edge case 会漏网. func TestToolResultPairing_Idempotent(t *testing.T) { // 辅助: 把消息的关键结构序列化为稳定字符串, 用于比较两次 Normalize 结果相等. // 直接 reflect.DeepEqual 对 Metadata map 的 any 值有细微差异, 字符串摘要更可靠. digest := func(msgs []query.Message) string { var sb strings.Builder for _, m := range msgs { sb.WriteString("[") sb.WriteString(string(m.Role)) sb.WriteString("]") for _, c := range m.Content { sb.WriteString("{") sb.WriteString(string(c.Type)) if c.ID != "" { sb.WriteString(":id=") sb.WriteString(c.ID) } if c.ToolUseID != "" { sb.WriteString(":tu=") sb.WriteString(c.ToolUseID) } if c.Text != "" { sb.WriteString(":txt=") sb.WriteString(c.Text) } sb.WriteString("}") } } return sb.String() } cases := []struct { name string in []query.Message }{ { name: "case1_missing_tool_result", in: []query.Message{ {Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_A", Name: "bash"}, {Type: query.ContentToolUse, ID: "tu_B", Name: "read"}, }}, {Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_A", Text: "ok"}, }}, }, }, { name: "case3_duplicate_tool_use", in: []query.Message{ {Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_X", Name: "bash"}, {Type: query.ContentToolUse, ID: "tu_X", Name: "bash"}, // 重复 }}, {Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_X", Text: "ok"}, }}, }, }, { name: "case4_duplicate_tool_result", in: []query.Message{ {Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_Y", Name: "bash"}, }}, {Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_Y", Text: "first"}, {Type: query.ContentToolResult, ToolUseID: "tu_Y", Text: "dup"}, // 重复 }}, }, }, { name: "mixed_missing_and_duplicate", in: []query.Message{ {Role: query.RoleAssistant, Content: []query.Content{ {Type: query.ContentToolUse, ID: "tu_M1", Name: "bash"}, {Type: query.ContentToolUse, ID: "tu_M2", Name: "read"}, {Type: query.ContentToolUse, ID: "tu_M1", Name: "bash"}, // case 3 重复 }}, {Role: query.RoleUser, Content: []query.Content{ {Type: query.ContentToolResult, ToolUseID: "tu_M1", Text: "ok"}, // tu_M2 缺 tool_result → case 1 会注入 synthetic }}, }, }, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { n := &ToolResultPairingNormalizer{Observer: &MockObserver{}} // Run 1: 首次 Normalize run1 := n.Normalize(tc.in) d1 := digest(run1) // Run 2: 对 run1 的输出再 Normalize, 必须与 run1 完全一致 run2 := n.Normalize(run1) d2 := digest(run2) if d1 != d2 { t.Errorf("not idempotent:\nrun1: %s\nrun2: %s", d1, d2) } // 同时验证 Run 3 也幂等 (防"第 2 次稳定但第 3 次漂移"的诡异场景) run3 := n.Normalize(run2) d3 := digest(run3) if d2 != d3 { t.Errorf("not idempotent on 3rd run:\nrun2: %s\nrun3: %s", d2, d3) } }) } }