package engine

import (
	"strings"
	"testing"

	"git.flytoex.net/yuanwei/flyto-agent/pkg/query"
	"git.flytoex.net/yuanwei/flyto-agent/pkg/validator"
)

// TestExtractFinalAssistantText_TextOnly checks that consecutive plain
// text blocks come back joined in their original order.
func TestExtractFinalAssistantText_TextOnly(t *testing.T) {
	const want = "hello world"

	blocks := []query.Content{
		{Type: query.ContentText, Text: "hello "},
		{Type: query.ContentText, Text: "world"},
	}

	if got := extractFinalAssistantText(blocks); got != want {
		t.Errorf("got %q, want %q", got, want)
	}
}

// TestExtractFinalAssistantText_SkipsThinking checks that internal
// reasoning blocks are left out of the validator input: consumers are
// meant to validate the final answer, not the chain-of-thought.
func TestExtractFinalAssistantText_SkipsThinking(t *testing.T) {
	const want = "final answer"

	blocks := []query.Content{
		{Type: query.ContentThinking, Text: "let me think"},
		{Type: query.ContentText, Text: want},
	}

	if got := extractFinalAssistantText(blocks); got != want {
		t.Errorf("got %q, want %q (thinking should be skipped)", got, want)
	}
}

// TestExtractFinalAssistantText_SkipsToolUse verifies tool_use blocks
// are excluded -- they are not text the model claims as the answer.
func TestExtractFinalAssistantText_SkipsToolUse(t *testing.T) { content := []query.Content{ {Type: query.ContentText, Text: "calling tool: "}, {Type: query.ContentToolUse, ToolUseID: "abc", Text: ""}, {Type: query.ContentText, Text: "done"}, } got := extractFinalAssistantText(content) if got != "calling tool: done" { t.Errorf("got %q, want %q (tool_use should be skipped)", got, "calling tool: done") } } // TestExtractFinalAssistantText_Empty verifies empty input returns // empty string (used to short-circuit ResponseReflector dispatch). // // TestExtractFinalAssistantText_Empty 验证空输入返回空串 // (用于短路 ResponseReflector 调用). func TestExtractFinalAssistantText_Empty(t *testing.T) { if got := extractFinalAssistantText(nil); got != "" { t.Errorf("nil content: got %q, want empty", got) } if got := extractFinalAssistantText([]query.Content{}); got != "" { t.Errorf("empty slice: got %q, want empty", got) } // All-thinking content -- no ContentText -> empty. thinking := []query.Content{{Type: query.ContentThinking, Text: "..."}} if got := extractFinalAssistantText(thinking); got != "" { t.Errorf("all-thinking: got %q, want empty", got) } } // TestConfig_ResponseReflector_FieldType is a compile-time assertion // that Config.ResponseReflector implements validator.StructuralValidator // (sealed sub-interface, ADR-0008 v2 § engine-level reflector contract). // If the field type drifts back to validator.Validator (which would // allow LLM-backed business validators to slot in), this test fails // to compile. // // TestConfig_ResponseReflector_FieldType 是编译期断言: 确认 // Config.ResponseReflector 实现 validator.StructuralValidator // (sealed 子接口, ADR-0008 v2 § 引擎层反射器契约). 字段类型漂回 // validator.Validator (会让 LLM-backed 业务校验器又能挂槽位) 时 // 此测试无法编译. func TestConfig_ResponseReflector_FieldType(t *testing.T) { var _ validator.StructuralValidator = (Config{}).ResponseReflector } // TestConfig_ResponseReflectorMaxBlocks_FieldType is a compile-time // assertion on the cap counter type. 
Zero value semantics: 0 = // unlimited (legacy infinite-retry behavior); >0 = graceful break // after that many blocks. // // TestConfig_ResponseReflectorMaxBlocks_FieldType 编译期断言: 上限 // 计数字段类型. 零值语义: 0 = 无限 (旧行为); >0 = 累计达此值时 // graceful break. func TestConfig_ResponseReflectorMaxBlocks_FieldType(t *testing.T) { c := Config{ResponseReflectorMaxBlocks: 3} if c.ResponseReflectorMaxBlocks != 3 { t.Errorf("ResponseReflectorMaxBlocks=%d, want 3", c.ResponseReflectorMaxBlocks) } } // TestConfig_Temperature_FieldType is a compile-time assertion on the // sampling-temperature field type (*float64). Nil = use provider/model // default; non-nil = pass-through to flyto.Request.Temperature. The // pointer indirection cleanly separates "unset" (nil) from "set to 0" // (deterministic sampling, a legitimate value). // // TestConfig_Temperature_FieldType 编译期断言: 采样温度字段类型 // (*float64). nil = 用 provider/模型默认; 非 nil = 透传 flyto.Request. // Temperature. 指针间接干净分开"未设" (nil) 与"设为 0" (deterministic // 采样, 合法值). func TestConfig_Temperature_FieldType(t *testing.T) { v := 0.2 c := Config{Temperature: &v} if c.Temperature == nil || *c.Temperature != 0.2 { t.Errorf("Temperature=%v, want *0.2", c.Temperature) } var _ *float64 = (Config{}).Temperature } // TestConfig_TopP_FieldType is a compile-time assertion on the // nucleus-sampling cutoff field type (*float64). Same nil / non-nil // semantics as Temperature. // // TestConfig_TopP_FieldType 编译期断言: nucleus 采样阈值字段类型 // (*float64). 与 Temperature 同 nil / 非 nil 语义. func TestConfig_TopP_FieldType(t *testing.T) { v := 0.95 c := Config{TopP: &v} if c.TopP == nil || *c.TopP != 0.95 { t.Errorf("TopP=%v, want *0.95", c.TopP) } var _ *float64 = (Config{}).TopP } // TestDetectDuplicateTextBlocks_NoDuplicate verifies a single text // block returns dup=false (the common healthy case). // // TestDetectDuplicateTextBlocks_NoDuplicate 验证单一文本块返回 // dup=false (健康场景). 
func TestDetectDuplicateTextBlocks_NoDuplicate(t *testing.T) { content := []query.Content{ {Type: query.ContentText, Text: "the answer is 42"}, } dup, count, sample := detectDuplicateTextBlocks(content) if dup || count != 0 || sample != "" { t.Errorf("got (%v, %d, %q), want (false, 0, \"\")", dup, count, sample) } } // TestDetectDuplicateTextBlocks_ExactDuplicate verifies two identical // blocks trigger fail-closed. Mirrors r14 main agent gpt-5.1 emitting // the verdict JSON twice in one final response. // // TestDetectDuplicateTextBlocks_ExactDuplicate 验证两个内容完全相同的 // 块触发 fail-closed. 对应 r14 主 agent gpt-5.1 把 verdict JSON 在一次 // final response 内输出两遍. func TestDetectDuplicateTextBlocks_ExactDuplicate(t *testing.T) { content := []query.Content{ {Type: query.ContentText, Text: `{"verdict":"retry","reason":"..."}`}, {Type: query.ContentText, Text: `{"verdict":"retry","reason":"..."}`}, } dup, count, sample := detectDuplicateTextBlocks(content) if !dup || count != 2 || sample != `{"verdict":"retry","reason":"..."}` { t.Errorf("got (%v, %d, %q), want (true, 2, full json)", dup, count, sample) } } // TestDetectDuplicateTextBlocks_MixedThinkingAndDuplicate verifies // thinking and tool_use blocks are excluded from the dedup pool; // only ContentText blocks count. Two duplicate ContentText still // trigger even when separated by thinking/tool_use. // // TestDetectDuplicateTextBlocks_MixedThinkingAndDuplicate 验证 // thinking 与 tool_use 块不计入去重池; 即便重复 ContentText 中间夹 // thinking/tool_use 也仍然触发. 
func TestDetectDuplicateTextBlocks_MixedThinkingAndDuplicate(t *testing.T) { content := []query.Content{ {Type: query.ContentThinking, Text: "let me check"}, {Type: query.ContentText, Text: "answer"}, {Type: query.ContentToolUse, ToolUseID: "abc"}, {Type: query.ContentText, Text: "answer"}, } dup, count, _ := detectDuplicateTextBlocks(content) if !dup || count != 2 { t.Errorf("got (%v, %d), want (true, 2)", dup, count) } } // TestDetectDuplicateTextBlocks_EmptySkipped verifies whitespace-only // or empty ContentText blocks do not falsely register as duplicates // (multiple empty deltas during streaming should not block the // response). // // TestDetectDuplicateTextBlocks_EmptySkipped 验证仅空白或空字符串的 // ContentText 块不会被当作重复 (流式过程中可能出现多个空 delta, 不应 // 错误地拒绝响应). func TestDetectDuplicateTextBlocks_EmptySkipped(t *testing.T) { content := []query.Content{ {Type: query.ContentText, Text: ""}, {Type: query.ContentText, Text: " "}, {Type: query.ContentText, Text: "real answer"}, } dup, _, _ := detectDuplicateTextBlocks(content) if dup { t.Errorf("empty/whitespace blocks falsely registered as duplicate") } } // TestDetectDuplicateTextBlocks_DifferentTexts verifies multiple text // blocks with different content (legitimate streaming chunks) do not // trigger fail-closed. // // TestDetectDuplicateTextBlocks_DifferentTexts 验证多个内容不同的 // ContentText 块 (合法流式分块) 不触发 fail-closed. func TestDetectDuplicateTextBlocks_DifferentTexts(t *testing.T) { content := []query.Content{ {Type: query.ContentText, Text: "first part: "}, {Type: query.ContentText, Text: "second part: "}, {Type: query.ContentText, Text: "third part."}, } dup, _, _ := detectDuplicateTextBlocks(content) if dup { t.Errorf("different texts falsely registered as duplicate") } } // TestDetectThinkingLoop_R15Pattern reproduces probe r15 minimax // thinking deadlock: trailing 1KB consists of `首 重 1 公斤 |` // repeated, total ~6 unique runes. Should trigger. 
// // TestDetectThinkingLoop_R15Pattern 复现 probe r15 minimax thinking // 死锁: 尾部 1KB 是 `首 重 1 公斤 |` 重复, 总计 ~6 unique runes. // 应触发. func TestDetectThinkingLoop_R15Pattern(t *testing.T) { loopUnit := "首 重 1 公斤 | " var sb strings.Builder for sb.Len() < 2048 { sb.WriteString(loopUnit) } content := []query.Content{ {Type: query.ContentThinking, Text: sb.String()}, } loop, _, ratio := detectThinkingLoop(content) if !loop { t.Errorf("r15 pattern should trigger loop, ratio=%.4f", ratio) } if ratio > thinkingLoopRatioThreshold { t.Errorf("ratio %.4f exceeds threshold %.4f, should be much lower", ratio, thinkingLoopRatioThreshold) } } // TestDetectThinkingLoop_HealthyReasoning verifies a normal // chain-of-thought block (mixed Chinese + English + digits + punctuation) // does not trigger. // // TestDetectThinkingLoop_HealthyReasoning 验证正常 chain-of-thought // 块 (中英文 + 数字 + 标点混合) 不触发. func TestDetectThinkingLoop_HealthyReasoning(t *testing.T) { healthy := "Let me analyze this pricing table. 第一段 0-0.5kg, 上海费用 1.25 元. " + "For 江苏 same as 上海. 我需要检查每个 region 的 fees 数组长度等于 segments.is_continuation=false 的段数. " + "Verify: row '内蒙古' has fees [6, 6, 6, 6, 0] which is 5 elements; segments has 5 entries; OK. " + "Now check rules.valid_until — original sheet says '2025年9月31号', invalid (Sep has 30 days max). " + "Correct to 2025-09-30 per the calendar normalization rule. " + "Strip_fees: 北京 has 首重 3kg, 1元/票, 续重 0.2 元/kg → base_weight=3000, base_amount=1.0. " + "All structural checks pass. Output JSON now." for len(healthy) < 1500 { healthy = healthy + " continued analysis." 
} content := []query.Content{ {Type: query.ContentThinking, Text: healthy}, } loop, _, ratio := detectThinkingLoop(content) if loop { t.Errorf("healthy reasoning falsely detected as loop, ratio=%.4f", ratio) } if ratio < 0.05 { t.Logf("warning: healthy reasoning ratio %.4f below 5%%; sample text quality may not represent typical case", ratio) } } // TestDetectThinkingLoop_BelowMinLength verifies thinking blocks // shorter than thinkingLoopMinLength are skipped (loop detection // would be noisy on tiny outputs). // // TestDetectThinkingLoop_BelowMinLength 验证短于 thinkingLoopMinLength // 的 thinking 块跳过检测 (短输出循环检测噪声大). func TestDetectThinkingLoop_BelowMinLength(t *testing.T) { short := strings.Repeat("a ", 100) // < 1024 bytes content := []query.Content{ {Type: query.ContentThinking, Text: short}, } loop, _, _ := detectThinkingLoop(content) if loop { t.Errorf("short thinking block should not trigger loop detection") } } // TestDetectThinkingLoop_MixedHealthyPrefix verifies the trailing- // window design catches mixed cases: legit reasoning prefix followed // by loop near the end (the actual r15 shape -- 5KB healthy then // looping). // // TestDetectThinkingLoop_MixedHealthyPrefix 验证尾部窗口设计抓混合 // case: 合法推理前缀 + 尾部死循环 (r15 实际形态 -- 5KB 健康后开始 // 循环). func TestDetectThinkingLoop_MixedHealthyPrefix(t *testing.T) { healthyPrefix := strings.Repeat("Let me analyze the pricing data carefully step by step with concrete examples and proper Chinese / English vocabulary mixed throughout. 
", 30) loopTail := strings.Repeat("首 重 1 公斤 | ", 200) // ~3KB tail loop content := []query.Content{ {Type: query.ContentThinking, Text: healthyPrefix + loopTail}, } loop, _, ratio := detectThinkingLoop(content) if !loop { t.Errorf("mixed (healthy prefix + loop tail) should trigger via tail window, ratio=%.4f", ratio) } } // TestDetectThinkingLoop_NonThinkingIgnored verifies ContentText / // ContentToolUse blocks are not subjected to thinking loop detection // (each channel has its own guard: 6.4 dup blocks for ContentText, // 6.4.1 thinking loop for ContentThinking). // // TestDetectThinkingLoop_NonThinkingIgnored 验证 ContentText / // ContentToolUse 块不参与 thinking 循环检测 (每个频道独立守卫: // 6.4 dup blocks 给 ContentText, 6.4.1 thinking loop 给 ContentThinking). func TestDetectThinkingLoop_NonThinkingIgnored(t *testing.T) { loopText := strings.Repeat("a", 2048) content := []query.Content{ {Type: query.ContentText, Text: loopText}, {Type: query.ContentToolUse, ToolUseID: "x"}, } loop, _, _ := detectThinkingLoop(content) if loop { t.Errorf("ContentText / ContentToolUse should not trigger thinking-channel detection") } }