// minimax-probe - MiniMax 工具数量上限 + JSON Schema 支持深度探测. // // 探测目标: // 1. 工具数量上限(Tool Count Limit) // 从 1 个工具开始,每轮翻倍(1→2→4→8→16→32→64→128), // 找到第一个报错的数量后,在上下界之间二分搜索精确阈值. // 2. JSON Schema 复杂度(Schema Complexity) // 测试 Anthropic API 实际使用的各种 schema 特性: // a. 基础类型(string / number / boolean / array) // b. 嵌套对象(object with nested object) // c. oneOf / anyOf / enum // d. $ref / $defs(JSON Schema Draft-7 引用) // e. additionalProperties: false // f. 超长 description(>500 字符) // g. 深层嵌套(5 层 object) // h. 大型 schema(模拟 Bash/Edit 工具的真实规格) // 3. Native 端点 vs Anthropic 兼容端点 差异对比 // // 使用: // // source .env && go run ./cmd/minimax-probe/ [--model MiniMax-M2.7] [--mode native|anthropic] package main import ( "bufio" "context" "encoding/json" "flag" "fmt" "os" "strings" "time" "git.flytoex.net/yuanwei/flyto-agent/pkg/flyto" "git.flytoex.net/yuanwei/flyto-agent/pkg/providers/minimax" ) func main() { // 从 .env 补充缺失的环境变量 loadEnvFile(".env") modelFlag := flag.String("model", "MiniMax-M2.7", "MiniMax 模型 ID") modeFlag := flag.String("mode", "both", "探测模式:native | anthropic | both") flag.Parse() minimaxKey := os.Getenv("MINIMAX_API_KEY") if minimaxKey == "" { fmt.Fprintln(os.Stderr, "未找到 MINIMAX_API_KEY,请 source .env 或设置环境变量") os.Exit(1) } ctx := context.Background() model := *modelFlag fmt.Printf("=== MiniMax 深度探测 | 模型: %s | %s ===\n\n", model, time.Now().Format("2006-01-02 15:04:05")) var modes []string switch *modeFlag { case "native": modes = []string{"native"} case "anthropic": modes = []string{"anthropic"} default: modes = []string{"native", "anthropic"} } for _, mode := range modes { fmt.Printf("────────────────────────────────────────\n") fmt.Printf(" 接入模式: %s\n", mode) fmt.Printf("────────────────────────────────────────\n\n") var p flyto.ModelProvider if mode == "anthropic" { p = minimax.New(minimax.Config{ APIKey: minimaxKey, Mode: minimax.ModeAnthropic, Region: minimax.RegionChina, }) } else { // 精妙之处(CLEVER): Token plan key 只在中国节点有效(api.minimaxi.com). 
// 全球节点(api.minimax.io)需要另一种 key 格式. // 此处统一用 RegionChina,避免因 key 类型不匹配导致鉴权失败干扰测试结果. p = minimax.New(minimax.Config{ APIKey: minimaxKey, Mode: minimax.ModeNative, Region: minimax.RegionChina, }) } // 1. 工具数量上限探测 fmt.Println("【1】工具数量上限探测") limit := probeToolCountLimit(ctx, p, model) if limit < 0 { fmt.Printf(" 结果: 1 个工具也报错,无法测定上限\n\n") } else { fmt.Printf(" 结果: 最多支持 %d 个工具\n\n", limit) } // 2. JSON Schema 复杂度探测 fmt.Println("【2】JSON Schema 复杂度探测") probeSchemaComplexity(ctx, p, model) fmt.Println() } } // ───────────────────────────────────────────────────── // 工具数量上限探测 // ───────────────────────────────────────────────────── // probeToolCountLimit 找到 MiniMax 支持的最大工具数量. // // 算法: // 1. 指数扩张:从 1 开始翻倍,找到第一个失败的数量(upper bound) // 2. 二分收缩:在 [last_ok, upper] 区间二分,精确定位阈值 // // 返回最大成功数量(-1 = 1 个也失败). func probeToolCountLimit(ctx context.Context, p flyto.ModelProvider, model string) int { // 指数扩张阶段 lastOK := 0 upper := 0 for _, count := range []int{1, 2, 4, 8, 16, 32, 64, 128, 256} { ok, note := testToolCount(ctx, p, model, count) status := "✓" if !ok { status = "✗" } fmt.Printf(" 工具数 %3d: %s %s\n", count, status, note) if ok { lastOK = count } else { upper = count break } } if upper == 0 { // 256 个都通过了,报告 256+ fmt.Printf(" -> 256 个均通过,未找到上限\n") return 256 } if lastOK == 0 { return -1 } // 二分搜索阶段 fmt.Printf(" --- 二分搜索 [%d, %d] ---\n", lastOK, upper) lo, hi := lastOK, upper for hi-lo > 1 { mid := (lo + hi) / 2 ok, note := testToolCount(ctx, p, model, mid) status := "✓" if !ok { status = "✗" } fmt.Printf(" 工具数 %3d: %s %s\n", mid, status, note) if ok { lo = mid } else { hi = mid } } return lo } // testToolCount 发送 count 个工具的请求,检查是否成功(不触发工具调用,只验证请求被接受). // // 精妙之处(CLEVER): 故意不要求模型调用工具(ask="Reply OK")-- // 我们只是想知道"发送 N 个工具定义,API 是否报错", // 不需要模型真的调用工具(那会消耗更多 token 且结果不稳定). 
func testToolCount(ctx context.Context, p flyto.ModelProvider, model string, count int) (bool, string) { reqCtx, cancel := context.WithTimeout(ctx, 30*time.Second) defer cancel() tools := make([]flyto.Tool, count) for i := 0; i < count; i++ { tools[i] = flyto.Tool{ Name: fmt.Sprintf("tool_%03d", i+1), Description: fmt.Sprintf("Tool number %d for testing purposes.", i+1), InputSchema: json.RawMessage(`{"type":"object","properties":{"input":{"type":"string"}},"required":["input"]}`), } } ch, err := p.Stream(reqCtx, &flyto.Request{ Model: model, MaxTokens: 16, Messages: []flyto.Message{ {Role: flyto.RoleUser, Blocks: []flyto.Block{flyto.TextBlock("Reply OK only, do not call any tools.")}}, }, Tools: tools, }) if err != nil { msg := err.Error() if len(msg) > 80 { msg = msg[:80] + "..." } return false, "connect-err: " + msg } for evt := range ch { switch e := evt.(type) { case *flyto.TextDeltaEvent, *flyto.ThinkingDeltaEvent, *flyto.TextEvent: drain(ch) _ = e return true, "" case *flyto.ToolUseEvent: // 模型调用了工具,说明 API 接受了这批工具定义,也算成功 drain(ch) return true, "(model called a tool)" case *flyto.ErrorEvent: msg := e.Err.Error() if len(msg) > 80 { msg = msg[:80] + "..." } return false, "stream-err: " + msg } } return true, "(stream closed without text, accepted)" } // ───────────────────────────────────────────────────── // JSON Schema 复杂度探测 // ───────────────────────────────────────────────────── type schemaCase struct { name string description string schema string } // schemaCases 是 JSON Schema 特性测试用例集合. // // 升华改进(ELEVATED): 不只是"支持/不支持 JSON Schema"-- // 分级测试 8 种实际使用场景,精确定位哪些特性可用. // 这直接决定 Flyto 能否将复杂工具(Bash/Edit/Glob)移植到 MiniMax. 
var schemaCases = []schemaCase{
	{
		name:        "basic-types",
		description: "基础类型:string / number / boolean / array",
		schema:      `{ "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "number"}, "active": {"type": "boolean"}, "tags": {"type": "array", "items": {"type": "string"}} }, "required": ["name"] }`,
	},
	{
		name:        "nested-object",
		description: "嵌套对象(object with nested object)",
		schema:      `{ "type": "object", "properties": { "user": { "type": "object", "properties": { "id": {"type": "string"}, "email": {"type": "string", "format": "email"} }, "required": ["id"] } }, "required": ["user"] }`,
	},
	{
		// The "flag" property carries the anyOf that the description promises;
		// without it this case would only cover enum and oneOf.
		name:        "enum-oneof",
		description: "enum / oneOf / anyOf",
		schema:      `{ "type": "object", "properties": { "status": {"type": "string", "enum": ["active", "inactive", "pending"]}, "value": { "oneOf": [ {"type": "string"}, {"type": "number"} ] }, "flag": { "anyOf": [ {"type": "boolean"}, {"type": "string"} ] } }, "required": ["status"] }`,
	},
	{
		name:        "additional-props-false",
		description: "additionalProperties: false(严格模式)",
		schema:      `{ "type": "object", "properties": { "command": {"type": "string"}, "args": {"type": "array", "items": {"type": "string"}} }, "required": ["command"], "additionalProperties": false }`,
	},
	{
		name:        "refs-defs",
		description: "$ref / $defs(JSON Schema Draft-7 引用)",
		schema:      `{ "type": "object", "$defs": { "Address": { "type": "object", "properties": { "street": {"type": "string"}, "city": {"type": "string"} } } }, "properties": { "billing": {"$ref": "#/$defs/Address"}, "shipping": {"$ref": "#/$defs/Address"} } }`,
	},
	{
		name:        "long-description",
		description: "超长 description(>500 字符,模拟真实工具文档)",
		schema:      `{ "type": "object", "properties": { "command": { "type": "string", "description": "The bash command to execute. This can be any valid bash command including pipes, redirections, and multi-line scripts. The command runs in a sandboxed environment with restricted file system access. Avoid commands that require interactive input as they will timeout. Maximum execution time is 120 seconds. The working directory persists between calls unless explicitly changed with cd. Environment variables can be set and will persist within the session. You can use standard Unix utilities like grep, awk, sed, find, etc. Be careful with commands that may have side effects on the file system." }, "timeout": { "type": "number", "description": "Optional timeout in milliseconds. Defaults to 30000ms (30 seconds). Maximum is 600000ms (10 minutes). If the command exceeds this timeout it will be killed and an error returned." } }, "required": ["command"] }`,
	},
	{
		name:        "deep-nesting",
		description: "5 层深度嵌套对象",
		schema:      `{ "type": "object", "properties": { "level1": { "type": "object", "properties": { "level2": { "type": "object", "properties": { "level3": { "type": "object", "properties": { "level4": { "type": "object", "properties": { "level5": {"type": "string"} } } } } } } } } } }`,
	},
	{
		name:        "bash-like",
		description: "模拟真实 Bash 工具规格",
		schema:      `{ "type": "object", "properties": { "command": { "type": "string", "description": "The bash command to execute." }, "description": { "type": "string", "description": "A clear, concise description of what this command does in active voice." }, "timeout": { "type": "number", "description": "Optional timeout in milliseconds (max 600000ms / 10 minutes)." }, "run_in_background": { "type": "boolean", "description": "Set to true to run this command in the background." } }, "required": ["command", "description"], "additionalProperties": false }`,
	},
	{
		name:        "edit-like",
		description: "模拟真实 Edit 工具规格(包含 old/new string)",
		schema:      `{ "type": "object", "properties": { "file_path": { "type": "string", "description": "The absolute path to the file to modify." }, "old_string": { "type": "string", "description": "The text to replace (must be unique in the file)." }, "new_string": { "type": "string", "description": "The text to replace it with." }, "replace_all": { "type": "boolean", "default": false, "description": "Replace all occurrences of old_string." } }, "required": ["file_path", "old_string", "new_string"], "additionalProperties": false }`,
	},
}

// probeSchemaComplexity runs every entry of schemaCases through testSchemaCase and
// prints one table row per case (name, description, pass/fail mark, note). It then
// runs the dedicated $ref double-serialization diagnostic.
//
// Strategy: each case asks the model to call the tool, so a problematic schema
// shows up either as an API error or as a missing tool call.
// NOTE(review): the tool call is requested through the prompt only; no request in
// this file sets a forced tool choice, so a text-only reply is reported as a failure
// even when the schema itself was accepted.
func probeSchemaComplexity(ctx context.Context, p flyto.ModelProvider, model string) {
	// Table header.
	fmt.Printf(" %-22s %-40s %s\n", "用例", "描述", "结果")
	fmt.Printf(" %s\n", strings.Repeat("-", 90))

	for _, tc := range schemaCases {
		ok, note := testSchemaCase(ctx, p, model, tc)
		status := "✓"
		if !ok {
			status = "✗"
		}
		fmt.Printf(" %-22s %-40s %s %s\n", tc.name, tc.description, status, note)
	}

	// Dedicated $ref diagnostic: detect the double-serialization bug.
	// CLEVER: known MiniMax bug -- when a $ref points at an object type, the
	// corresponding field in the generated input is a JSON string
	// ("{\"city\":\"NYC\"}") instead of an object ({"city":"NYC"}).
	// Downstream code then sees a string where it expects a map. This check
	// verifies whether the bug is present and prints the raw field value.
	fmt.Printf("\n --- $ref 双重序列化专项诊断 ---\n")
	probeRefDoubleSerialization(ctx, p, model)
}

// probeRefDoubleSerialization is a dedicated check for the double-serialization bug
// that affects $ref-typed fields.
//
// Problem: in a MiniMax tool call, when a field's type is a $ref to another object
// definition, the generated JSON input holds that field as a JSON string
// (e.g. "{\"city\":\"NYC\"}") rather than as a nested object (e.g. {"city":"NYC"}).
// This is a known MiniMax bug in how tool input is serialized.
//
// Method: inspect the Go type of the billing field: string means the bug is present,
// map means the value is correct.
func probeRefDoubleSerialization(ctx context.Context, p flyto.ModelProvider, model string) { reqCtx, cancel := context.WithTimeout(ctx, 45*time.Second) defer cancel() schema := json.RawMessage(`{"type":"object","$defs":{"Address":{"type":"object","properties":{"street":{"type":"string"},"city":{"type":"string"}}}},"properties":{"billing":{"$ref":"#/$defs/Address"},"shipping":{"$ref":"#/$defs/Address"}}}`) ch, err := p.Stream(reqCtx, &flyto.Request{ Model: model, MaxTokens: 256, Messages: []flyto.Message{ {Role: flyto.RoleUser, Blocks: []flyto.Block{flyto.TextBlock( "Call probe_tool with billing={street:'Main St',city:'NYC'} and shipping={street:'Broadway',city:'LA'}. You MUST call the tool.", )}}, }, Tools: []flyto.Tool{ { Name: "probe_tool", Description: "Test tool. You MUST call this.", InputSchema: schema, }, }, }) if err != nil { fmt.Printf(" connect-err: %v\n", err) return } var toolInput map[string]any var textResp strings.Builder for evt := range ch { switch e := evt.(type) { case *flyto.ToolUseEvent: toolInput = e.Input case *flyto.TextDeltaEvent: textResp.WriteString(e.Text) case *flyto.ErrorEvent: fmt.Printf(" stream-err: %v\n", e.Err) return } } if toolInput == nil { fmt.Printf(" 未触发工具调用,模型文本回复: %s\n", textResp.String()) return } billing := toolInput["billing"] switch v := billing.(type) { case string: // 双重序列化 bug:值是 JSON 字符串 fmt.Printf(" BUG 确认: billing 字段类型=string(双重序列化),值=%q\n", v) case map[string]any: // 正常:值是嵌套对象 fmt.Printf(" 正常: billing 字段类型=object,street=%v, city=%v\n", v["street"], v["city"]) default: fmt.Printf(" 未知类型: billing=%T(%v)\n", billing, billing) } } // testSchemaCase 测试单个 schema 用例: // 1. 发送含该 schema 的工具定义 // 2. 要求模型调用该工具 // 3. 
验证收到的 tool_use 参数是否符合 schema 类型约束 func testSchemaCase(ctx context.Context, p flyto.ModelProvider, model string, tc schemaCase) (bool, string) { reqCtx, cancel := context.WithTimeout(ctx, 45*time.Second) defer cancel() // 压缩 schema(去掉多余空白) var schemaObj any if err := json.Unmarshal([]byte(tc.schema), &schemaObj); err != nil { return false, "invalid-test-schema: " + err.Error() } schemaBytes, _ := json.Marshal(schemaObj) ch, err := p.Stream(reqCtx, &flyto.Request{ Model: model, MaxTokens: 256, Messages: []flyto.Message{ {Role: flyto.RoleUser, Blocks: []flyto.Block{flyto.TextBlock( "Please call the probe_tool function with any valid input that matches its schema.", )}}, }, Tools: []flyto.Tool{ { Name: "probe_tool", Description: "A probe tool for testing schema support. Call it with any valid input.", InputSchema: json.RawMessage(schemaBytes), }, }, }) if err != nil { msg := err.Error() if len(msg) > 80 { msg = msg[:80] + "..." } return false, "connect-err: " + msg } var gotToolUse bool var toolInput map[string]any var streamErr string for evt := range ch { switch e := evt.(type) { case *flyto.ToolUseEvent: gotToolUse = true toolInput = e.Input case *flyto.ErrorEvent: streamErr = e.Err.Error() if len(streamErr) > 80 { streamErr = streamErr[:80] + "..." } } } if streamErr != "" { return false, "stream-err: " + streamErr } if !gotToolUse { return false, "no tool_use event (model responded in text)" } // 检查返回的 input 是否是有效 JSON object if toolInput == nil { return false, "tool called but input is nil" } inputBytes, _ := json.Marshal(toolInput) inputPreview := string(inputBytes) if len(inputPreview) > 60 { inputPreview = inputPreview[:60] + "..." } return true, fmt.Sprintf("input=%s", inputPreview) } // ───────────────────────────────────────────────────── // 工具函数 // ───────────────────────────────────────────────────── // drain 消费并丢弃 channel 剩余事件,防止 goroutine 泄漏. func drain(ch <-chan flyto.Event) { for range ch { } } // loadEnvFile 从文件加载 KEY=VALUE 到环境变量(已存在的不覆盖). 
//
// Parsing rules: blank lines and lines starting with '#' are skipped; a leading
// "export" keyword is accepted so that files written for `source .env` work;
// whitespace around the key and value is trimmed; one pair of matching single or
// double quotes around the value is removed; lines without '=' or with an empty
// key are ignored. A missing or unreadable file is silently ignored (best effort),
// while read and Setenv failures are reported on stderr.
func loadEnvFile(path string) {
	f, err := os.Open(path)
	if err != nil {
		return // best effort: the .env file is optional
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// Shell-style files often read "export KEY=value"; drop the keyword.
		if strings.HasPrefix(line, "export ") || strings.HasPrefix(line, "export\t") {
			line = strings.TrimSpace(line[len("export"):])
		}

		key, val, found := strings.Cut(line, "=")
		if !found {
			continue
		}
		key = strings.TrimSpace(key)
		val = strings.TrimSpace(val)
		if key == "" {
			continue
		}
		// A shell would strip surrounding quotes; do the same so KEY="abc" yields abc.
		if n := len(val); n >= 2 && (val[0] == '"' || val[0] == '\'') && val[n-1] == val[0] {
			val = val[1 : n-1]
		}

		// Variables that are unset or set to the empty string count as missing.
		if os.Getenv(key) != "" {
			continue
		}
		if err := os.Setenv(key, val); err != nil {
			fmt.Fprintf(os.Stderr, "loadEnvFile: set %s: %v\n", key, err)
		}
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "loadEnvFile: read %s: %v\n", path, err)
	}
}