// Package deepseek implements a flyto.ModelProvider for DeepSeek's official API. // // DeepSeek 官方 API 实现, ADR-0007 § 2.1 第 5 个 direct provider. // // DeepSeek offers two API formats: // 1. OpenAI compat - https://api.deepseek.com (default, primary) // 2. Anthropic compat - https://api.deepseek.com/anthropic // // Default ModeOpenAI rationale: // - The reasoning_content passback contract (r24 root cause) is wired in // the OpenAI-compat wire layer (ADR-0007 C4 commit 17f7ba4); routing // production traffic through ModeOpenAI directly consumes that fix. // - Anthropic-compat ignores budget_tokens / cache_control / top_k / // image / mcp_servers (per /guides/anthropic_api). // - Anthropic-compat silently falls back unknown model ids to v4-flash // (per /guides/anthropic_api), masking probe / config bugs. // // 默认 ModeOpenAI 原因: // - reasoning_content passback 协议 (r24 真因) 在 OpenAI compat wire 层 // (ADR-0007 C4 commit 17f7ba4) 已修, 走 OpenAI compat 直接接通已修的 wire 层. // - Anthropic compat 限制更多: budget_tokens / cache_control / top_k / // image / mcp_servers 全部 ignore (官方文档 /guides/anthropic_api). // - Anthropic compat 有静默 model fallback 坑: 送错 ID 不 4xx 自动落到 // v4-flash, 掩盖 probe / 配置错误. package deepseek import ( "context" "fmt" "net/http" "time" api "git.flytoex.net/yuanwei/flyto-agent/internal/transport" "git.flytoex.net/yuanwei/flyto-agent/internal/transport/retry" "git.flytoex.net/yuanwei/flyto-agent/internal/wire" "git.flytoex.net/yuanwei/flyto-agent/pkg/flyto" "git.flytoex.net/yuanwei/flyto-agent/pkg/providers/shared" ) // Mode selects the API format used to talk to DeepSeek. // // Mode 选择对接 DeepSeek 的 API 格式. type Mode string const ( // ModeOpenAI uses the OpenAI-compatible endpoint at /v1/chat/completions // with Authorization: Bearer auth. This is the default and primary path. // // ModeOpenAI 走 OpenAI 兼容端点 /v1/chat/completions, Bearer 鉴权. // 默认主路径. ModeOpenAI Mode = "openai" // ModeAnthropic uses the Anthropic-compatible endpoint at // /anthropic/v1/messages with x-api-key auth. anthropic-version / // anthropic-beta headers are ignored by DeepSeek. // // ModeAnthropic 走 Anthropic 兼容端点 /anthropic/v1/messages, x-api-key // 鉴权. anthropic-version / anthropic-beta header 被 DeepSeek 忽略. ModeAnthropic Mode = "anthropic" ) // Config configures the DeepSeek provider. // // Config 是 DeepSeek provider 的配置. type Config struct { // APIKey is the DeepSeek API key (from platform.deepseek.com). // // APIKey 是 DeepSeek API 密钥 (从 platform.deepseek.com 获取). APIKey string // BaseURL overrides the default endpoint. Empty uses // https://api.deepseek.com (the API path suffix is appended per Mode). // // BaseURL 覆盖默认端点. 空字符串使用 https://api.deepseek.com (Mode 决定追加路径后缀). BaseURL string // Mode selects OpenAI vs Anthropic compat (default ModeOpenAI). // // Mode 选择 OpenAI vs Anthropic 兼容 (默认 ModeOpenAI). Mode Mode // ThinkingBudget enables thinking mode and sets the max reasoning tokens. // 0 = disabled. // // ModeAnthropic ignores this value per DeepSeek docs but the field is // still accepted for symmetry with other providers. // // ThinkingBudget 启用思考模式并设置 reasoning token 上限. 0 = 禁用. // ModeAnthropic 据 DeepSeek 文档 ignore 此值, 但仍接受参数保持与其他 // provider 接口对称. ThinkingBudget int // HTTPClient injects a custom HTTP client. nil uses the default // http.Client with ResponseHeaderTimeout set from Timeout below. // When non-nil the consumer fully owns timeout policy and Timeout is // ignored (do not set http.Client.Timeout - it kills SSE streams). // // HTTPClient 注入自定义 HTTP 客户端. nil 使用默认 http.Client (带 Timeout // 字段配置的 ResponseHeaderTimeout). 非 nil 时 consumer 完全接管超时 // 责任, Timeout 字段被忽略 (不要设 http.Client.Timeout - 会砍死 SSE 流). HTTPClient *http.Client // Timeout limits the time from request send to first response byte. // 0 = use defaultTimeout (60s). Implemented via // http.Transport.ResponseHeaderTimeout, does NOT affect SSE body reads // (DeepSeek long thinking can still complete). // // Timeout 限制 "从请求发出到收到响应首字节" 的时间. 0 = 使用 defaultTimeout // (60s). 通过 http.Transport.ResponseHeaderTimeout 实现, 不影响 SSE 流式 // 响应的后续 body 读取 (DeepSeek 长思考模式可以正常读完). Timeout time.Duration // ModelOverrides replaces the static model table returned by Models(). // Useful for tests / preview models not in the default table. // // ModelOverrides 覆盖 Models() 返回的静态模型表, 用于测试 / 预览模型注入. ModelOverrides []flyto.ModelInfo } // 60s suits DeepSeek cloud (api.deepseek.com); thinking-mode bodies stream // for minutes but only the first byte is bounded by ResponseHeaderTimeout. // // 60s 对 DeepSeek 云端 (api.deepseek.com) 合理; 思考模式 body 流式可达数分钟 // 但 ResponseHeaderTimeout 只约束首字节. const defaultTimeout = 60 * time.Second // GoString masks the API key in printf("%#v") output. // // GoString 在 printf("%#v") 输出中遮蔽 API key. func (c Config) GoString() string { return shared.GoStringWithMaskedKey("deepseek.Config", c.APIKey) } // Provider is the DeepSeek API client (one of wireClient or anthroClient // is non-nil based on Mode). // // Provider 是 DeepSeek API 客户端 (按 Mode 二选一持有 wireClient 或 anthroClient). type Provider struct { cfg Config baseURL string wireClient *wire.OpenAICompatClient // ModeOpenAI 路径 anthroClient *api.Client // ModeAnthropic 路径 } // New constructs a DeepSeek provider. // // New 构造 DeepSeek provider. func New(cfg Config) *Provider { if cfg.Mode == "" { cfg.Mode = ModeOpenAI } baseURL := cfg.BaseURL if baseURL == "" { baseURL = "https://api.deepseek.com" } p := &Provider{cfg: cfg, baseURL: baseURL} switch cfg.Mode { case ModeAnthropic: // Anthropic compat path: /anthropic/v1/messages with x-api-key auth. // DeepSeek ignores anthropic-version / anthropic-beta but the // transport client injects them harmlessly for parity with the // official Anthropic provider (so consumers migrating from // anthropic.New(BaseURL=...) get the same retry / classifier // surface). budget_tokens is silently ignored by DeepSeek. // // Anthropic 兼容路径: /anthropic/v1/messages, x-api-key 鉴权. // DeepSeek 忽略 anthropic-version / anthropic-beta, 但 transport // client 仍无害注入以与官方 anthropic provider 对等 (让从 // anthropic.New(BaseURL=...) 迁移的消费者获得相同 retry / // classifier 表面). budget_tokens 被 DeepSeek 静默忽略. opts := []api.ClientOption{ api.WithMessagePath("/v1/messages"), api.WithAPIVersion("2023-06-01"), api.WithClassifier(&api.AnthropicClassifier{Hinter: &api.DefaultHinter{}}), api.WithRetryPolicy(retry.NewAnthropicRetryPolicy(retry.AnthropicRetryOpts{})), api.WithOverflowHandler(retry.DefaultOverflowHandler()), } if cfg.HTTPClient != nil { opts = append(opts, api.WithHTTPClient(cfg.HTTPClient)) } else { timeout := cfg.Timeout if timeout == 0 { timeout = defaultTimeout } opts = append(opts, api.WithResponseHeaderTimeout(timeout)) } // DeepSeek Anthropic compat 文档明示 x-api-key 是 "fully supported"; // transport 默认 x-api-key (不调 WithBearerAuth). // DeepSeek Anthropic-compat docs explicitly say x-api-key is // "fully supported"; transport defaults to x-api-key (do not call // WithBearerAuth here). p.anthroClient = api.NewClient(cfg.APIKey, baseURL+"/anthropic", opts...) default: // OpenAI compat path: /v1/chat/completions with Bearer auth. // wire.OpenAICompatClient hardcodes Bearer auth (the OpenAI // industry standard) which matches DeepSeek's OpenAI-compat // endpoint exactly. // // OpenAI 兼容路径: /v1/chat/completions, Bearer 鉴权. // wire.OpenAICompatClient 硬编 Bearer (OpenAI 行业标准), // 与 DeepSeek OpenAI 兼容端点完全匹配. wireOpts := []wire.OpenAICompatOption{} if cfg.HTTPClient != nil { wireOpts = append(wireOpts, wire.WithHTTPClient(cfg.HTTPClient)) } else { timeout := cfg.Timeout if timeout == 0 { timeout = defaultTimeout } wireOpts = append(wireOpts, wire.WithResponseHeaderTimeout(timeout)) } p.wireClient = wire.NewOpenAICompatClient(cfg.APIKey, baseURL, wireOpts...) } return p } var _ flyto.ModelProvider = (*Provider)(nil) // Name returns the provider identifier. // // Name 返回 provider 标识. func (p *Provider) Name() string { return "deepseek" } // Stream issues a streaming request to DeepSeek. // // DeepSeek does not support image / document / video input on either // V4 model (per official docs); image blocks are rejected up front // with a typed error from shared.CheckNoImageBlocks rather than being // forwarded for the server to reject. // // Stream 向 DeepSeek 发起流式请求. // // DeepSeek 两个 V4 模型均不支持图像 / 文档 / 视频输入 (官方文档), // image block 在 shared.CheckNoImageBlocks 提前用 typed error 拒绝, // 不下发给服务端避免无意义的 4xx. func (p *Provider) Stream(ctx context.Context, req *flyto.Request) (<-chan flyto.Event, error) { if err := shared.CheckNoImageBlocks(req.Messages, "deepseek"); err != nil { return nil, err } // want × can warning: thinking opt-in 但模型不支持 → emit warning // (silent disable 变 loud, ADR-0007 § 红线之外的诊断提示, 不阻断). // want×can warning: thinking opt-in but model lacks support → // emit warning so silent disable becomes loud (advisory, does // not block the request). warnings := p.detectFeatureWarnings(req) var ch <-chan flyto.Event var err error switch p.cfg.Mode { case ModeAnthropic: ch, err = p.streamAnthropic(ctx, req) default: ch, err = p.streamOpenAI(ctx, req) } if err != nil { return nil, err } if len(warnings) > 0 { return prependWarnings(ch, warnings), nil } return ch, nil } // streamOpenAI dispatches via the OpenAI-compat wire client. // // streamOpenAI 走 OpenAI 兼容 wire client. func (p *Provider) streamOpenAI(ctx context.Context, req *flyto.Request) (<-chan flyto.Event, error) { tools := make([]flyto.Tool, len(req.Tools)) copy(tools, req.Tools) for i, t := range tools { // Best-effort $ref expansion + OpenAI schema adaptation; // errors fall back to original schema (let the server reject). // $ref 展开 + OpenAI schema 适配 best-effort; 错时 fallback 原 schema 让服务端裁决. if expanded, err := wire.DereferenceSchema(t.InputSchema); err == nil { tools[i].InputSchema = expanded } tools[i].InputSchema = wire.AdaptSchema(tools[i].InputSchema, "openai") } wireReq := &wire.StreamRequest{ Model: req.Model, Messages: req.Messages, System: req.System, MaxTokens: req.MaxTokens, Tools: tools, ResponseFormat: req.ResponseFormat, Temperature: req.Temperature, TopP: req.TopP, } // ADR-0007 C4 capability passback with provider-level fallback. // // Preference order: registry-injected req.Capabilities > static // deepseekModels lookup. The fallback is critical for DeepSeek // because reasoning_content passback (r24 root cause) is required // for tool-calling correctness; consumers that forget to wire // ModelInfo into a ModelRegistry would otherwise hit a 400 from // the DeepSeek server every multi-turn tool-calling session. // // Zero values still skip behavior in wire.go (ADR-0007 § zero- // regression contract), so the fallback only activates for known // DeepSeek model ids; unknown models pass through unchanged. // // ADR-0007 C4 capability passback + provider 兜底. // // 优先级: registry 注入的 req.Capabilities > 静态 deepseekModels // 查表. DeepSeek 必须 fallback 是因为 reasoning_content passback // (r24 真因) 对 tool-calling 正确性是硬要求; 消费者忘记把 ModelInfo // 注册进 ModelRegistry 就会每个多轮 tool-calling session 都被 // DeepSeek 服务端 400 拒. // // 零值仍由 wire.go 跳过行为 (ADR-0007 § 零回归契约), 兜底只对已知 // DeepSeek model id 生效, 未知 model 保持透传不变. wireReq.ReasoningPassbackMode, wireReq.ToolNameRegex = resolveCapabilities(req) if p.cfg.ThinkingBudget > 0 || req.NeedsThinking { wireReq.Reasoning = &wire.Reasoning{ MaxTokens: p.cfg.ThinkingBudget, Enabled: true, } } return p.wireClient.Stream(ctx, wireReq) } // streamAnthropic dispatches via the Anthropic-compat client. // // streamAnthropic 走 Anthropic 兼容 client. func (p *Provider) streamAnthropic(ctx context.Context, req *flyto.Request) (<-chan flyto.Event, error) { apiReq := &api.MessageRequest{ Model: req.Model, MaxTokens: req.MaxTokens, Stream: true, // DeepSeek Anthropic-compat passes Temperature / TopP through; // no client-side override (let server enforce ranges). // DeepSeek Anthropic 兼容透传 Temperature / TopP, 不做 client-side override (让服务端裁决越界). Temperature: req.Temperature, TopP: req.TopP, } if req.System != "" { apiReq.SetSystemString(req.System) } // Anthropic-compat ignores budget_tokens (DeepSeek docs) but we still // set the field for protocol parity; absence vs zero is meaningful // to the conversion layer. // Anthropic 兼容 ignore budget_tokens (DeepSeek 文档), 但仍设字段保持 // 协议对等; "缺省 vs 零值" 对转换层语义不同. budget := p.cfg.ThinkingBudget if budget == 0 && req.NeedsThinking { budget = 8000 } if budget > 0 { apiReq.Thinking = &api.ThinkingConfig{ Type: "enabled", BudgetTokens: budget, } } for _, msg := range req.Messages { role := string(msg.Role) if len(msg.Blocks) == 1 && msg.Blocks[0].Type == flyto.BlockText { apiReq.Messages = append(apiReq.Messages, api.NewTextMessage(role, msg.Blocks[0].Text)) continue } converted, err := convertBlocks(msg.Blocks) if err != nil { return nil, fmt.Errorf("deepseek: convert message blocks: %w", err) } apiReq.Messages = append(apiReq.Messages, api.NewBlockMessage(role, converted)) } for _, t := range req.Tools { apiReq.Tools = append(apiReq.Tools, api.ToolDef{ Name: t.Name, Description: t.Description, InputSchema: t.InputSchema, }) } return p.anthroClient.CreateMessageStream(ctx, apiReq) } // convertBlocks turns a flyto.Block slice into the Anthropic-compat // content array. DeepSeek Anthropic-compat does not support image / // document blocks (rejected earlier in Stream), so we only handle text // / tool_use / tool_result / thinking blocks here. // // convertBlocks 把 flyto.Block 切片转为 Anthropic 兼容的 content 数组. // DeepSeek Anthropic 兼容不支持 image / document 块 (已在 Stream 入口拒), // 这里只处理 text / tool_use / tool_result / thinking 块. func convertBlocks(blocks []flyto.Block) ([]api.ContentBlock, error) { out := make([]api.ContentBlock, 0, len(blocks)) for _, b := range blocks { switch b.Type { case flyto.BlockText: out = append(out, api.ContentBlock{Type: "text", Text: b.Text}) case flyto.BlockThinking: // thinking block 回传必须携带 signature; ProviderMetadata 缺 // 字段时空 signature 由服务端 4xx 兜底 (与 anthropic provider 同款). // thinking blocks must echo their signature; missing metadata // surfaces as a 4xx from the server (parity with anthropic). var sig string if b.ProviderMetadata != nil { sig = b.ProviderMetadata["thinking_signature"] } out = append(out, api.ContentBlock{ Type: "thinking", Text: b.ThinkingText, Signature: sig, }) case flyto.BlockToolUse: out = append(out, api.ContentBlock{ Type: "tool_use", ID: b.ToolUseID, Name: b.ToolName, Input: b.ToolInput, }) case flyto.BlockToolResult: out = append(out, api.ContentBlock{ Type: "tool_result", ToolUseID: b.ToolUseID, Content: b.ResultText, IsError: b.IsError, }) default: return nil, fmt.Errorf("deepseek: unsupported block type %q", b.Type) } } return out, nil } // Models returns the static DeepSeek model table. // // Models 返回 DeepSeek 模型静态表. func (p *Provider) Models(_ context.Context) ([]flyto.ModelInfo, error) { if len(p.cfg.ModelOverrides) > 0 { return p.cfg.ModelOverrides, nil } return deepseekModels, nil } // resolveCapabilities returns (ReasoningPassbackMode, ToolNameRegex) // for the request, preferring registry-injected req.Capabilities and // falling back to the static deepseekModels table when the registry // has not been wired. Unknown model ids return zero values (wire layer // then skips capability behavior per ADR-0007 zero-regression contract). // // resolveCapabilities 返回当前请求的 (ReasoningPassbackMode, ToolNameRegex). // 优先 registry 注入的 req.Capabilities, 未注册时 fallback 到静态 // deepseekModels 查表. 未知 model id 返回零值 (wire 层按 ADR-0007 零回归 // 契约跳过 capability 行为). func resolveCapabilities(req *flyto.Request) (mode, regex string) { if req.Capabilities != nil { mode = req.Capabilities.ReasoningPassbackMode regex = req.Capabilities.ToolNameRegex } if mode == "" || regex == "" { for _, m := range deepseekModels { if m.ID == req.Model { if mode == "" { mode = m.ReasoningPassbackMode } if regex == "" { regex = m.ToolNameRegex } break } } } return mode, regex } // detectFeatureWarnings emits a WarningEvent when ThinkingBudget / // NeedsThinking is set but the target model does not support thinking // (silent-disable visibility per ADR-0007 advisory contract). // // detectFeatureWarnings 在 ThinkingBudget / NeedsThinking 已设但目标模型不支持 // thinking 时发 WarningEvent (silent-disable 可见化, 对齐 ADR-0007 advisory 约定). func (p *Provider) detectFeatureWarnings(req *flyto.Request) []*flyto.WarningEvent { wantsThinking := p.cfg.ThinkingBudget > 0 || req.NeedsThinking if !wantsThinking { return nil } supports := false if req.Capabilities != nil { supports = req.Capabilities.SupportsThinking } else { for _, m := range deepseekModels { if m.ID == req.Model { supports = m.SupportsThinking break } } } if supports { return nil } return []*flyto.WarningEvent{{ Code: "feature_unsupported", Message: "Config.ThinkingBudget/NeedsThinking 已设置但模型 " + req.Model + " 不支持 thinking, thinking 可能被 API 忽略", Detail: "model=" + req.Model + " feature=thinking", }} } // prependWarnings emits warnings before downstream events (parity with // other providers; RFC §3 keeps this per-package, no shared helper). // // prependWarnings 在下游事件前发 warning (与其他 provider 对齐; RFC §3 // 决策保持每包独立, 不抽 shared helper). func prependWarnings(downstream <-chan flyto.Event, warnings []*flyto.WarningEvent) <-chan flyto.Event { out := make(chan flyto.Event, len(warnings)) for _, w := range warnings { out <- w } go func() { defer close(out) for evt := range downstream { out <- evt } }() return out } // deepseekModels is the static DeepSeek model table. // // Source: https://api-docs.deepseek.com/quick_start/pricing // // ADR-0007 § 2.1 capability fields: // - ProviderKind = "direct" (DeepSeek 第一方端点, 非聚合层). // - ToolNameRegex = "^[a-zA-Z0-9_-]+$" (DeepSeek 文档未明示, 默认走 // OpenAI 兼容协议最严; TD-20 prober 实测后回调整). // - ReasoningPassbackMode = "string" (r24 真因 + 官方文档双确认: // /guides/thinking_mode 明示 "with tool calls, reasoning_content // must be passed back to the API ... otherwise 400". TD-22 不需 // OpenRouter 路径单独探, deepseek 直连此值确定). // // SupportsVision = false (DeepSeek API reference 仅 text content type; // V4-flash / V4-pro 均无 image 输入). // // SupportsCaching = true (kv_cache 自动启用, 不需 cache_control 标记; // usage 字段为 prompt_cache_hit_tokens / prompt_cache_miss_tokens 而非 // OpenAI 的 cached_tokens, TD-26 跟踪 wire 层映射对齐). // // deepseekModels 是 DeepSeek 模型静态表. // // 来源: https://api-docs.deepseek.com/quick_start/pricing // // ADR-0007 § 2.1 capability 字段: // - ProviderKind = "direct" (DeepSeek 第一方端点, 非聚合层). // - ToolNameRegex = "^[a-zA-Z0-9_-]+$" (DeepSeek 文档未明示, 默认走 // OpenAI 兼容协议最严; TD-20 prober 实测后回调整). // - ReasoningPassbackMode = "string" (r24 真因 + 官方文档双确认: // /guides/thinking_mode 明示 "with tool calls, reasoning_content // must be passed back to the API ... otherwise 400"). // // SupportsVision = false (DeepSeek API 仅 text content type, V4-flash / // V4-pro 均无 image 输入). // // SupportsCaching = true (kv_cache 自动启用, 不需 cache_control; usage // 字段为 prompt_cache_hit_tokens / prompt_cache_miss_tokens 而非 OpenAI // 的 cached_tokens, TD-26 跟踪 wire 层映射对齐). var deepseekModels = []flyto.ModelInfo{ { ID: "deepseek-v4-flash", DisplayName: "DeepSeek V4 Flash", Provider: "deepseek", ToolNameRegex: `^[a-zA-Z0-9_-]+$`, ReasoningPassbackMode: "string", ProviderKind: "direct", ContextWindow: 1_000_000, MaxOutputTokens: 384_000, // Pricing per https://api-docs.deepseek.com/quick_start/pricing // (USD per 1M tokens, list price; cache write not separately // charged — DeepSeek auto-caches without explicit cache_control). // Pro 当前 75% off 促销至 2026-05-31, **静态表填原价**, 折扣是 // 对账时按促销折算, 不入静态值 (静态表跨时间稳定; dashboard // 跟此值已知有折扣差异, 修 cost=$0 → cost=理论价就够). // // 定价取自官方文档 (USD / 1M tokens, list price; cache write 不 // 单独计费 — DeepSeek auto-cache 无 explicit cache_control). InputPricePer1M: 0.14, // input cache miss OutputPricePer1M: 0.28, // output CacheReadPricePer1M: 0.0028, // input cache hit (1/50 of miss) CacheWritePricePer1M: 0, // not separately charged SupportsCaching: true, SupportsThinking: true, SupportsVision: false, // CachingMinTokens 留 0 (文档未指定阈值; 实测后通过 capability-probe 回填). // CachingMinTokens left 0 (no documented threshold; backfill via probe). CachingMinTokens: 0, // MaxTools 留 0 (文档未指定上限, probe 默认 ≥128 不触底). // MaxTools left 0 (no documented cap; probe defaults ≥128 unbroken). MaxTools: 0, }, { ID: "deepseek-v4-pro", DisplayName: "DeepSeek V4 Pro", Provider: "deepseek", ToolNameRegex: `^[a-zA-Z0-9_-]+$`, ReasoningPassbackMode: "string", ProviderKind: "direct", ContextWindow: 1_000_000, MaxOutputTokens: 384_000, // 同 Flash 注释: list price, Pro 当前 75% off 至 2026-05-31 不入 // 静态值. 1M token batch full-load = $0.435 input(miss) + $0.87 // output = $1.305 USD 上限, 折后 ~$0.326 dashboard 真实计费. InputPricePer1M: 0.435, OutputPricePer1M: 0.87, CacheReadPricePer1M: 0.003625, // 1/120 of miss CacheWritePricePer1M: 0, SupportsCaching: true, SupportsThinking: true, SupportsVision: false, CachingMinTokens: 0, MaxTools: 0, }, }