// This file defines the v0.2+ canonical interface contracts for evolve. // See doc.go for package overview, interface topology, reference // implementations, strategic background, and academic references. package evolve import ( "context" "errors" "time" ) // ============================================================================ // Generator: LLM 多方案生成 // ============================================================================ // Generator 生成 K 个候选方案. 一次 LLM 调用可产出多个候选 (做法 B, 见 // docs/evolve-strategy.md §6.1), 通常覆盖 4-6 次 LLM 调用 × 平均 3 候选. // // 为什么 K 是运行时参数而非构造时: // 同一 Generator 在不同阶段需要不同 K (探索阶段 K=10, 稳态 K=3). // Generate 参数化让一个 Generator 可复用于 fast loop 和 slow loop. // // 为什么 Candidate.Payload 用 any: // 不同场景的 Candidate 结构差异巨大 (YAML / 承运商 ID / 补丁对象), // 引擎层无法预设统一结构. // 替代方案 (已放弃): interface{ Key() string; Payload() []byte } -- // 强制 caller 额外包装, 且序列化开销不必要. type Generator interface { Generate(ctx context.Context, input any, K int, opts ...GenOpt) ([]Candidate, error) } // Candidate 是 Generator 产出的单个候选方案. Meta 携带生成时的元数据 // (temperature / model / role), 供 Evaluator 和 ParameterEvolver 回溯. type Candidate struct { ID string Payload any Meta map[string]any } // GenOpt 是 Generator.Generate 的 functional option. 预期选项包括 // 温度 / 模型 / 角色 / 约束 / 历史注入 (见 docs/evolve-strategy.md §6.1). type GenOpt func(*genConfig) type genConfig struct { Temperature float64 TopP float64 Roles []string Extras map[string]any } // WithTemperature sets LLM sampling temperature for this Generate call. // Zero is treated as "unset" (use the LLMClient/provider default), so a // caller wanting deterministic 0.0 sampling must instead bypass this option // and configure provider Config.Temperature directly. See LLMCallOpts godoc // for the rationale. // // WithTemperature 设置本次 Generate 调用的 LLM 采样温度. 零值视为 "未设" // (用 LLMClient / provider 默认), 想要严格 deterministic 0.0 的调用方 // 应绕过本 option 直接在 provider Config 层固定. 详见 LLMCallOpts godoc. 
func WithTemperature(t float64) GenOpt { return func(c *genConfig) { c.Temperature = t } } // WithTopP sets LLM nucleus sampling cutoff (top_p) for this Generate call. // Zero is treated as "unset" with the same rationale as WithTemperature. // // WithTopP 设置本次 Generate 调用的 LLM nucleus 采样阈值 (top_p). // 零值视为 "未设", 语义同 WithTemperature. func WithTopP(p float64) GenOpt { return func(c *genConfig) { c.TopP = p } } // WithRoles sets role-play prompts (做法 D in docs/evolve-strategy.md §6.1). func WithRoles(roles ...string) GenOpt { return func(c *genConfig) { c.Roles = roles } } // ============================================================================ // Evaluator: 便宜打分器 // ============================================================================ // Evaluator 对 Candidate 打 fitness 分, 返回总分和分项 breakdown. // // breakdown 的三个用途: // 1. 调试 -- 哪个维度低导致总分低 // 2. 长反射 -- 发现某维度预测不准 (MetaEvaluator 未来的输入) // 3. 透明度面板 -- 给客户展示评分依据 // // breakdown 可为 nil (单维度评分时). type Evaluator interface { Score(ctx context.Context, c Candidate) (fitness float64, breakdown map[string]float64, err error) } // ============================================================================ // Reflector: LogReplayer 的消费端 // ============================================================================ // Reflector 消费 LogReplayer 推送的事件, 转化为参数调整建议. // // 为什么独立于 ParameterEvolver: // Reflector 是 event-driven (被 LogReplayer 回调), ParameterEvolver // 是 on-demand (caller 主动问). 一个 Reflector 实现内部可调多个 // ParameterEvolver 更新不同参数. 保持两者独立避免循环依赖. // // 错误语义: // OnEvent 返回 error 时 LogReplayer 不会停止, 会记录错误并继续下一条. // Reflector 实现不应假设事件处理顺序原子性 -- 同 key 多条事件可能并发到达. type Reflector interface { OnEvent(ctx context.Context, event ReplayEvent) error } // ReplayEvent is the event LogReplayer pushes to registered Reflectors. // Feedback is nil when the decision has just landed and KPI has not arrived // yet. 
Meta is a free-form extension slot for LogReplayer implementations to // attach replay-time context (replay session ID, upstream source, causal // cohort tag) that downstream Reflectors may consume -- external replayer // impls fill it; the in-tree DefaultLogReplayer leaves it nil. // // ReplayEvent 是 LogReplayer 推送给 Reflector 的事件. Feedback 为 nil 表示 // KPI 尚未到达 (决策刚发生, 反馈延迟中). Meta 是给外部 LogReplayer 实现预留 // 的 free-form 扩展槽, 可挂 replay 会话 ID / 上游来源 / 因果分组 tag 等 // 供下游 Reflector 消费 -- 内置 DefaultLogReplayer 不填, 字段留着给外部 // replayer 实现激活. type ReplayEvent struct { Log LogEntry Feedback *Feedback Meta map[string]any } // ============================================================================ // ParameterStore: 版本化参数存储 // ============================================================================ // ParameterStore 是 evolve 参数的版本化存储. // // key 语义: // 领域无关字符串 (如 "evolve.carrier_risk_penalty.Y-express"), caller // 约定命名 scheme. 引擎不假设 key 结构. // 替代方案 (已放弃): 结构化 ParamKey{Domain,Category,Entity} -- // 违反"领域无关"原则 (引擎约束 scheme). // // reason 强制 (合规硬需求): // Set / Rollback / Lock / Unlock 的 reason 空字符串返回 ErrReasonRequired. // 审计链的每条变更必须有人类可读原因. // // Rollback 语义: // 回滚 = "复制旧版本到新版本号", 保持审计链完整, 不删除中间历史. // // Lock 独立于配置开关的原因: // Lock 是原子操作 + 带审计事件 (谁 Lock / 为何 Lock). 配置开关 + Set // 拦截器的组合方案有 race condition 且无审计. // // 多实现共存: // 引擎提供 FileParameterStore (本地文件系统, CLI/TUI). platform 层 // 自行实现 SQL 版本 (多租户 + 分布式审计). 接口契约保证切换无缝. 
type ParameterStore interface { Get(ctx context.Context, key string) (value any, version int, err error) Set(ctx context.Context, key string, value any, reason string) (newVersion int, err error) List(ctx context.Context, prefix string) (keys []string, err error) History(ctx context.Context, key string, limit int) ([]Change, error) Rollback(ctx context.Context, key string, toVersion int, reason string) (newVersion int, err error) Lock(ctx context.Context, key string, reason string) error Unlock(ctx context.Context, key string, reason string) error Watch(ctx context.Context, keyPrefix string) (<-chan ChangeEvent, error) } // Change 是一次参数变更的历史记录. Author 记录改动发起方 // (evolver id / 人工 user / rollback / lock / unlock). type Change struct { Version int Value any Reason string Timestamp time.Time Author string } // ChangeEvent is what ParameterStore.Watch pushes to subscribers. IsLock=true // marks a Lock/Unlock event (Change.Value is nil in that case); otherwise // it's a Set/Rollback. Change carries the full audit row (Version / Value / // Reason / Timestamp / Author) so subscribers do not need a follow-up // History() call. // // Consumers are external: dashboards rendering the audit timeline, platform // services fanning Watch out to tenants, test harnesses asserting ordering. // scan-baseline.json lists Change / IsLock as dead because no in-tree code // reads them after the test harness verifies forward-propagation in // parameter_store_file_test.go -- expected pull-API state. // // ChangeEvent 是 ParameterStore.Watch 推送给订阅方的事件. IsLock=true 标记 // Lock/Unlock (此时 Change.Value 为 nil), 否则是 Set/Rollback. Change 带完整 // 审计行 (Version/Value/Reason/Timestamp/Author), 订阅方不用再回 History(). // // 消费方在 core 之外: 审计时间线 dashboard / platform 把 Watch 分发给多租户 / // 测试检查顺序. scan-baseline.json 把 Change / IsLock 列 dead 是因为 core // 内无 reader -- test 在 parameter_store_file_test.go 锁了 forward, 外部 // pull 消费是预期形态. 
type ChangeEvent struct { Key string Change Change IsLock bool } // ============================================================================ // ParameterEvolver: 参数演化器 // ============================================================================ // ParameterEvolver 根据 evidence 提议新参数值. // // 职责吸收原 SelectionPressure: // 偏好权重本身就是参数的一种, 和规则系数 / 阈值无本质区别, 合并到 // ParameterEvolver. 一个实现可同时处理多种参数类型. // // Propose / Apply 两阶段: // Propose 只返回建议值 + 置信度, 不写 ParameterStore. caller 决定是否 // Apply (可能还要过 ApprovalFunc). 两阶段支持: // - 人工审批 (Propose → 操作员看 → Apply) // - Shadow 模式 (Propose → ShadowRunner 对比 → Apply) // - 批量决策 (多个 Propose 一起审批) // Propose 接收 []Feedback 作为 evidence: Feedback 本身 (下方 KPI 通道 // 返回的一条 KPI 反馈) 字段完整包含所有 Propose 需要的证据维度 (Entity / // Metric / Value / Confidence / Timestamp / Meta). 早期版本另定义了一个 // 字段完全相同、用途重合的 FeedbackRecord, 造成"同一语义两份类型"的 // 设计冗余, 本轮合并清理. // // Propose takes []Feedback as evidence: Feedback (the KPI channel record // defined below) already carries every dimension Propose needs (Entity / // Metric / Value / Confidence / Timestamp / Meta). Earlier revisions // defined a field-identical, semantically-overlapping FeedbackRecord // sibling -- "one meaning, two types" duplication. This round collapses // them into Feedback. type ParameterEvolver interface { Propose(ctx context.Context, key string, evidence []Feedback) (proposedValue any, confidence float64, err error) Apply(ctx context.Context, key string, value any, approved bool, reason string) error } // ============================================================================ // LogReplayer: 历史日志回放 // ============================================================================ // LogReplayer 扫历史决策日志, 配对真实 KPI 反馈, 推送给已注册 Reflector. // // 为什么叫 Replayer 不叫 Scanner: // Replayer 强调"重放决策时刻的完整上下文" -- 不只读日志, 还和 // FeedbackChannel 配对真实后果. Scanner 只读不配. // // 调度边界: // 扫描频率 (每日批 / 实时 / 事件触发) 由外部调度器控制, 不在 // LogReplayer 接口范围. LogReplayer 只提供 Replay 方法, 调度交给 // caller (cron / platform 层调度器 / 手动触发). 
type LogReplayer interface { Replay(ctx context.Context, from, to time.Time, filter FilterFunc) error RegisterReflector(r Reflector) } // FilterFunc 决定 LogEntry 是否进入 Replay 流. true 表示通过. type FilterFunc func(entry LogEntry) bool // ============================================================================ // LogSource: 历史决策日志源 // ============================================================================ // LogSource 是 LogReplayer 的底层数据源. // // 为什么独立于 LogReplayer: // LogSource 专注"数据读取", LogReplayer 专注"回放逻辑" (配对 / 过滤 / // 分发). 同一 LogReplayer 可接不同 LogSource (文件 / SQL / 对象存储). type LogSource interface { Read(ctx context.Context, from, to time.Time) (<-chan LogEntry, error) } // LogEntry 是一条决策日志的最低契约. Payload 存具体决策内容, // 不同场景结构不同. type LogEntry struct { Timestamp time.Time DecisionID string Entity string Payload any Meta map[string]any } // ============================================================================ // FeedbackChannel: KPI 双向通道 // ============================================================================ // FeedbackChannel 是业务 KPI 反馈的双向通道. // // 合并原 FitnessSignal + FeedbackSource: // 早期设计 FitnessSignal 负责 push, FeedbackSource 负责 pull. 两者是 // 同一份数据的两种访问模式, 合并减少概念负担. 实现可任选或都支持. // // 领域无关: // Metric 是字符串 (如 "carrier_on_time_rate"), Value 是 float64 标量. // 非标量 KPI 通过 Meta 传递. 引擎不假设任何业务 KPI 结构. type FeedbackChannel interface { Report(ctx context.Context, entity string, metric string, value float64, confidence float64, meta map[string]any) error Query(ctx context.Context, entity string, since time.Time, metric string) ([]Feedback, error) } // Feedback 是一条业务 KPI 反馈记录. type Feedback struct { Timestamp time.Time Entity string Metric string Value float64 Confidence float64 Meta map[string]any } // ============================================================================ // ShadowRunner: 影子模式执行 // ============================================================================ // ShadowRunner 把候选参数值影子运行, 不影响生产, 返回 fitness 对比. 
// // Traffic 语义: // 0.0-1.0 之间, 表示影子模式覆盖的流量比例. 0.1 = 10% 请求走影子, // 90% 走生产. 1.0 = 纯离线 replay 不影响实时流量. // // 和 ParameterStore 的边界: // ShadowRunner 不写 ParameterStore, 只做"并行跑 + 评分". caller 根据 // ShadowResult 决定是否调 ParameterEvolver.Apply 推生产. // // 三步灰度流程 (v0.4 RL policy 落地路径): // 1. Shadow (ShadowRunner, 生产不受影响) // 2. A/B (小比例灰度, 1-10% 真实流量) // 3. 生产 (全量切换) // // ShadowRunner 负责第 1 步, 后两步由 platform 层调度. type ShadowRunner interface { RunShadow(ctx context.Context, baselineKey string, candidateValue any, traffic float64) (ShadowResult, error) } // ShadowResult is the per-run comparison produced by ShadowRunner. Divergence // sits on 0-1 (0 = baseline and candidate agree on every sample, 1 = they // disagree everywhere). // // BaselineBreakdown / CandidateBreakdown hand back the per-dimension score // decomposition (the same breakdown Evaluator returns), so the gate UI can // explain why the aggregate fitness differs -- not just "candidate is 0.03 // higher" but "candidate wins on latency, loses on cost". Meta is a free-form // slot for runners to annotate the run (traffic window, tenant, sampler // version). // // All three are dead in scan-baseline because core ships only the shadow // scoring path; the decision UI / A-B gate scheduler / operator dashboard // that consume breakdowns live in platform or caller code. test lock is in // shadow_runner_default_test.go; Meta has no test lock (extension slot, by // design). // // ShadowResult 是 ShadowRunner 单次影子运行的对比结果. Divergence 在 0-1 // 之间 (0 = 每个样本 baseline 和 candidate 一致, 1 = 全部分歧). // // BaselineBreakdown / CandidateBreakdown 把 Evaluator 返回的分维度打分透出 // 来, 让灰度决策面板能解释总分差异来源 -- 不只是 "候选高 0.03", 而是 "候选 // 胜在延迟, 输在成本". Meta 是 free-form 扩展槽, runner 可挂流量窗口 / 租户 / // sampler 版本等标注. // // 3 字段在 scan-baseline 里标 dead 是因为 core 只出影子打分主路径; 消费 // breakdown 的决策 UI / A-B 灰度调度器 / 运营 dashboard 在 platform 或 // caller 侧. test 锁在 shadow_runner_default_test.go; Meta 无 test 锁 // (扩展槽, 刻意留白). 
type ShadowResult struct {
	BaselineFitness    float64
	CandidateFitness   float64
	BaselineBreakdown  map[string]float64
	CandidateBreakdown map[string]float64
	SampleSize         int
	Divergence         float64
	Meta               map[string]any
}

// ============================================================================
// ApprovalFunc (defined in evolve.go)
// ============================================================================
//
// ApprovalFunc is the approval hook for parameter / proposal changes, defined
// in evolve.go as a function type (not an interface). Included in this matrix
// for documentation completeness. Signature:
//
//	func(ctx context.Context, proposal *EvolutionProposal) (bool, error)
//
// Returning false rejects the change. nil ApprovalFunc rejects all proposals
// (safe default, see Config.ApprovalFunc in evolve.go).

// ============================================================================
// Errors
// ============================================================================

// Sentinel errors of the evolve contracts. Callers match them with errors.Is.
var (
	// ErrParameterNotFound is returned when Get / History / Rollback is
	// called on a non-existent key.
	ErrParameterNotFound = errors.New("evolve: parameter not found")

	// ErrReasonRequired is returned when Set / Rollback / Lock / Unlock is
	// called with an empty reason. The audit chain requires a human-readable
	// reason.
	ErrReasonRequired = errors.New("evolve: reason required for audit chain")

	// ErrParameterLocked is returned when Set is called on a locked key.
	// Unlock first, or use Rollback (allowed on locked keys by design).
	ErrParameterLocked = errors.New("evolve: parameter is locked")

	// ErrVersionNotFound is returned when Rollback targets a non-existent
	// version.
	ErrVersionNotFound = errors.New("evolve: version not found")

	// ErrEntityRequired is returned by FeedbackChannel.Report / Query when
	// entity is the empty string. Entity distinguishes data streams (carrier
	// / driver / route / ...); collapsing it into a single bucket would
	// poison Query results.
	ErrEntityRequired = errors.New("evolve: entity required")

	// ErrMetricRequired is returned by FeedbackChannel.Report when metric is
	// the empty string. Query accepts an empty metric as "all metrics".
	ErrMetricRequired = errors.New("evolve: metric required")

	// ErrInvalidK is returned by Generator.Generate when K <= 0.
	ErrInvalidK = errors.New("evolve: K must be > 0")

	// ErrLLMFailed wraps errors from the underlying LLM Complete call so
	// callers can distinguish LLM failure from parse failure via errors.Is.
	ErrLLMFailed = errors.New("evolve: LLM call failed")

	// ErrCandidateParseFailed is returned when Generator cannot extract a
	// JSON array of candidates from the LLM response. The caller decides
	// whether to retry, relax parsing, or fall back to a static candidate
	// set.
	ErrCandidateParseFailed = errors.New("evolve: candidate parse failed")
)