package evolve

import (
	"context"
	"errors"
	"fmt"
	"math"
)

// CandidateSampler supplies the test set for a shadow run. traffic is the
// caller-requested coverage in [0, 1]; the sampler interprets it (hash
// partition, time-window slice, random subsample, fixture replay) and
// returns the selected Candidates.
//
// DefaultShadowRunner forwards traffic to the sampler unchanged and does not
// range-check it, so a sampler that cares about out-of-range values must
// validate them itself. A non-nil error aborts the shadow run.
//
// Returning nil/empty is legal and produces a zero-SampleSize ShadowResult
// (no data, no divergence).
type CandidateSampler func(ctx context.Context, traffic float64) ([]Candidate, error)

// ParamApplier projects a parameter value onto a Candidate. The engine does
// not know how "a carrier_risk_penalty value" shows up inside a candidate
// decision; the caller encodes that projection. A minimal applier clones
// the candidate and stores the value in Meta, letting the Evaluator pick
// it up; a richer applier rewrites Payload.
//
// Purity contract: the applier must return a NEW Candidate (or a value-copy)
// rather than mutate the input. DefaultShadowRunner calls the applier twice
// per test candidate, one call after the other on the same input (first with
// the baseline value, then with the candidate value). Mutating state that
// the input shares with the returned Candidate would therefore let the
// baseline value leak into the candidate run and skew the comparison.
// NOTE(review): this presumes Candidate carries reference-typed fields (such
// as the Meta map mentioned above); confirm against the Candidate definition.
type ParamApplier func(c Candidate, paramValue any) Candidate

// DefaultShadowRunner is the reference ShadowRunner. It runs each sampled
// test Candidate twice through the Evaluator -- once with the baseline
// parameter value, once with the proposed candidate value -- and reports
// per-cohort average fitness, per-dimension mean breakdown, and a
// divergence score in [0, 1].
//
// What this does NOT do (by design):
//   - It does not touch ParameterStore.Set. Shadow comparison is read-only
//     relative to the production parameter chain. The caller decides, based
//     on the returned ShadowResult, whether to call ParameterEvolver.Apply.
//   - It does not decide how to sample. The caller supplies CandidateSampler.
//   - It does not parallelise scoring. Evaluator implementations (notably
//     FuncEvaluator wrapping caller code) are not guaranteed goroutine-safe.
// A caller that needs throughput runs multiple RunShadow calls in // parallel from the outside. // // Divergence formula: // // div = min(|avg_baseline - avg_candidate| / max(|avg_baseline|, |avg_candidate|, 1e-9), 1.0) // // CLEVER: relative divergence (not absolute) so fitness scales (1000 vs // 1001 = 0.1% diff) are comparable with small-scale scales (0.1 vs 0.5 = // 400% diff). Absolute diff would produce wildly misleading divergence // values across domains. type DefaultShadowRunner struct { store ParameterStore evaluator Evaluator sampler CandidateSampler applier ParamApplier } // NewDefaultShadowRunner constructs the runner. All four dependencies are // required; any nil is rejected at construction so the first RunShadow call // does not explode with a nil-deref. func NewDefaultShadowRunner(store ParameterStore, evaluator Evaluator, sampler CandidateSampler, applier ParamApplier) (*DefaultShadowRunner, error) { if store == nil { return nil, errors.New("evolve: NewDefaultShadowRunner requires non-nil ParameterStore") } if evaluator == nil { return nil, errors.New("evolve: NewDefaultShadowRunner requires non-nil Evaluator") } if sampler == nil { return nil, errors.New("evolve: NewDefaultShadowRunner requires non-nil CandidateSampler") } if applier == nil { return nil, errors.New("evolve: NewDefaultShadowRunner requires non-nil ParamApplier") } return &DefaultShadowRunner{ store: store, evaluator: evaluator, sampler: sampler, applier: applier, }, nil } // RunShadow implements ShadowRunner. 
func (r *DefaultShadowRunner) RunShadow(ctx context.Context, baselineKey string, candidateValue any, traffic float64) (ShadowResult, error) { if err := ctx.Err(); err != nil { return ShadowResult{}, err } baselineValue, _, err := r.store.Get(ctx, baselineKey) if err != nil { return ShadowResult{}, fmt.Errorf("evolve: shadow read baseline %q: %w", baselineKey, err) } cands, err := r.sampler(ctx, traffic) if err != nil { return ShadowResult{}, fmt.Errorf("evolve: shadow sampler: %w", err) } if len(cands) == 0 { return ShadowResult{ Meta: buildShadowMeta(baselineKey, baselineValue, candidateValue, traffic), }, nil } var sumB, sumC float64 breakB := make(map[string]float64) breakC := make(map[string]float64) for _, c := range cands { if err := ctx.Err(); err != nil { return ShadowResult{}, err } fitB, bdB, err := r.evaluator.Score(ctx, r.applier(c, baselineValue)) if err != nil { return ShadowResult{}, fmt.Errorf("evolve: shadow score baseline: %w", err) } fitC, bdC, err := r.evaluator.Score(ctx, r.applier(c, candidateValue)) if err != nil { return ShadowResult{}, fmt.Errorf("evolve: shadow score candidate: %w", err) } sumB += fitB sumC += fitC accumulateBreakdown(breakB, bdB) accumulateBreakdown(breakC, bdC) } n := float64(len(cands)) avgB := sumB / n avgC := sumC / n for k := range breakB { breakB[k] /= n } for k := range breakC { breakC[k] /= n } return ShadowResult{ BaselineFitness: avgB, CandidateFitness: avgC, BaselineBreakdown: breakB, CandidateBreakdown: breakC, SampleSize: len(cands), Divergence: relativeDivergence(avgB, avgC), Meta: buildShadowMeta(baselineKey, baselineValue, candidateValue, traffic), }, nil } // accumulateBreakdown is a map add-in-place helper. nil src is a no-op. func accumulateBreakdown(dst, src map[string]float64) { for k, v := range src { dst[k] += v } } // relativeDivergence returns min(|a-b|/max(|a|, |b|, eps), 1.0), which is // bounded in [0, 1] and behaves well for small or crossing-sign values. 
func relativeDivergence(a, b float64) float64 {
	// eps keeps the denominator away from zero when both inputs are (near)
	// zero, so two tiny values do not register as fully divergent.
	const eps = 1e-9

	// NaN has no meaningful distance to anything, and every comparison
	// against it is false, so it would slip through the clamps below and be
	// returned as-is. Report maximal divergence instead: an unknown fitness
	// must never look like "no change".
	if math.IsNaN(a) || math.IsNaN(b) {
		return 1.0
	}
	// Identical values never diverge. Handling this up front also covers two
	// infinities of the same sign, where a-b would be NaN.
	if a == b {
		return 0
	}

	diff := math.Abs(a - b)
	denom := math.Max(math.Max(math.Abs(a), math.Abs(b)), eps)
	d := diff / denom

	// An infinite value against anything different gives Inf/Inf = NaN;
	// treat it like any other out-of-range quotient and saturate at 1.
	if math.IsNaN(d) || d > 1.0 {
		return 1.0
	}
	if d < 0 {
		// Numerical defensive: diff is abs, denom positive, so d >= 0. The
		// clamp exists to guarantee the documented [0, 1] contract even if
		// a future change breaks that property.
		return 0
	}
	return d
}

// buildShadowMeta tags the result with the parameters of the run so the
// caller can correlate a ShadowResult with the Apply decision.
//
// The returned map is freshly allocated on every call and always holds
// exactly four keys: "baseline_key", "baseline_value", "candidate_value",
// and "traffic". The values are stored as given, without copying.
func buildShadowMeta(baselineKey string, baselineValue, candidateValue any, traffic float64) map[string]any {
	return map[string]any{
		"baseline_key":    baselineKey,
		"baseline_value":  baselineValue,
		"candidate_value": candidateValue,
		"traffic":         traffic,
	}
}