package evolve

import (
	"context"
	"errors"
	"math"
	"testing"
)

// shadowFixture assembles a store + evaluator + sampler + applier for tests.
// The default scoring model:
//   - applier writes paramValue into Candidate.Meta["p"]
//   - evaluator returns fitness = meta["p"] (assumed float64), breakdown
//     {"raw": meta["p"]}
//
// This gives baseline and candidate demonstrably different fitness when the
// two parameter values differ.
func shadowFixture(t *testing.T) (ParameterStore, Evaluator, *DefaultShadowRunner, []Candidate) {
	t.Helper()

	store := newStore(t)
	if _, err := store.Set(context.Background(), "baseline-p", 10.0, "seed"); err != nil {
		t.Fatal(err)
	}

	eval, err := NewFuncEvaluator(func(_ context.Context, c Candidate) (float64, map[string]float64, error) {
		p, _ := c.Meta["p"].(float64)
		return p, map[string]float64{"raw": p}, nil
	})
	if err != nil {
		t.Fatal(err)
	}

	cands := []Candidate{
		{ID: "c1", Meta: map[string]any{}},
		{ID: "c2", Meta: map[string]any{}},
		{ID: "c3", Meta: map[string]any{}},
	}
	sampler := func(_ context.Context, _ float64) ([]Candidate, error) { return cands, nil }

	applier := func(c Candidate, value any) Candidate {
		v, _ := value.(float64)
		meta := make(map[string]any, len(c.Meta)+1)
		for k, vv := range c.Meta {
			meta[k] = vv
		}
		meta["p"] = v
		return Candidate{ID: c.ID, Payload: c.Payload, Meta: meta}
	}

	runner, err := NewDefaultShadowRunner(store, eval, sampler, applier)
	if err != nil {
		t.Fatalf("NewDefaultShadowRunner: %v", err)
	}
	return store, eval, runner, cands
}

func TestShadow_HappyPath(t *testing.T) {
	_, _, r, _ := shadowFixture(t)

	res, err := r.RunShadow(context.Background(), "baseline-p", 30.0, 0.5)
	if err != nil {
		t.Fatalf("RunShadow: %v", err)
	}
	if res.SampleSize != 3 {
		t.Errorf("SampleSize: %d", res.SampleSize)
	}
	if res.BaselineFitness != 10 {
		t.Errorf("baseline avg: want 10, got %v", res.BaselineFitness)
	}
	if res.CandidateFitness != 30 {
		t.Errorf("candidate avg: want 30, got %v", res.CandidateFitness)
	}
	if res.BaselineBreakdown["raw"] != 10 || res.CandidateBreakdown["raw"] != 30 {
		t.Errorf("breakdowns: b=%+v c=%+v", res.BaselineBreakdown, res.CandidateBreakdown)
	}

	// relative divergence |10-30| / 30 = 2/3
	want := 20.0 / 30.0
	if math.Abs(res.Divergence-want) > 1e-9 {
		t.Errorf("divergence: want %v, got %v", want, res.Divergence)
	}
}

func TestShadow_SameValueZeroDivergence(t *testing.T) {
	_, _, r, _ := shadowFixture(t)

	res, err := r.RunShadow(context.Background(), "baseline-p", 10.0, 1.0)
	if err != nil {
		t.Fatal(err)
	}
	if res.Divergence != 0 {
		t.Errorf("same value: divergence should be 0, got %v", res.Divergence)
	}
	if res.BaselineFitness != res.CandidateFitness {
		t.Errorf("fitness should match: b=%v c=%v", res.BaselineFitness, res.CandidateFitness)
	}
}

func TestShadow_EmptySampleReturnsZeroResult(t *testing.T) {
	store := newStore(t)
	_, _ = store.Set(context.Background(), "p", 1.0, "seed")
	eval, _ := NewFuncEvaluator(func(_ context.Context, c Candidate) (float64, map[string]float64, error) {
		return 0, nil, nil
	})
	empty := func(context.Context, float64) ([]Candidate, error) { return nil, nil }
	appl := func(c Candidate, _ any) Candidate { return c }
	r, _ := NewDefaultShadowRunner(store, eval, empty, appl)

	res, err := r.RunShadow(context.Background(), "p", 2.0, 1.0)
	if err != nil {
		t.Fatal(err)
	}
	if res.SampleSize != 0 || res.BaselineFitness != 0 || res.CandidateFitness != 0 || res.Divergence != 0 {
		t.Errorf("empty sample: %+v", res)
	}

	// Meta still filled for audit correlation.
if res.Meta["baseline_key"] != "p" || res.Meta["candidate_value"] != 2.0 { t.Errorf("meta on empty sample: %+v", res.Meta) } } func TestShadow_StoreReadError(t *testing.T) { _, _, r, _ := shadowFixture(t) _, err := r.RunShadow(context.Background(), "ghost-key", 1.0, 0.1) if !errors.Is(err, ErrParameterNotFound) { t.Errorf("unknown baseline: want ErrParameterNotFound, got %v", err) } } func TestShadow_SamplerError(t *testing.T) { store := newStore(t) _, _ = store.Set(context.Background(), "p", 0.0, "seed") eval, _ := NewFuncEvaluator(func(_ context.Context, c Candidate) (float64, map[string]float64, error) { return 0, nil, nil }) boom := errors.New("sampler boom") samp := func(context.Context, float64) ([]Candidate, error) { return nil, boom } appl := func(c Candidate, _ any) Candidate { return c } r, _ := NewDefaultShadowRunner(store, eval, samp, appl) _, err := r.RunShadow(context.Background(), "p", 1.0, 0.1) if !errors.Is(err, boom) { t.Errorf("sampler error: want wrap of boom, got %v", err) } } func TestShadow_EvaluatorError(t *testing.T) { store := newStore(t) _, _ = store.Set(context.Background(), "p", 0.0, "seed") boom := errors.New("eval boom") eval, _ := NewFuncEvaluator(func(_ context.Context, c Candidate) (float64, map[string]float64, error) { return 0, nil, boom }) samp := func(context.Context, float64) ([]Candidate, error) { return []Candidate{{ID: "c1"}}, nil } appl := func(c Candidate, _ any) Candidate { return c } r, _ := NewDefaultShadowRunner(store, eval, samp, appl) _, err := r.RunShadow(context.Background(), "p", 1.0, 0.1) if !errors.Is(err, boom) { t.Errorf("evaluator error: %v", err) } } func TestShadow_CtxCanceledBeforeStart(t *testing.T) { _, _, r, _ := shadowFixture(t) ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := r.RunShadow(ctx, "baseline-p", 30.0, 0.1) if !errors.Is(err, context.Canceled) { t.Errorf("canceled ctx: %v", err) } } func TestShadow_TrafficForwarded(t *testing.T) { store := newStore(t) _, _ = store.Set(context.Background(), "p", 0.0, "seed") eval, _ := NewFuncEvaluator(func(_ context.Context, c Candidate) (float64, map[string]float64, error) { return 0, nil, nil }) var seenTraffic float64 samp := func(_ context.Context, tr float64) ([]Candidate, error) { seenTraffic = tr return nil, nil } appl := func(c Candidate, _ any) Candidate { return c } r, _ := NewDefaultShadowRunner(store, eval, samp, appl) _, _ = r.RunShadow(context.Background(), "p", 1.0, 0.42) if seenTraffic != 0.42 { t.Errorf("traffic forwarded to sampler: got %v", seenTraffic) } } func TestShadow_MetaPopulated(t *testing.T) { _, _, r, _ := shadowFixture(t) res, err := r.RunShadow(context.Background(), "baseline-p", 99.0, 0.7) if err != nil { t.Fatal(err) } if res.Meta["baseline_key"] != "baseline-p" { t.Errorf("meta baseline_key: %v", res.Meta["baseline_key"]) } if res.Meta["candidate_value"] != 99.0 { t.Errorf("meta candidate_value: %v", res.Meta["candidate_value"]) } if res.Meta["traffic"] != 0.7 { t.Errorf("meta traffic: %v", res.Meta["traffic"]) } // baseline_value comes from store (JSON roundtrip => float64 = 10). if res.Meta["baseline_value"] != 10.0 { t.Errorf("meta baseline_value: %v", res.Meta["baseline_value"]) } } func TestShadow_BreakdownAveragedAcrossSamples(t *testing.T) { store := newStore(t) _, _ = store.Set(context.Background(), "p", 0.0, "seed") // Evaluator emits breakdown that depends only on the candidate ID digit, // not the parameter, so baseline and candidate breakdowns agree but are // non-trivially averaged. 
	eval, _ := NewFuncEvaluator(func(_ context.Context, c Candidate) (float64, map[string]float64, error) {
		switch c.ID {
		case "c1":
			return 1, map[string]float64{"dim": 10}, nil
		case "c2":
			return 2, map[string]float64{"dim": 20}, nil
		case "c3":
			return 3, map[string]float64{"dim": 30}, nil
		}
		return 0, nil, nil
	})
	samp := func(context.Context, float64) ([]Candidate, error) {
		return []Candidate{{ID: "c1"}, {ID: "c2"}, {ID: "c3"}}, nil
	}
	appl := func(c Candidate, _ any) Candidate { return c }
	r, _ := NewDefaultShadowRunner(store, eval, samp, appl)

	res, err := r.RunShadow(context.Background(), "p", 1.0, 1.0)
	if err != nil {
		t.Fatal(err)
	}
	wantAvg := 20.0 // (10+20+30)/3
	if res.BaselineBreakdown["dim"] != wantAvg || res.CandidateBreakdown["dim"] != wantAvg {
		t.Errorf("breakdown avg: b=%v c=%v, want %v each",
			res.BaselineBreakdown["dim"], res.CandidateBreakdown["dim"], wantAvg)
	}
}

func TestShadow_DivergenceClampedToUnit(t *testing.T) {
	// Sign-crossing values: diff = 200, max(|a|, |b|) = 100, raw ratio = 2.0,
	// must clamp to 1.0.
	if d := relativeDivergence(-100.0, 100.0); d != 1.0 {
		t.Errorf("divergence clamp: got %v", d)
	}
}

func TestShadow_DivergenceSmallValuesDoNotBlowUp(t *testing.T) {
	// Both near zero: diff tiny, denominator eps-floored, result small.
	if d := relativeDivergence(1e-12, 2e-12); d > 1.0 || d < 0 {
		t.Errorf("divergence tiny values: got %v", d)
	}
}

func TestShadow_NilDepsRejected(t *testing.T) {
	store := newStore(t)
	eval, _ := NewFuncEvaluator(func(context.Context, Candidate) (float64, map[string]float64, error) {
		return 0, nil, nil
	})
	samp := func(context.Context, float64) ([]Candidate, error) { return nil, nil }
	appl := func(c Candidate, _ any) Candidate { return c }

	if _, err := NewDefaultShadowRunner(nil, eval, samp, appl); err == nil {
		t.Error("nil store")
	}
	if _, err := NewDefaultShadowRunner(store, nil, samp, appl); err == nil {
		t.Error("nil evaluator")
	}
	if _, err := NewDefaultShadowRunner(store, eval, nil, appl); err == nil {
		t.Error("nil sampler")
	}
	if _, err := NewDefaultShadowRunner(store, eval, samp, nil); err == nil {
		t.Error("nil applier")
	}
}

func TestShadow_ImplementsInterface(t *testing.T) {
	_, _, r, _ := shadowFixture(t)
	var _ ShadowRunner = r
}
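
// The divergence tests above assume relativeDivergence behaves roughly as
// |a-b| / max(|a|, |b|), with the denominator floored by a small epsilon and
// the result clamped to [0, 1]. The sketch below only restates that assumption
// for readers of this file; it is illustrative, not the package's actual
// implementation, and the epsilon value here is a guess.
func relativeDivergenceSketch(a, b float64) float64 {
	const eps = 1e-9 // assumed floor; the real constant may differ
	denom := math.Max(math.Abs(a), math.Abs(b))
	if denom < eps {
		denom = eps
	}
	if d := math.Abs(a-b) / denom; d < 1.0 {
		return d
	}
	return 1.0 // clamp, per TestShadow_DivergenceClampedToUnit
}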