// evolve_closed_loop exercises all 9 evolve interfaces in one runnable // pipeline, using only the file-backed reference implementations plus a // mock LLM client. Zero external deps, zero API keys, deterministic-ish // (wall-clock used for Feedback.Timestamp; the pipeline logic itself is // deterministic once inputs are fixed). // // Scenario: carrier risk penalty evolution. // - Historical decisions were logged at a baseline penalty value. // - KPI feedback (on_time_rate) flows in. // - We ask an LLM for candidate penalty adjustments. // - WeightedEvaluator scores each candidate on a simple fitness model. // - ProposerFunc turns feedback into a proposal tied to the winning candidate. // - ShadowRunner compares baseline vs proposed fitness over the historical // decision set. // - An approval gate (shadow delta + confidence) decides whether to Apply. // // Run: // // cd core && go run ./examples/evolve_closed_loop/ package main import ( "context" "fmt" "log" "os" "path/filepath" "time" "git.flytoex.net/yuanwei/flyto-agent/pkg/evolve" ) func main() { root, err := os.MkdirTemp("", "evolve-demo-") if err != nil { log.Fatalf("mkdir temp: %v", err) } defer os.RemoveAll(root) fmt.Printf("demo root: %s\n", root) ctx := context.Background() // Step 1: initialize 9 file-backed components + seed baseline. store := mustT(evolve.NewFileParameterStore(filepath.Join(root, "params"))) logSrc := mustT(evolve.NewFileLogSource(filepath.Join(root, "logs"))) fbch := mustT(evolve.NewFileFeedbackChannel(filepath.Join(root, "feedback"))) if _, err := store.Set(ctx, "carrier_risk_penalty", 1.0, "initial baseline"); err != nil { log.Fatalf("seed baseline: %v", err) } // Step 2: write 3 historical decisions. t0 := time.Date(2026, 4, 18, 9, 0, 0, 0, time.UTC) decisions := []evolve.LogEntry{ {Timestamp: t0, DecisionID: "d1", Entity: "carrier-Y-express", Payload: map[string]any{"penalty": 1.0, "order": "ord-1001"}}, {Timestamp: t0.Add(time.Hour), DecisionID: "d2", Entity: "carrier-Y-express", Payload: map[string]any{"penalty": 1.0, "order": "ord-1002"}}, {Timestamp: t0.Add(2 * time.Hour), DecisionID: "d3", Entity: "carrier-Z-freight", Payload: map[string]any{"penalty": 0.5, "order": "ord-1003"}}, } for _, d := range decisions { must0(logSrc.Append(ctx, d)) } printSection("Step 1-2: 3 decisions written to LogSource") // Step 3: write KPI feedbacks. Two on_time_rate samples for // carrier-Y-express (first-touch per-metric pairing will dedupe to the // earlier one per decision) plus one damage_rate sample to demonstrate // per-metric independence in the aggregator. d3 on carrier-Z-freight // gets no feedback so we can see the pending-count machinery. must0(fbch.Report(ctx, "carrier-Y-express", "on_time_rate", 0.72, 0.9, nil)) must0(fbch.Report(ctx, "carrier-Y-express", "on_time_rate", 0.68, 0.9, nil)) must0(fbch.Report(ctx, "carrier-Y-express", "damage_rate", 0.02, 0.8, nil)) printSection("Step 3: 3 feedbacks written; d3 on carrier-Z-freight stays pending") // Step 4: LogReplayer + AggregatorReflector. replayer := evolve.NewDefaultLogReplayer(logSrc, fbch, evolve.WithFeedbackWindow(30*24*time.Hour)) aggregator := evolve.NewAggregatorReflector() replayer.RegisterReflector(aggregator) must0(replayer.Replay(ctx, t0.Add(-time.Hour), time.Now().UTC().Add(time.Hour), nil)) printSection("Step 4: Replay complete, aggregator stats") fmt.Println(" (note: first-touch per-metric means each decision pairs with") fmt.Println(" the earliest feedback in its window; a second on_time_rate") fmt.Println(" sample for the same decision is dropped by design.)") for _, entity := range aggregator.Entities() { for _, metric := range aggregator.Metrics(entity) { s := aggregator.Stats(entity, metric) fmt.Printf(" %s / %s: count=%d mean=%.3f min=%.3f max=%.3f\n", entity, metric, s.Count, s.Mean, s.Min, s.Max) } p := aggregator.Stats(entity, "") if p.PendingCount > 0 { fmt.Printf(" %s / : %d decision(s) awaiting feedback\n", entity, p.PendingCount) } } // Step 5: mock LLM + LLMGenerator produce 3 candidates. fakeLLM := &mockLLM{response: `[ {"id":"bump","payload":{"penalty":1.5},"meta":{"strategy":"raise penalty"}}, {"id":"steady","payload":{"penalty":1.0},"meta":{"strategy":"keep baseline"}}, {"id":"relax","payload":{"penalty":0.6},"meta":{"strategy":"reduce penalty"}} ]`} generator := mustT(evolve.NewLLMGenerator(fakeLLM, evolve.WithModel("demo-model"))) cands, err := generator.Generate(ctx, map[string]any{"entity": "carrier-Y-express", "observed_on_time": 0.70}, 3, evolve.WithTemperature(0.7)) if err != nil { log.Fatalf("generate: %v", err) } printSection("Step 5: Generator produced candidates") for _, c := range cands { fmt.Printf(" %s -> %v (strategy=%v)\n", c.ID, c.Payload, c.Meta["strategy"]) } // Step 6: WeightedEvaluator scores each candidate. // Toy fitness model: penalty raises predicted on_time_rate but also risk // (lower risk is better, so its weight is negative). The shape does not // matter -- the point is the evaluator returns a comparable scalar. evaluator := mustT(evolve.NewWeightedEvaluator( evolve.WithFeature("predicted_on_time", func(c evolve.Candidate) float64 { p := penaltyOf(c) return 0.5 + 0.15*p }), evolve.WithFeature("predicted_risk", func(c evolve.Candidate) float64 { p := penaltyOf(c) return 1.0 / (1.0 + p) }), evolve.WithWeights(map[string]float64{"predicted_on_time": 1.0, "predicted_risk": -0.3}), )) var best evolve.Candidate bestFit := -1e18 printSection("Step 6: Evaluator scoring") for _, c := range cands { fit, bd, _ := evaluator.Score(ctx, c) fmt.Printf(" %s: fitness=%.4f breakdown=%v\n", c.ID, fit, bd) if fit > bestFit { bestFit = fit best = c } } fmt.Printf(" winner -> %s fitness=%.4f\n", best.ID, bestFit) // Step 7: Propose via DefaultParameterEvolver. evidence := []evolve.Feedback{ {Entity: "carrier-Y-express", Metric: "on_time_rate", Value: 0.72, Confidence: 0.9}, {Entity: "carrier-Y-express", Metric: "on_time_rate", Value: 0.68, Confidence: 0.9}, } proposer := func(_ context.Context, _ string, ev []evolve.Feedback) (any, float64, error) { var sum float64 for _, r := range ev { sum += r.Value } observed := sum / float64(len(ev)) return penaltyOf(best), observed, nil } audit := func(format string, args ...any) { fmt.Printf(" [AUDIT] "+format+"\n", args...) } evolver := mustT(evolve.NewDefaultParameterEvolver(store, proposer, evolve.WithAuditLogger(audit))) proposedValue, confidence, err := evolver.Propose(ctx, "carrier_risk_penalty", evidence) if err != nil { log.Fatalf("propose: %v", err) } printSection("Step 7: Proposer output") fmt.Printf(" proposed value: %v confidence: %.3f\n", proposedValue, confidence) // Step 8: ShadowRunner compares baseline vs proposed fitness. sampler := func(_ context.Context, _ float64) ([]evolve.Candidate, error) { out := make([]evolve.Candidate, 0, len(decisions)) for _, d := range decisions { out = append(out, evolve.Candidate{ ID: d.DecisionID, Payload: d.Payload, Meta: map[string]any{}, }) } return out, nil } applier := func(c evolve.Candidate, value any) evolve.Candidate { newPayload := map[string]any{"penalty": value} // Copy unrelated payload keys so the fitness model still sees them if // it needs them (future-proof for richer evaluators). if orig, ok := c.Payload.(map[string]any); ok { for k, v := range orig { if k != "penalty" { newPayload[k] = v } } } return evolve.Candidate{ID: c.ID, Payload: newPayload, Meta: c.Meta} } shadow := mustT(evolve.NewDefaultShadowRunner(store, evaluator, sampler, applier)) result, err := shadow.RunShadow(ctx, "carrier_risk_penalty", proposedValue, 1.0) if err != nil { log.Fatalf("shadow: %v", err) } printSection("Step 8: Shadow comparison") fmt.Printf(" baseline fitness: %.4f\n", result.BaselineFitness) fmt.Printf(" candidate fitness: %.4f\n", result.CandidateFitness) fmt.Printf(" divergence: %.4f (∈[0,1])\n", result.Divergence) fmt.Printf(" sample size: %d\n", result.SampleSize) // Step 9: approval gate + Apply. delta := result.CandidateFitness - result.BaselineFitness approved := delta > 0 && confidence > 0.5 reason := fmt.Sprintf("auto-approved: shadow delta +%.4f, candidate=%s, conf=%.2f", delta, best.ID, confidence) printSection(fmt.Sprintf("Step 9: approval gate approved=%v (delta>0=%v, conf>0.5=%v)", approved, delta > 0, confidence > 0.5)) if err := evolver.Apply(ctx, "carrier_risk_penalty", proposedValue, approved, reason); err != nil { log.Fatalf("apply: %v", err) } // Verify end state. finalValue, version, err := store.Get(ctx, "carrier_risk_penalty") if err != nil { log.Fatalf("verify: %v", err) } printSection("Final ParameterStore state") fmt.Printf(" key: carrier_risk_penalty\n") fmt.Printf(" value: %v (baseline was 1.0)\n", finalValue) fmt.Printf(" version: %d\n", version) fmt.Println("\nClosed loop complete. 9/9 interfaces exercised:") fmt.Println(" ParameterStore LogSource FeedbackChannel LogReplayer") fmt.Println(" Generator Evaluator Reflector ParameterEvolver ShadowRunner") } // penaltyOf extracts the "penalty" float from a Candidate's Payload. The // JSON roundtrip (LLMGenerator parses JSON into map[string]any) gives // float64; a Go-side-constructed Candidate may give any numeric type, so // we normalise via a tiny helper. func penaltyOf(c evolve.Candidate) float64 { m, ok := c.Payload.(map[string]any) if !ok { return 0 } switch v := m["penalty"].(type) { case float64: return v case float32: return float64(v) case int: return float64(v) } return 0 } // mockLLM returns a fixed JSON array regardless of input, making the demo // deterministic without a real LLM dependency. type mockLLM struct { response string } func (m *mockLLM) Complete(_ context.Context, _ string, _ evolve.LLMCallOpts) (string, error) { return m.response, nil } func mustT[T any](v T, err error) T { if err != nil { log.Fatal(err) } return v } func must0(err error) { if err != nil { log.Fatal(err) } } func printSection(title string) { fmt.Println() fmt.Printf("── %s ──\n", title) }