package evolve

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"strings"
	"text/template"
	"time"
)

// LLMClient is the narrow LLM contract used by LLMGenerator.
//
// Why not use flyto.Provider directly:
// flyto.Provider is the full Agent session abstraction (tool calls,
// streaming events, permissions). A generator only needs "prompt in, text
// out". Depending on Provider would drag the entire engine into evolve and
// break the "interface matrix consumable on its own" promise of the package.
//
// Consumers wrap their preferred backend (flyto.Provider, the OpenAI SDK,
// local Ollama, etc.) into something satisfying this interface. The wrapper
// lives in the consumer, not evolve.
type LLMClient interface {
	Complete(ctx context.Context, prompt string, opts LLMCallOpts) (string, error)
}

// LLMCallOpts carries per-call tuning knobs. Fields with zero values are
// expected to fall back to implementation defaults (for example, the wrapper
// chooses the default model).
//
// Temperature / TopP zero-semantics: zero means "unset" (let the wrapper
// choose). The wrapper translates a non-zero value to a non-nil pointer
// when populating flyto.Request, and leaves it nil otherwise so the
// upstream provider applies its own default. Callers who genuinely want
// deterministic 0.0 sampling cannot express it through these float64
// fields -- they must configure it on the provider Config layer instead.
// This trade-off keeps the common path ergonomic (skip the option to use
// defaults) at the cost of one obscure case.
type LLMCallOpts struct {
	Temperature float64
	TopP        float64
	Model       string
	MaxTokens   int
}

// LLMGenerator is the LLM-backed Generator reference implementation.
//
// Pipeline:
//  1. Render the prompt template with {K, Roles, RolesJoined, Input}.
//  2. Call client.Complete(ctx, prompt, opts).
//  3. Extract a JSON array from the response (strip markdown fences if any).
//  4. Unmarshal to []rawCandidate, produce []Candidate with auto Meta.
//
// LEGACY: structured output (OpenAI response_format=json_schema, Anthropic
// tool-use forced JSON) would eliminate the markdown-fence unwrap step, but
// support is uneven across providers as of 2026-04. Stay on prompt + parse
// for MVP; the upgrade path is a new Option that delegates extraction to the
// LLMClient when the backend supports it.
type LLMGenerator struct {
	client    LLMClient
	model     string
	maxTokens int
	tmpl      *template.Template
}

// GeneratorOption configures an LLMGenerator at construction.
type GeneratorOption func(*LLMGenerator) error

// WithModel pins a default model string attached to every Complete call and
// recorded on Candidate.Meta["model"].
func WithModel(m string) GeneratorOption {
	return func(g *LLMGenerator) error {
		g.model = m
		return nil
	}
}

// WithMaxTokens caps the LLMClient output length per call. Zero = backend
// default.
func WithMaxTokens(n int) GeneratorOption {
	return func(g *LLMGenerator) error {
		if n < 0 {
			return fmt.Errorf("evolve: WithMaxTokens requires n >= 0, got %d", n)
		}
		g.maxTokens = n
		return nil
	}
}
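// Illustrative only (not part of the package): a minimal LLMClient wrapper
// over a local Ollama server's /api/generate endpoint, sketched to show
// where backend adapters live -- in the consumer, not evolve. The request /
// response shape (model, prompt, stream, options in; "response" out) and the
// option names (temperature, top_p, num_predict) are assumptions based on
// Ollama's documented REST API; adapt to the backend actually in use. A
// production wrapper should also honour the LLMCallOpts zero-value
// semantics above and omit unset sampling fields instead of sending zeros,
// which this naive sketch does not.
//
//	type ollamaClient struct {
//		baseURL string       // e.g. "http://localhost:11434"
//		httpc   *http.Client // net/http and bytes imports assumed
//	}
//
//	func (c *ollamaClient) Complete(ctx context.Context, prompt string, opts LLMCallOpts) (string, error) {
//		body, err := json.Marshal(map[string]any{
//			"model":  opts.Model,
//			"prompt": prompt,
//			"stream": false, // one JSON object back instead of a stream
//			"options": map[string]any{
//				"temperature": opts.Temperature,
//				"top_p":       opts.TopP,
//				"num_predict": opts.MaxTokens,
//			},
//		})
//		if err != nil {
//			return "", err
//		}
//		req, err := http.NewRequestWithContext(ctx, http.MethodPost,
//			c.baseURL+"/api/generate", bytes.NewReader(body))
//		if err != nil {
//			return "", err
//		}
//		req.Header.Set("Content-Type", "application/json")
//		resp, err := c.httpc.Do(req)
//		if err != nil {
//			return "", err
//		}
//		defer resp.Body.Close()
//		var out struct {
//			Response string `json:"response"`
//		}
//		if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
//			return "", err
//		}
//		return out.Response, nil
//	}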
// WithPromptTemplate overrides the default prompt template. The template is
// parsed with text/template and receives fields: K (int), Roles ([]string),
// RolesJoined (string), Input (string, already JSON-encoded when the
// original input was not a string).
func WithPromptTemplate(tmpl string) GeneratorOption {
	return func(g *LLMGenerator) error {
		t, err := template.New("evolve-generator").Parse(tmpl)
		if err != nil {
			return fmt.Errorf("evolve: parse prompt template: %w", err)
		}
		g.tmpl = t
		return nil
	}
}

// CLEVER: the default prompt is English. LLM stability on JSON-structured
// output is materially higher for English prompts across Anthropic / OpenAI
// / Gemini / MiniMax families (2026 eval data). Chinese prompt localisation
// is a consumer-layer decision via WithPromptTemplate.
const defaultPromptTemplate = `You are tasked with generating {{.K}} candidate solutions for the given input.
{{if .Roles}}Adopt these perspectives in your reasoning: {{.RolesJoined}}.{{end}}

Return ONLY a JSON array of up to {{.K}} objects. No prose, no code fences.
Each object must have these keys:
- "id": short unique string identifying this candidate
- "payload": the candidate solution (any JSON value)
- "meta": optional map of additional annotations

Input:
{{.Input}}
`

// NewLLMGenerator builds an LLMGenerator. client is required.
func NewLLMGenerator(client LLMClient, opts ...GeneratorOption) (*LLMGenerator, error) {
	if client == nil {
		return nil, errors.New("evolve: NewLLMGenerator requires non-nil LLMClient")
	}
	t, err := template.New("evolve-generator").Parse(defaultPromptTemplate)
	if err != nil {
		return nil, fmt.Errorf("evolve: parse default template: %w", err)
	}
	g := &LLMGenerator{
		client: client,
		tmpl:   t,
	}
	for _, o := range opts {
		if err := o(g); err != nil {
			return nil, err
		}
	}
	return g, nil
}

// rawCandidate is the on-wire shape expected from the LLM response.
type rawCandidate struct {
	ID      string         `json:"id"`
	Payload any            `json:"payload"`
	Meta    map[string]any `json:"meta"`
}

// Generate implements Generator.
func (g *LLMGenerator) Generate(ctx context.Context, input any, K int, opts ...GenOpt) ([]Candidate, error) {
	if K <= 0 {
		return nil, ErrInvalidK
	}
	if err := ctx.Err(); err != nil {
		return nil, err
	}
	cfg := genConfig{}
	for _, o := range opts {
		o(&cfg)
	}

	prompt, err := g.renderPrompt(input, K, cfg.Roles)
	if err != nil {
		return nil, err
	}

	resp, err := g.client.Complete(ctx, prompt, LLMCallOpts{
		Temperature: cfg.Temperature,
		TopP:        cfg.TopP,
		Model:       g.model,
		MaxTokens:   g.maxTokens,
	})
	if err != nil {
		return nil, fmt.Errorf("%w: %v", ErrLLMFailed, err)
	}

	raws, err := parseCandidates(resp)
	if err != nil {
		return nil, err
	}

	// CLEVER: Generator does not truncate or pad to K. LLM sampling is
	// noisy; the caller sees the real count and decides whether to retry or
	// accept. A silent cap/pad would mask model quality regressions.
	now := time.Now().UTC()
	out := make([]Candidate, 0, len(raws))
	for i, r := range raws {
		id := strings.TrimSpace(r.ID)
		if id == "" {
			id = fmt.Sprintf("cand-%d", i+1)
		}
		out = append(out, Candidate{
			ID:      id,
			Payload: r.Payload,
			Meta:    buildMeta(r.Meta, cfg, g.model, now),
		})
	}
	return out, nil
}
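// Usage sketch (illustrative; stubClient is hypothetical): a stub LLMClient
// wired through the full Generate pipeline, exercising the fence-stripping
// parse path. It also demonstrates the no-truncate/no-pad contract: asking
// for K=3 while the model returns 2 yields exactly 2 candidates.
//
//	type stubClient struct{}
//
//	func (stubClient) Complete(_ context.Context, _ string, _ LLMCallOpts) (string, error) {
//		return "```json\n[{\"id\":\"a\",\"payload\":42},{\"id\":\"b\",\"payload\":\"x\"}]\n```", nil
//	}
//
//	g, err := NewLLMGenerator(stubClient{}, WithModel("demo-model"))
//	if err != nil {
//		// handle construction error
//	}
//	cands, err := g.Generate(context.Background(), "pick two values", 3)
//	// err == nil; len(cands) == 2, not 3: the generator reports the real
//	// count and leaves the retry/accept decision to the caller.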
// renderPrompt applies the template. Inputs that are already strings are
// passed through verbatim; anything else is JSON-encoded so the template has
// a deterministic text representation.
func (g *LLMGenerator) renderPrompt(input any, K int, roles []string) (string, error) {
	var inputText string
	switch v := input.(type) {
	case nil:
		inputText = "(no input)"
	case string:
		inputText = v
	default:
		buf, err := json.Marshal(v)
		if err != nil {
			return "", fmt.Errorf("evolve: marshal input: %w", err)
		}
		inputText = string(buf)
	}

	var sb strings.Builder
	data := struct {
		K           int
		Roles       []string
		RolesJoined string
		Input       string
	}{
		K:           K,
		Roles:       roles,
		RolesJoined: strings.Join(roles, ", "),
		Input:       inputText,
	}
	if err := g.tmpl.Execute(&sb, data); err != nil {
		return "", fmt.Errorf("evolve: execute template: %w", err)
	}
	return sb.String(), nil
}

// parseCandidates extracts a JSON array from raw, tolerating markdown fences
// and surrounding commentary. Returns ErrCandidateParseFailed for any
// shape that cannot be decoded into []rawCandidate.
func parseCandidates(raw string) ([]rawCandidate, error) {
	body := extractJSONArray(raw)
	if body == "" {
		return nil, fmt.Errorf("%w: no JSON array found in response", ErrCandidateParseFailed)
	}
	var raws []rawCandidate
	if err := json.Unmarshal([]byte(body), &raws); err != nil {
		return nil, fmt.Errorf("%w: %v", ErrCandidateParseFailed, err)
	}
	return raws, nil
}

// extractJSONArray returns the first top-level JSON array substring in s, or
// "" if none is found. Handles these common patterns:
//   - pure JSON array
//   - markdown-fenced: ```json\n[...]\n``` or ```\n[...]\n```
//   - JSON array surrounded by model commentary ("Here are K candidates: [...]")
//
// Depth-aware: walks bracket balance so arrays containing nested brackets
// ([[1],[2]]) are extracted whole.
func extractJSONArray(s string) string {
	s = stripCodeFence(s)
	start := strings.Index(s, "[")
	if start < 0 {
		return ""
	}
	depth := 0
	inString := false
	escape := false
	for i := start; i < len(s); i++ {
		c := s[i]
		if escape {
			escape = false
			continue
		}
		if inString {
			switch c {
			case '\\':
				escape = true
			case '"':
				inString = false
			}
			continue
		}
		switch c {
		case '"':
			inString = true
		case '[':
			depth++
		case ']':
			depth--
			if depth == 0 {
				return s[start : i+1]
			}
		}
	}
	return ""
}

// stripCodeFence removes a leading/trailing ``` fence (optionally language-
// tagged) if the whole response is wrapped in one. Non-fenced input is
// returned unchanged.
func stripCodeFence(s string) string {
	t := strings.TrimSpace(s)
	if !strings.HasPrefix(t, "```") {
		return s
	}
	// Drop the opening fence line (```lang\n or ```\n).
	nl := strings.Index(t, "\n")
	if nl < 0 {
		return s
	}
	body := t[nl+1:]
	if idx := strings.LastIndex(body, "```"); idx >= 0 {
		body = body[:idx]
	}
	return body
}

// buildMeta merges LLM-supplied meta with auto-populated fields. Auto fields
// take precedence -- the LLM cannot override the true sampling parameters /
// model that were actually dispatched, and cannot fake the generated_at
// timestamp. Recording top_p alongside temperature lets ParameterEvolver and
// downstream evaluators trace the full sampling configuration of any
// candidate, not just half of it.
func buildMeta(llmMeta map[string]any, cfg genConfig, model string, now time.Time) map[string]any {
	out := make(map[string]any, len(llmMeta)+5)
	for k, v := range llmMeta {
		out[k] = v
	}
	out["temperature"] = cfg.Temperature
	out["top_p"] = cfg.TopP
	out["model"] = model
	out["roles"] = append([]string(nil), cfg.Roles...)
	out["generated_at"] = now
	return out
}
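// To make the parser's tolerance concrete, the traces below follow
// extractJSONArray through the common response shapes listed in its doc
// comment (inputs illustrative):
//
//	extractJSONArray(`[1, 2, 3]`)                           // "[1, 2, 3]"
//	extractJSONArray("```json\n[[1],[2]]\n```")             // "[[1],[2]]" -- fence stripped, nesting balanced
//	extractJSONArray(`Here are 2 candidates: [{"id":"a"}]`) // the array only -- commentary ignored
//	extractJSONArray(`["a ] inside a string"]`)             // whole array -- "]" inside strings is skipped
//	extractJSONArray(`no array here`)                       // ""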