// Context Overflow 修正 -- 自动调整 max_tokens 避免超出上下文窗口.
//
// 精妙之处(CLEVER): API 返回 "input length and `max_tokens` exceed context limit:
// 188059 + 20000 > 200000" 时,自动计算安全的 max_tokens 值并注入 RetryContext,
// 让 Retryer 的下次尝试使用修正后的值.用户完全无感知.
//
// 升华改进(ELEVATED): 早期方案把这段逻辑混在 822 行的 withRetry 主循环中.
// 我们提取为独立模块,可以被 Retryer 或其他组件复用.
//
// 替代方案:<原方案嵌在 withRetry 的 catch 块中,与重试/退避/降级代码交织>
package retry

import (
	"regexp"
	"strconv"
)

// ============================================================
// ContextOverflowHandler
// ============================================================

// ContextOverflowHandler handles "input + max_tokens > context limit" errors
// by computing a max_tokens value that fits inside the context window.
//
// Adjust treats a zero or negative FloorOutputTokens / SafetyBuffer as
// "unset" and substitutes the defaults (3000 and 1000 respectively).
type ContextOverflowHandler struct {
	// FloorOutputTokens is the minimum number of output tokens that must
	// remain available (default 3000). If less room than this is left, there
	// is not enough space for a useful reply and Adjust reports failure
	// instead of returning a smaller value.
	FloorOutputTokens int
	// SafetyBuffer is the margin subtracted from the remaining room
	// (default 1000) so that token-count estimation error does not cause a
	// second overflow.
	SafetyBuffer int
	// ThinkingBudget is the current thinking-token budget (0 = thinking not
	// enabled). The adjusted max_tokens must hold the whole budget plus at
	// least one output token.
	ThinkingBudget int
}

// DefaultOverflowHandler builds a freshly allocated handler with the stock
// settings: a 3000-token output floor, a 1000-token safety buffer, and no
// thinking budget.
func DefaultOverflowHandler() *ContextOverflowHandler {
	handler := new(ContextOverflowHandler)
	handler.FloorOutputTokens = 3000
	handler.SafetyBuffer = 1000
	return handler
}

// OverflowInfo holds the three numbers extracted from a context-overflow
// error message of the form "<input> + <max_tokens> > <limit>".
type OverflowInfo struct {
	InputTokens  int // actual number of input tokens (first number in the message)
	MaxTokens    int // max_tokens value that was in effect (second number)
	ContextLimit int // the model's context-window limit (third number)
}

// overflowRe recognizes the API's context-overflow message, for example
//
//	input length and `max_tokens` exceed context limit: 188059 + 20000 > 200000
//
// and captures the three numbers in order. The "." on either side of
// max_tokens matches any single character, which covers the backticks that
// appear in the real message.
var overflowRe = regexp.MustCompile(`input length and .max_tokens. exceed context limit:\s*(\d+)\s*\+\s*(\d+)\s*>\s*(\d+)`)

// ParseOverflow extracts the overflow figures from an error message.
//
// The pattern may appear anywhere inside message. The result is nil when the
// message does not match (i.e. it is not a context-overflow error) or when
// any of the captured numbers does not fit in an int.
func ParseOverflow(message string) *OverflowInfo {
	groups := overflowRe.FindStringSubmatch(message)
	if len(groups) < 4 {
		return nil
	}

	// groups[0] is the whole match; groups[1..3] are input, max_tokens, limit.
	var nums [3]int
	for i := range nums {
		n, err := strconv.Atoi(groups[i+1])
		if err != nil {
			return nil
		}
		nums[i] = n
	}

	return &OverflowInfo{
		InputTokens:  nums[0],
		MaxTokens:    nums[1],
		ContextLimit: nums[2],
	}
}

// Adjust computes a corrected max_tokens value that fits inside the context
// window described by info.
//
// It returns (value, true) when a retry with the new value is worthwhile, or
// (0, false) when there is not enough room and the request should not be
// retried. A nil info (which is what ParseOverflow returns for a message that
// is not a context-overflow error) also yields (0, false).
//
// The returned value is
//
//	available = ContextLimit - InputTokens - SafetyBuffer
//
// and it is only returned when both of the following hold:
//  1. available >= FloorOutputTokens (otherwise the reply would be too short
//     to be useful);
//  2. available >= ThinkingBudget + 1 (otherwise thinking could not run and
//     still leave one output token).
//
// A zero or negative FloorOutputTokens / SafetyBuffer is treated as unset and
// replaced by 3000 / 1000 respectively.
func (h *ContextOverflowHandler) Adjust(info *OverflowInfo) (adjustedMax int, ok bool) {
	const (
		defaultFloor  = 3000
		defaultSafety = 1000
	)

	if info == nil {
		return 0, false // nothing to adjust: the error was not an overflow
	}

	floor := h.FloorOutputTokens
	if floor <= 0 {
		floor = defaultFloor
	}
	safety := h.SafetyBuffer
	if safety <= 0 {
		safety = defaultSafety
	}

	available := info.ContextLimit - info.InputTokens - safety
	if available < floor {
		return 0, false // not enough room for a meaningful reply
	}

	// Equivalent to available < ThinkingBudget+1, written so that an extreme
	// ThinkingBudget cannot wrap around and bypass the check.
	if available <= h.ThinkingBudget {
		return 0, false // the thinking budget alone would not fit
	}

	// available >= floor is already guaranteed above, so no further clamping
	// is needed.
	return available, true
}