package engine

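// input.go implements the input-processing pipeline.
//
// It preprocesses user input before the input is sent to the API.
// Supported features:
//   - File reference expansion: @path/to/file or files:path/to/file syntax
//   - Image references: image file paths are detected and the files are read
//     into base64-encoded image content blocks
//   - URL detection: http(s):// URLs are detected and collected (they are not fetched)
//   - Slash command detection: input that starts with /command
//
// Design notes:
//   - Preprocessing only; commands are not executed here (the Engine's consuming layer handles that)
//   - For large files only the first 200 lines are inlined
//   - Images are returned as image content blocks (the API supports multimodal input)
//   - Thread-safe and stateless (apart from the cwd and fileCache references)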

import (
	"encoding/base64"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strings"

	"git.flytoex.net/yuanwei/flyto-agent/pkg/query"
)

// maxFileReferenceLines is the maximum number of lines of a referenced file
// that are inlined when a file reference is expanded.
const maxFileReferenceLines = 200

// imageExtensions maps a lower-case file extension (including the leading
// dot) to the media type reported for an image with that extension.
// A path whose extension is a key of this map is treated as an image
// reference rather than as a text file to inline.
var imageExtensions = map[string]string{
	".png":  "image/png",
	".jpg":  "image/jpeg",
	".jpeg": "image/jpeg",
	".gif":  "image/gif",
	".webp": "image/webp",
	".tif":  "image/tiff",
	".tiff": "image/tiff",
	".heic": "image/heic",
	// .heif is the generic HEIF container and has its own registered media
	// type; image/heic is specific to the HEVC-coded brand.
	".heif": "image/heif",
}

// fileRefPattern matches file references of the form @path/to/file or
// files:path/to/file.
//
// After the "@" or "files:" prefix, the captured path (group 1) either starts
// with "/", "./", "../" or "~/" followed by non-whitespace characters, or
// starts with a word character followed by any non-whitespace characters.
//
// LEGACY: the pattern is complicated because it has to support four forms at
// once -- @/abs/path, @./rel/path, @~/home/path and @filename -- while trying
// not to match e-mail addresses (user@example.com). Because a word character
// is accepted directly after "@", the domain part of an e-mail address is
// matched as well; callers have to filter such matches themselves.
// Possible future improvement: switch to an unambiguous syntax such as @{path}.
var fileRefPattern = regexp.MustCompile(`(?:@|files:)((?:/|\.\.?/|~/)[\S]+|[\w][\S]*)`)

// urlPattern matches URLs that start with http:// or https://. The match
// extends over every following character that is not whitespace and not one
// of < > " { } | \ ^ ` [ ].
var urlPattern = regexp.MustCompile(`https?://[^\s<>"{}|\\^` + "`" + `\[\]]+`)

// slashCmdPattern matches a slash command: the whole text must be a "/"
// followed by a name that starts with a letter and continues with word
// characters or hyphens (group 1), optionally followed by whitespace and
// the arguments (group 2).
var slashCmdPattern = regexp.MustCompile(`^/([a-zA-Z][\w-]*)(?:\s+(.*))?$`)

// InputProcessor preprocesses user input: it detects slash commands, expands
// file references, loads referenced images and collects URLs.
type InputProcessor struct {
	cwd       string          // working directory; base for resolving relative paths
	fileCache *FileStateCache // file cache that records the files read for references; may be nil
}

// ProcessedInput is the result of preprocessing one user input.
type ProcessedInput struct {
	// Text is the processed text (file references expanded). For a slash
	// command it is the trimmed input, unchanged.
	Text string

	// ContentBlocks holds additional content blocks (images and the like).
	ContentBlocks []query.Content

	// ReferencedFiles lists the resolved paths of the files referenced by the input.
	ReferencedFiles []string

	// DetectedURLs lists the URLs detected in the input.
	DetectedURLs []string

	// SlashCommand carries the command details when the input is a slash
	// command; it is nil otherwise.
	SlashCommand *SlashCommand

	// IsEmpty reports whether the input was empty (blank after trimming).
	IsEmpty bool
}

// SlashCommand describes a slash command found in the input.
type SlashCommand struct {
	Name string // command name (without the leading /)
	Args string // command arguments
}

// NewInputProcessor returns an input preprocessor.
//
// Parameters:
//   - cwd: the working directory against which relative paths are resolved.
//   - fileCache: file state cache in which files read for references are
//     recorded. May be nil, in which case nothing is recorded.
func NewInputProcessor(cwd string, fileCache *FileStateCache) *InputProcessor {
	proc := new(InputProcessor)
	proc.cwd = cwd
	proc.fileCache = fileCache
	return proc
}

// Process preprocesses one user input.
//
// Steps, in order:
//  1. Blank input (empty after trimming) yields a result with IsEmpty set.
//  2. A slash command yields a result carrying the command and the trimmed
//     text; no further processing is done.
//  3. File references are expanded.
//  4. Image references are loaded into content blocks.
//  5. URLs are collected.
//
// The returned error is always nil in the current implementation.
func (p *InputProcessor) Process(input string) (*ProcessedInput, error) {
	text := strings.TrimSpace(input)

	// Nothing to do for blank input.
	if text == "" {
		return &ProcessedInput{IsEmpty: true}, nil
	}

	// Slash commands are only detected here; they are passed through untouched.
	if slash := p.detectSlashCommand(text); slash != nil {
		return &ProcessedInput{Text: text, SlashCommand: slash}, nil
	}

	// Image and URL detection both work on the trimmed input, not on the
	// expanded text, so inlined file contents are never scanned.
	expanded, files := p.expandFileReferences(text)
	return &ProcessedInput{
		Text:            expanded,
		ContentBlocks:   p.detectImages(text),
		ReferencedFiles: files,
		DetectedURLs:    detectURLs(text),
	}, nil
}

// expandFileReferences replaces file references in text with the contents of
// the referenced files.
//
// Both @path/to/file and files:path/to/file are recognised. The content of
// each readable file is inserted in place of the reference, wrapped in a
// fenced code block labelled with the file's base name and resolved path.
// Long files are cut down by readFileForReference.
//
// A reference is left in the text verbatim when
//   - the "@" directly follows an ASCII letter or digit (it looks like an
//     e-mail address such as user@example.com),
//   - the path has an image extension (images are not inlined as text; the
//     resolved path is still reported as referenced), or
//   - the file cannot be read or yields no content.
//
// It returns the rewritten text and the resolved paths of the referenced files.
func (p *InputProcessor) expandFileReferences(text string) (string, []string) {
	locs := fileRefPattern.FindAllStringSubmatchIndex(text, -1)
	if len(locs) == 0 {
		return text, nil
	}

	var (
		out    strings.Builder
		files  []string
		cursor int // end of the last match already consumed
	)

	for _, loc := range locs {
		// loc[0]:loc[1] spans the whole reference, loc[2]:loc[3] the path.
		refStart, refEnd := loc[0], loc[1]
		rawRef := text[refStart:refEnd]

		// Copy the text between the previous match and this one.
		out.WriteString(text[cursor:refStart])
		cursor = refEnd

		// An "@" glued to a preceding letter or digit is most likely part of
		// an e-mail address, so the match is kept as plain text.
		if refStart > 0 && text[refStart] == '@' {
			switch prev := text[refStart-1]; {
			case 'a' <= prev && prev <= 'z',
				'A' <= prev && prev <= 'Z',
				'0' <= prev && prev <= '9':
				out.WriteString(rawRef)
				continue
			}
		}

		resolved := p.resolvePath(text[loc[2]:loc[3]])

		// Images are not expanded inline; keep the reference and only note the path.
		if _, isImage := imageExtensions[strings.ToLower(filepath.Ext(resolved))]; isImage {
			files = append(files, resolved)
			out.WriteString(rawRef)
			continue
		}

		body := p.readFileForReference(resolved)
		if body == "" {
			// Missing or unreadable file: keep the reference as written.
			out.WriteString(rawRef)
			continue
		}

		files = append(files, resolved)

		// Wrap the file content in a fenced code block.
		fmt.Fprintf(&out, "\n```%s (%s)\n%s\n```\n", filepath.Base(resolved), resolved, body)
	}

	// Append whatever follows the last match.
	out.WriteString(text[cursor:])

	return out.String(), files
}

// detectImages loads the image files referenced in text and returns them as
// image content blocks.
//
// A reference counts as an image when the extension of its resolved path is
// listed in imageExtensions. Each readable image is base64-encoded and, when
// a file cache is configured, its raw bytes are recorded there. Images that
// cannot be read are skipped silently.
//
// Matches whose "@" directly follows an ASCII letter or digit are ignored:
// they look like e-mail addresses (user@example.png) and are likewise left
// untouched by expandFileReferences, so both functions agree on what counts
// as a file reference.
//
// It returns nil when no image could be loaded.
func (p *InputProcessor) detectImages(text string) []query.Content {
	var blocks []query.Content

	for _, loc := range fileRefPattern.FindAllStringSubmatchIndex(text, -1) {
		// loc[0]:loc[1] spans the whole reference, loc[2]:loc[3] the path.
		if len(loc) < 4 || loc[2] < 0 {
			continue
		}

		// Same e-mail guard as in expandFileReferences.
		if start := loc[0]; start > 0 && text[start] == '@' {
			prev := text[start-1]
			if (prev >= 'a' && prev <= 'z') || (prev >= 'A' && prev <= 'Z') ||
				(prev >= '0' && prev <= '9') {
				continue
			}
		}

		absPath := p.resolvePath(text[loc[2]:loc[3]])

		mediaType, isImage := imageExtensions[strings.ToLower(filepath.Ext(absPath))]
		if !isImage {
			continue
		}

		// Read the image; unreadable files are skipped (best effort).
		data, err := os.ReadFile(absPath)
		if err != nil {
			continue
		}

		// Record the raw bytes in the file cache, if there is one.
		if p.fileCache != nil {
			p.fileCache.Record(absPath, data)
		}

		blocks = append(blocks, query.Content{
			Type: query.ContentImage,
			Source: &query.ContentSource{
				Type:      "base64",
				MediaType: mediaType,
				Data:      base64.StdEncoding.EncodeToString(data),
			},
			SizeBytes: int64(len(data)),
		})
	}

	return blocks
}

// detectURLs returns the URLs found in text, without duplicates and in order
// of first appearance. It returns nil when text contains no URL.
func detectURLs(text string) []string {
	var unique []string
	visited := make(map[string]struct{})

	for _, candidate := range urlPattern.FindAllString(text, -1) {
		if _, dup := visited[candidate]; dup {
			continue
		}
		visited[candidate] = struct{}{}
		unique = append(unique, candidate)
	}

	return unique
}

// detectSlashCommand reports whether text is a slash command.
// It returns the parsed command, or nil when text does not match
// slashCmdPattern.
func (p *InputProcessor) detectSlashCommand(text string) *SlashCommand {
	groups := slashCmdPattern.FindStringSubmatch(text)
	if groups == nil {
		return nil
	}

	// groups[1] is the command name, groups[2] the optional argument string
	// (empty when no arguments were given).
	return &SlashCommand{
		Name: groups[1],
		Args: strings.TrimSpace(groups[2]),
	}
}

// readFileForReference loads the file at path for inline expansion.
//
// The whole file is read and, when a file cache is configured, its raw bytes
// are recorded there. If the file has more than maxFileReferenceLines lines,
// only the first maxFileReferenceLines lines are returned, followed by a
// notice stating how many lines the file has in total.
//
// A newline at the very end of the file terminates the last line; it does not
// start another one. It is therefore not counted, so a file with exactly
// maxFileReferenceLines lines is returned in full.
//
// It returns "" when the file cannot be read (an empty file also yields "").
func (p *InputProcessor) readFileForReference(path string) string {
	data, err := os.ReadFile(path)
	if err != nil {
		return ""
	}

	// Record the raw bytes in the file cache, if there is one.
	if p.fileCache != nil {
		p.fileCache.Record(path, data)
	}

	content := string(data)
	lines := strings.Split(content, "\n")

	// strings.Split yields a trailing empty element for text that ends in
	// "\n"; that element is not a line of the file.
	lineCount := len(lines)
	if strings.HasSuffix(content, "\n") {
		lineCount--
	}

	if lineCount <= maxFileReferenceLines {
		return content
	}

	head := strings.Join(lines[:maxFileReferenceLines], "\n")
	return head + fmt.Sprintf("\n... (truncated, showing first %d of %d lines)", maxFileReferenceLines, lineCount)
}

// resolvePath turns a referenced path into the path that is actually read.
//
// A leading "~/" is expanded to the user's home directory (if that directory
// can be determined). Absolute paths are returned cleaned. Relative paths
// are resolved against the processor's working directory.
//
// Security: a relative path must stay inside the working directory after
// resolution, otherwise "" is returned. This blocks traversal references
// such as @../../etc/passwd. Absolute and "~/" paths are given explicitly by
// the user and are not restricted to the working directory.
//
// The working directory is cleaned before the check, so a trailing separator
// or a root working directory does not cause every relative path to be
// rejected. The check is purely lexical; symbolic links are not resolved.
func (p *InputProcessor) resolvePath(filePath string) string {
	// Expand "~/" to the home directory.
	if strings.HasPrefix(filePath, "~/") {
		if home, err := os.UserHomeDir(); err == nil {
			filePath = filepath.Join(home, filePath[2:])
		}
	}

	// Absolute path: explicitly chosen by the user, returned as is (cleaned).
	if filepath.IsAbs(filePath) {
		return filepath.Clean(filePath)
	}

	// Relative path: resolve against the cleaned working directory.
	// filepath.Join cleans its result.
	base := filepath.Clean(p.cwd)
	absPath := filepath.Join(base, filePath)

	// Boundary check: the path relative to base must not climb out of base.
	rel, err := filepath.Rel(base, absPath)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return ""
	}

	return absPath
}