package builtin

// FileRead 工具 -- 读取本地文件内容.
//
// 这是 Agent 的基础感知能力之一:读取文件系统上的文件,
// 让 Agent 可以查看源代码,配置文件,日志,图片,PDF,Jupyter Notebook 等.
//
// 对应早期方案的完整功能集.
//
// 特性:
//   - 支持 offset/limit/pages 参数
//   - 带行号输出(cat -n 格式),方便模型定位代码
//   - 图片文件:base64 内联返回(含格式检测,尺寸信息)
//   - PDF 文件:检测 magic bytes,支持 pages 参数,返回文件元信息
//   - Jupyter Notebook:解析 .ipynb JSON,按 cell 格式化输出
//   - 设备文件阻止(/dev/zero, /dev/random 等无限/阻塞设备)
//   - 编码检测:UTF-8 BOM 跳过,UTF-16 LE/BE 检测,二进制文件检测
//   - 安全路径检查:符号链接检测,路径遍历防护,设备文件类型检测
//   - 文件状态缓存集成:记录内容哈希/大小/行数/mtime
//   - ConcurrencySafe: true,多个 FileRead 可以并行执行
//   - ReadOnly: true,不会修改文件系统

import (
	"bufio"
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"git.flytoex.net/yuanwei/flyto-agent/pkg/permission"
	"git.flytoex.net/yuanwei/flyto-agent/pkg/tools"
)

// ─────────────────────────────────────────────────────────────────────
// 缓存接口
// ─────────────────────────────────────────────────────────────────────

// FileCacheRecorder is the recording interface of the file content cache.
// FileReadTool hands the content it has read to this interface instead of
// depending on the engine package directly, which would be an import cycle.
//
// CLEVER: the interface breaks a dependency cycle. FileReadTool lives in the
// builtin package while FileStateCache lives in the engine package; a direct
// reference would create builtin → engine → builtin. With this interface,
// builtin depends only on the abstraction and engine injects the
// implementation.
type FileCacheRecorder interface {
	// Record stores the content that was read from path.
	Record(path string, content []byte)
}

// FileHistoryRecorder is the recording interface of the file history.
// FileEditTool/FileWriteTool use it to back up a file before modifying it,
// without depending on the engine package directly (the same
// interface-based decoupling as FileCacheRecorder).
//
// CLEVER: designed symmetrically to FileCacheRecorder --
// FileCacheRecorder records "after read", FileHistoryRecorder backs up
// "before write".
type FileHistoryRecorder interface {
	// BeforeEdit backs up the file before an edit (the file must exist).
	BeforeEdit(filePath string, messageID string) error
	// BeforeWrite backs up the file before a write (a missing file is
	// recorded as newly created).
	BeforeWrite(filePath string, messageID string) error
}

// FileStateCacheEntry is one record of the file state cache.
// It corresponds to the readFileState.set() call of the original TS project.
//
// CLEVER: the cache records not only content-derived data but also metadata
// captured at read time, so dedup logic can decide whether a previous read
// of the same range is still valid. IsPartialView matters: a partial read
// must not be used for a pre-edit integrity check.
//
// Consumption shape (callback / form 3): after a text file read,
// FileReadTool calls FileStateCacheRecorder.RecordState(path, entry),
// handing the entry to an externally injected Recorder implementation.
// The Recorder is written by the consumer (SaaS platform / IDE plugin /
// test harness) and decides which fields matter. Structurally identical
// to engine.PlanApprovalEvent / engine.ElicitationField (form 3
// callback) -- absence of an internal reader within the scanner's view
// is expected; the fields are the "argument payload" for external
// consumers and should not get an artificial internal reader just to
// satisfy a scanner. See docs/api-reference.md "API consumption shapes"
// for the callback-shape catalogue.
//
// Field guide (for external Recorder implementations):
//   - ContentHash: SHA-256 of the content, for dedup or version checks.
//   - Size / LineCount: file size and line count, for UI display / auditing.
//   - ModTime: file modification time, for freshness checks.
//   - IsPartialView: whether this read covered only part of the file. A
//     Recorder that performs "pre-edit integrity verification" must
//     consult this flag.
type FileStateCacheEntry struct {
	ContentHash   string    // SHA-256 of content (hex)
	Size          int64     // file size in bytes
	LineCount     int       // line count observed by the read
	ModTime       time.Time // file modification time
	IsPartialView bool      // partial read; handleTextFile sets it when offset > 0
}

// FileStateCacheRecorder is the recording interface of the file state cache.
// Unlike FileCacheRecorder (which records raw content), it records file
// metadata state.
type FileStateCacheRecorder interface {
	// RecordState stores the state entry captured for path.
	RecordState(path string, entry FileStateCacheEntry)
}

// ─────────────────────────────────────────────────────────────────────
// 工具结构体
// ─────────────────────────────────────────────────────────────────────

// FileReadTool is the file reading tool.
// Both caches are optional; a nil recorder disables the corresponding
// recording step.
type FileReadTool struct {
	fileCache  FileCacheRecorder      // file content cache (optional; nothing is recorded when nil)
	stateCache FileStateCacheRecorder // file state cache (optional)
}

// NewFileReadTool returns a FileRead tool with no caches attached.
func NewFileReadTool() *FileReadTool {
	tool := new(FileReadTool)
	return tool
}

// NewFileReadToolWithCache returns a FileRead tool that records file content
// into cache. A nil cache yields the same behavior as NewFileReadTool, which
// keeps the constructor backward compatible.
func NewFileReadToolWithCache(cache FileCacheRecorder) *FileReadTool {
	tool := &FileReadTool{}
	tool.fileCache = cache
	return tool
}

// NewFileReadToolFull returns a fully configured FileRead tool with both the
// content cache and the state cache set. Either argument may be nil.
func NewFileReadToolFull(cache FileCacheRecorder, stateCache FileStateCacheRecorder) *FileReadTool {
	tool := &FileReadTool{}
	tool.fileCache = cache
	tool.stateCache = stateCache
	return tool
}

// fileReadInput is the input parameter set of the FileRead tool.
//
// Why does offset start at 0 rather than 1?
//   - The original TS project defaults offset to 1 (1-based), but 0-based is
//     more natural in the Go version.
//   - The model asks to "start at line N"; 0 means the beginning of the file.
//   - Displayed line numbers are 1-based (cat -n format) while the parameter
//     is 0-based.
//   - Consistent with the earlier design: offset=0 reads from the start.
type fileReadInput struct {
	FilePath string `json:"file_path"`
	Offset   int    `json:"offset,omitempty"` // starting line (0-based), default 0
	Limit    int    `json:"limit,omitempty"`  // number of lines to read, default 2000
	Pages    string `json:"pages,omitempty"`  // PDF page range, e.g. "1-5", "3", "10-20"
}

// Name returns the tool name.
func (t *FileReadTool) Name() string {
	const toolName = "Read"
	return toolName
}

// Description returns the model-facing description of the tool.
// The sentences are joined with single spaces into one paragraph.
func (t *FileReadTool) Description(ctx context.Context) string {
	sentences := []string{
		"Reads a file from the local filesystem.",
		"The file_path parameter must be an absolute path, not a relative path.",
		"By default, it reads up to 2000 lines starting from the beginning of the file.",
		"When you already know which part of the file you need, only read that part.",
		"This tool can read images (PNG, JPG, GIF, WEBP) and returns base64 inline content.",
		"This tool can read PDF files (.pdf).",
		"For large PDFs (more than 10 pages), you MUST provide the pages parameter to read specific page ranges.",
		"Maximum 20 pages per request.",
		"This tool can read Jupyter notebooks (.ipynb) and returns all cells with their outputs.",
		"Results are returned with line numbers (cat -n format).",
	}
	return strings.Join(sentences, " ")
}

// InputSchema returns the JSON Schema describing the tool input.
// Only file_path is required; offset, limit and pages are optional.
func (t *FileReadTool) InputSchema() json.RawMessage {
	const schema = `{
		"type": "object",
		"properties": {
			"file_path": {
				"type": "string",
				"description": "The absolute path to the file to read"
			},
			"offset": {
				"type": "integer",
				"description": "The line number to start reading from (0-based). Only provide if the file is too large to read at once.",
				"minimum": 0
			},
			"limit": {
				"type": "integer",
				"description": "The number of lines to read (default 2000). Only provide if the file is too large to read at once.",
				"minimum": 1
			},
			"pages": {
				"type": "string",
				"description": "Page range for PDF files (e.g., \"1-5\", \"3\", \"10-20\"). Only applicable to PDF files. Maximum 20 pages per request."
			}
		},
		"required": ["file_path"]
	}`
	return json.RawMessage(schema)
}

// Metadata returns the tool metadata: a read-only, concurrency-safe,
// non-destructive file tool that is also reachable under the alias
// "FileRead" and audited as a "read" operation.
func (t *FileReadTool) Metadata() tools.Metadata {
	var meta tools.Metadata
	meta.ConcurrencySafe = true
	meta.ReadOnly = true
	meta.Destructive = false
	meta.Aliases = []string{"FileRead"}
	meta.SearchHint = "read file content view image pdf notebook"
	meta.PermissionClass = permission.PermClassFile
	meta.AuditOperation = "read"
	return meta
}

// ─────────────────────────────────────────────────────────────────────
// 设备文件阻止列表
// ─────────────────────────────────────────────────────────────────────

// CLEVER: blocking device files is a pure path check (no I/O) that runs
// before any file operation. It keeps the agent from being tricked into
// reading an infinite-output device (/dev/zero produces zero bytes forever)
// or a blocking input device (/dev/tty hangs waiting for terminal input).
// Safe devices such as /dev/null are deliberately absent from this list.
var blockedDevicePaths = map[string]bool{
	// Infinite output -- never reaches EOF.
	"/dev/zero":    true,
	"/dev/random":  true,
	"/dev/urandom": true,
	"/dev/full":    true,

	// Block waiting for input.
	"/dev/stdin":   true,
	"/dev/tty":     true,
	"/dev/console": true,

	// Reading them is meaningless.
	"/dev/stdout": true,
	"/dev/stderr": true,

	// fd aliases of stdin/stdout/stderr.
	"/dev/fd/0": true,
	"/dev/fd/1": true,
	"/dev/fd/2": true,
}

// isBlockedDevicePath reports whether filePath names a blocked device file.
// Besides the exact entries of blockedDevicePaths it also matches the Linux
// stdio aliases /proc/self/fd/0-2 and /proc/<pid>/fd/0-2, i.e. any path under
// /proc/ that ends in /fd/0, /fd/1 or /fd/2.
func isBlockedDevicePath(filePath string) bool {
	if listed := blockedDevicePaths[filePath]; listed {
		return true
	}
	if !strings.HasPrefix(filePath, "/proc/") {
		return false
	}
	for _, alias := range [...]string{"/fd/0", "/fd/1", "/fd/2"} {
		if strings.HasSuffix(filePath, alias) {
			return true
		}
	}
	return false
}

// ─────────────────────────────────────────────────────────────────────
// 文件类型检测
// ─────────────────────────────────────────────────────────────────────

// imageExtensions lists the known image file extensions whose content is
// returned to the model as inline base64.
//
// CLEVER: SVG is not listed -- it is a text format and is read as text.
// The original TS project classified SVG as an image extension but handled
// it as text; here the split is explicit: imageExtensions only contains
// binary image formats that really need base64 encoding.
var imageExtensions = map[string]bool{
	".png": true,
	".gif": true,

	".jpg":  true,
	".jpeg": true,
	".webp": true,

	".tif":  true,
	".tiff": true,

	".heic": true,
	".heif": true,
}

// isImageFile reports whether path has one of the extensions in
// imageExtensions. The comparison is case-insensitive (the extension is
// lower-cased first).
func isImageFile(path string) bool {
	return imageExtensions[strings.ToLower(filepath.Ext(path))]
}

// isSVGFile reports whether path has a .svg extension (case-insensitive).
// SVG files are returned as text.
func isSVGFile(path string) bool {
	switch strings.ToLower(filepath.Ext(path)) {
	case ".svg":
		return true
	default:
		return false
	}
}

// isPDFFile reports whether path has a .pdf extension (case-insensitive).
func isPDFFile(path string) bool {
	ext := filepath.Ext(path)
	lowered := strings.ToLower(ext)
	return lowered == ".pdf"
}

// isNotebookFile reports whether path has the Jupyter Notebook extension
// .ipynb (case-insensitive).
func isNotebookFile(path string) bool {
	if strings.ToLower(filepath.Ext(path)) != ".ipynb" {
		return false
	}
	return true
}

// ─────────────────────────────────────────────────────────────────────
// 安全路径检查
// ─────────────────────────────────────────────────────────────────────

// CLEVER: path validation runs before the file is opened ("fail fast").
// Why not just call filepath.EvalSymlinks in one step?
//   - EvalSymlinks performs real file system I/O (a stat per path segment).
//   - It fails for missing files, but "does not exist" must stay
//     distinguishable from "unsafe".
//   - So: Clean normalization + absolute-path check first, the Lstat based
//     symlink detection afterwards, layer by layer.

// validatePath normalizes filePath and checks that it is usable.
// It returns the cleaned absolute path, or an error when filePath is empty
// or is not absolute after cleaning. It performs no file system access.
func validatePath(filePath string) (string, error) {
	if len(filePath) == 0 {
		return "", fmt.Errorf("file_path is required")
	}

	// Clean resolves "." and ".." segments and collapses duplicate separators.
	normalized := filepath.Clean(filePath)
	if filepath.IsAbs(normalized) {
		return normalized, nil
	}
	return "", fmt.Errorf("file_path must be an absolute path, got: %s", filePath)
}

// checkSymlink reports whether filePath itself is a symbolic link.
// It returns (isSymlink, realPath, error):
//   - Lstat failure: (false, filePath, the Lstat error)
//   - not a symlink: (false, filePath, nil)
//   - symlink that resolves: (true, the EvalSymlinks result, nil)
//   - symlink that cannot be resolved: (true, filePath, wrapped error)
func checkSymlink(filePath string) (bool, string, error) {
	entry, statErr := os.Lstat(filePath)
	if statErr != nil {
		return false, filePath, statErr
	}

	if entry.Mode()&os.ModeSymlink == 0 {
		return false, filePath, nil
	}

	target, resolveErr := filepath.EvalSymlinks(filePath)
	if resolveErr != nil {
		return true, filePath, fmt.Errorf("cannot resolve symlink: %w", resolveErr)
	}
	return true, target, nil
}

// ─────────────────────────────────────────────────────────────────────
// 编码检测
// ─────────────────────────────────────────────────────────────────────

// CLEVER: the detection order matters -- BOMs first (most certain), then
// null bytes (binary file), finally the UTF-8 fallback. The order resembles
// the file encoding detection of VS Code.

// encodingResult is the outcome of encoding detection.
type encodingResult struct {
	encoding string // "utf-8", "utf-16le", "utf-16be" or "binary"
	bomSize  int    // number of BOM bytes to skip (0 when there is no BOM)
}

// detectEncoding inspects the leading bytes of a file and returns the
// detected encoding together with the BOM size.
//
// LEGACY: treating a null byte as "binary" is the 1980s file(1) heuristic
// and misclassifies UTF-16 text (the high byte of ASCII characters is 0x00).
// Because the UTF-16 BOMs are checked before the null-byte test, UTF-16 with
// a BOM is handled correctly; UTF-16 without a BOM is still reported as
// binary.
func detectEncoding(header []byte) encodingResult {
	// Known byte order marks, checked in this order.
	knownBOMs := []struct {
		mark []byte
		name string
	}{
		{mark: []byte{0xEF, 0xBB, 0xBF}, name: "utf-8"},
		{mark: []byte{0xFF, 0xFE}, name: "utf-16le"},
		{mark: []byte{0xFE, 0xFF}, name: "utf-16be"},
	}
	for _, bom := range knownBOMs {
		if bytes.HasPrefix(header, bom.mark) {
			return encodingResult{encoding: bom.name, bomSize: len(bom.mark)}
		}
	}

	// No BOM: any null byte marks the content as binary.
	if bytes.IndexByte(header, 0) >= 0 {
		return encodingResult{encoding: "binary"}
	}

	// Empty input and everything else falls back to UTF-8.
	return encodingResult{encoding: "utf-8"}
}

// ─────────────────────────────────────────────────────────────────────
// 辅助函数
// ─────────────────────────────────────────────────────────────────────

// formatFileSize renders a byte count in human readable form: plain bytes
// below 1024, otherwise KB, MB or GB (powers of 1024) with one decimal.
func formatFileSize(size int64) string {
	const (
		kb = 1024
		mb = 1024 * kb
		gb = 1024 * mb
	)
	switch {
	case size < kb:
		return fmt.Sprintf("%d bytes", size)
	case size < mb:
		return fmt.Sprintf("%.1f KB", float64(size)/kb)
	case size < gb:
		return fmt.Sprintf("%.1f MB", float64(size)/mb)
	default:
		return fmt.Sprintf("%.1f GB", float64(size)/gb)
	}
}

// computeContentHash returns the SHA-256 digest of data as a lower-case hex
// string.
func computeContentHash(data []byte) string {
	hasher := sha256.New()
	hasher.Write(data) // hash.Hash.Write never returns an error
	return fmt.Sprintf("%x", hasher.Sum(nil))
}

// ─────────────────────────────────────────────────────────────────────
// PDF pages 参数解析
// ─────────────────────────────────────────────────────────────────────

// pdfMaxPagesPerRead is the maximum number of PDF pages a single request may
// cover.
const pdfMaxPagesPerRead = 20

// pdfPageRange is a parsed PDF page range.
type pdfPageRange struct {
	FirstPage int // 1-based
	LastPage  int // 1-based; -1 means open-ended (through the last page)
}

// parsePDFPageRange parses a PDF page range string.
// Supported forms (surrounding whitespace is ignored):
//   - "5"    → {FirstPage: 5, LastPage: 5}
//   - "1-10" → {FirstPage: 1, LastPage: 10}
//   - "3-"   → {FirstPage: 3, LastPage: -1} (open-ended)
//
// It returns nil when the string is empty, a page number is not a complete
// decimal integer >= 1, or the last page is smaller than the first.
//
// Each page number must be the whole token: inputs with trailing garbage
// such as "5abc", "1.5", "1-5x" or "1-5-7" are rejected rather than silently
// truncated to their numeric prefix.
func parsePDFPageRange(pages string) *pdfPageRange {
	pages = strings.TrimSpace(pages)
	if pages == "" {
		return nil
	}

	// parsePage converts one token to a page number. strconv.Atoi fails on
	// any non-numeric remainder, unlike fmt.Sscanf("%d") which stops at the
	// first non-digit and reports success.
	parsePage := func(token string) (int, bool) {
		n, err := strconv.Atoi(strings.TrimSpace(token))
		if err != nil || n < 1 {
			return 0, false
		}
		return n, true
	}

	dashIdx := strings.Index(pages, "-")
	if dashIdx == -1 {
		// Single page: "5".
		page, ok := parsePage(pages)
		if !ok {
			return nil
		}
		return &pdfPageRange{FirstPage: page, LastPage: page}
	}

	first, ok := parsePage(pages[:dashIdx])
	if !ok {
		return nil
	}

	// Open-ended: "N-" (nothing after the dash).
	rest := strings.TrimSpace(pages[dashIdx+1:])
	if rest == "" {
		return &pdfPageRange{FirstPage: first, LastPage: -1}
	}

	// Closed range: "1-10". A second dash in rest makes parsePage fail.
	last, ok := parsePage(rest)
	if !ok || last < first {
		return nil
	}
	return &pdfPageRange{FirstPage: first, LastPage: last}
}

// validatePagesParam checks the pages parameter.
// It returns nil when pages is empty or valid, and an error Result when the
// range cannot be parsed, is open-ended, or covers more than
// pdfMaxPagesPerRead pages.
func validatePagesParam(pages string) *tools.Result {
	if len(pages) == 0 {
		return nil
	}

	var message string
	switch rng := parsePDFPageRange(pages); {
	case rng == nil:
		message = fmt.Sprintf("error: invalid pages parameter: %q. Use formats like \"1-5\", \"3\", or \"10-20\". Pages are 1-indexed.", pages)
	case rng.LastPage == -1:
		// An open-ended range is treated as exceeding the limit.
		message = fmt.Sprintf("error: open-ended page range %q may exceed maximum of %d pages per request. Please specify an end page.", pages, pdfMaxPagesPerRead)
	default:
		span := rng.LastPage - rng.FirstPage + 1
		if span <= pdfMaxPagesPerRead {
			return nil
		}
		message = fmt.Sprintf("error: page range %q covers %d pages, exceeds maximum of %d pages per request. Please use a smaller range.", pages, span, pdfMaxPagesPerRead)
	}

	return &tools.Result{
		Output:  message,
		IsError: true,
	}
}

// ─────────────────────────────────────────────────────────────────────
// Execute -- 主入口
// ─────────────────────────────────────────────────────────────────────

// Execute reads the requested file and returns its content as a tool result.
// The file type decides which handler produces the output:
//   - image            → inline base64 (handleImageFile)
//   - PDF              → file metadata (handlePDFFile)
//   - Jupyter Notebook → per-cell formatting (handleNotebookFile)
//   - anything else    → cat -n style numbered text (handleTextFile)
//
// CLEVER: the order of the checks matters; cheap checks run first so that
// dangerous paths are rejected as early as possible:
//  1. parameter validation (no I/O)
//  2. blocked device paths (pure path check, no I/O)
//  3. symlink detection and resolution (Lstat / EvalSymlinks)
//  4. file type pre-check via Stat, before the file is opened
//  5. open + Stat on the handle, directory and empty-file checks
//  6. dispatch by extension
//
// Problems with the request or the file are reported as a Result with
// IsError set and a nil error; only input that cannot be unmarshalled yields
// a non-nil error. The progress callback is not used.
func (t *FileReadTool) Execute(ctx context.Context, input json.RawMessage, progress tools.ProgressFunc) (*tools.Result, error) {
	var params fileReadInput
	if err := json.Unmarshal(input, &params); err != nil {
		return nil, fmt.Errorf("fileread: invalid input: %w", err)
	}

	// fail builds the (error Result, nil) pair used for every rejection below.
	fail := func(format string, args ...interface{}) (*tools.Result, error) {
		return &tools.Result{
			Output:  fmt.Sprintf(format, args...),
			IsError: true,
		}, nil
	}

	// ── 1. Parameter validation (pure computation, no I/O) ──

	cleanPath, err := validatePath(params.FilePath)
	if err != nil {
		return fail("error: %s", err)
	}

	if params.Pages != "" {
		if errResult := validatePagesParam(params.Pages); errResult != nil {
			return errResult, nil
		}
		// pages only applies to PDF files.
		if !isPDFFile(cleanPath) {
			return fail("error: pages parameter is only applicable to PDF files")
		}
	}

	// Default to 2000 lines; negative offsets are treated as 0.
	limit := params.Limit
	if limit <= 0 {
		limit = 2000
	}

	offset := params.Offset
	if offset < 0 {
		offset = 0
	}

	// ── 2. Blocked device paths (pure path check, no I/O) ──

	if isBlockedDevicePath(cleanPath) {
		return fail("error: cannot read '%s': this device file would block or produce infinite output", params.FilePath)
	}

	// ── 3. Symlink detection ──
	// CLEVER: Lstat first; a symlink is dereferenced to its real path so the
	// following Stat/Open calls operate on the real file.
	_, resolvedPath, err := checkSymlink(cleanPath)
	if err != nil {
		if os.IsNotExist(err) {
			return fail("error: file not found: %s", params.FilePath)
		}
		return fail("error: %v", err)
	}

	// ── 4. File type pre-check (before open) ──
	// Opening a named pipe for reading blocks until a writer shows up, so
	// the type must be checked before os.Open, not only afterwards. Stat
	// errors are ignored here: os.Open below reports them with the proper
	// not-found / permission messages.
	if st, statErr := os.Stat(resolvedPath); statErr == nil {
		switch m := st.Mode(); {
		case m&os.ModeDevice != 0:
			return fail("error: cannot read device file: %s", params.FilePath)
		case m&(os.ModeNamedPipe|os.ModeSocket) != 0:
			return fail("error: cannot read '%s': named pipes and sockets are not supported", params.FilePath)
		}
	}

	// ── 5. Open the file ──

	f, err := os.Open(resolvedPath)
	if err != nil {
		switch {
		case os.IsNotExist(err):
			return fail("error: file not found: %s", params.FilePath)
		case os.IsPermission(err):
			return fail("error: permission denied: %s", params.FilePath)
		default:
			return fail("error: %v", err)
		}
	}
	defer f.Close()

	info, err := f.Stat()
	if err != nil {
		return fail("error: %v", err)
	}

	// Device check on the opened handle.
	// CLEVER: in addition to the path blocklist (blockedDevicePaths) the
	// file type is taken from Stat.Mode(), which catches device files the
	// blocklist misses. Repeated here on the handle in case the path changed
	// between the pre-check and the open.
	if info.Mode()&os.ModeDevice != 0 {
		return fail("error: cannot read device file: %s", params.FilePath)
	}

	if info.IsDir() {
		return fail("error: %s is a directory, not a file. Use Bash with ls to list directory contents.", params.FilePath)
	}

	fileSize := info.Size()

	// Empty file: nothing to dispatch.
	if fileSize == 0 {
		return &tools.Result{
			Output:  "File exists but is empty",
			IsError: false,
		}, nil
	}

	// ── 6. Dispatch by file type ──
	//
	// CLEVER: SVG is XML text; reading the SVG source is more useful to the
	// model than a base64 rendering because the structure can be understood
	// and edited. SVG therefore is not matched by isImageFile and falls
	// through to the text path.
	switch {
	case isImageFile(resolvedPath):
		return t.handleImageFile(f, resolvedPath, fileSize)
	case isPDFFile(resolvedPath):
		return t.handlePDFFile(f, resolvedPath, fileSize, params.Pages)
	case isNotebookFile(resolvedPath):
		return t.handleNotebookFile(f, resolvedPath, fileSize)
	}

	return t.handleTextFile(ctx, f, resolvedPath, info, offset, limit)
}

// ─────────────────────────────────────────────────────────────────────
// 文本文件处理
// ─────────────────────────────────────────────────────────────────────

// handleTextFile handles generic text files and returns their content in
// cat -n format (line number, tab, line).
//
// Flow:
//  1. Read up to 512 bytes and run detectEncoding on them. Binary and UTF-16
//     files are not read; a short informational (non-error) result is
//     returned instead.
//  2. Seek back to the start, skipping a BOM if one was detected.
//  3. Produce the numbered output through one of two paths:
//     - bulk path (a file cache is configured and offset == 0): the whole
//     file is read with io.ReadAll so it can be recorded in the cache; the
//     first limit lines are emitted and all lines are counted.
//     - scanner path (otherwise): lines are scanned one by one, lines before
//     offset are skipped and scanning stops once limit lines were emitted.
//  4. Record the content in fileCache (bulk path only) and the metadata in
//     stateCache (when configured).
//
// offset is 0-based; the printed line numbers are 1-based.
//
// NOTE(review): in the scanner path totalLines only counts the lines
// scanned before the loop stopped, so the recorded LineCount is not the
// file's total when the limit was hit. In that path the recorded
// ContentHash is also computed over the formatted output rather than the
// raw file bytes.
func (t *FileReadTool) handleTextFile(ctx context.Context, f *os.File, filePath string, info os.FileInfo, offset, limit int) (*tools.Result, error) {
	// Read the head of the file first to detect the encoding.
	header := make([]byte, 512)
	n, err := f.Read(header)
	if err != nil && err != io.EOF {
		return &tools.Result{
			Output:  fmt.Sprintf("error detecting file type: %v", err),
			IsError: true,
		}, nil
	}
	header = header[:n]

	enc := detectEncoding(header)

	// Binary file: report the size only.
	if enc.encoding == "binary" {
		return &tools.Result{
			Output:  fmt.Sprintf("Binary file (size: %s)", formatFileSize(info.Size())),
			IsError: false,
		}, nil
	}

	// UTF-16 files are not decoded; return a hint instead.
	// LEGACY: the Go standard library has no built-in UTF-16 text file
	// reading. golang.org/x/text/encoding/unicode could be used, but no
	// external dependency is introduced for it. UTF-16 files are extremely
	// rare in code scenarios (mostly some Windows-generated config files).
	if enc.encoding == "utf-16le" || enc.encoding == "utf-16be" {
		return &tools.Result{
			Output:  fmt.Sprintf("UTF-16 encoded file detected (size: %s). Please convert to UTF-8 first.", formatFileSize(info.Size())),
			IsError: false,
		}, nil
	}

	// Rewind to the start of the file, skipping the BOM.
	seekPos := int64(enc.bomSize)
	if _, err := f.Seek(seekPos, io.SeekStart); err != nil {
		return &tools.Result{
			Output:  fmt.Sprintf("error reading file: %v", err),
			IsError: true,
		}, nil
	}

	var builder strings.Builder
	var fullContent []byte // full content (after the BOM), kept for the cache record
	collectFull := t.fileCache != nil && offset == 0

	lineNum := 0 // number of lines seen so far; also the 1-based number of the current line
	linesRead := 0
	totalLines := 0

	if collectFull {
		// Bulk read optimization: with offset == 0 and a cache configured,
		// read everything at once with io.ReadAll instead of scanning line
		// by line -- this avoids the per-line Scan()/Text()/append overhead.
		// LEGACY: the previous approach looped over scanner.Scan() and kept
		// collecting fullContent past the limit; for large files (>100k
		// lines) the per-line call overhead was significant.
		data, err := io.ReadAll(f)
		if err != nil {
			return &tools.Result{
				Output:  fmt.Sprintf("error reading file: %v", err),
				IsError: true,
			}, nil
		}
		fullContent = data

		// Byte-level line scan: emit the first limit lines and count all lines.
		lineStart := 0
		for i := range data {
			if data[i] == '\n' {
				lineNum++
				if lineNum <= limit {
					line := data[lineStart:i]
					// Accept \r\n line endings (same behavior as bufio.ScanLines).
					if len(line) > 0 && line[len(line)-1] == '\r' {
						line = line[:len(line)-1]
					}
					fmt.Fprintf(&builder, "%6d\t%s\n", lineNum, string(line))
					linesRead++
				}
				lineStart = i + 1
			}
		}
		// Final line without a trailing \n.
		if lineStart < len(data) {
			lineNum++
			if lineNum <= limit {
				line := data[lineStart:]
				if len(line) > 0 && line[len(line)-1] == '\r' {
					line = line[:len(line)-1]
				}
				fmt.Fprintf(&builder, "%6d\t%s\n", lineNum, string(line))
				linesRead++
			}
		}
		totalLines = lineNum
	} else {
		// Scanner path: offset > 0 or no cache; stops as soon as limit is reached.
		// The scanner buffer is capped at 1 MiB per line.
		scanner := bufio.NewScanner(f)
		scanner.Buffer(make([]byte, 0, 1024*1024), 1024*1024)

		for scanner.Scan() {
			// Honor cancellation between lines.
			select {
			case <-ctx.Done():
				return &tools.Result{
					Output:  "error: operation cancelled",
					IsError: true,
				}, nil
			default:
			}

			lineNum++
			totalLines = lineNum
			line := scanner.Text()

			// Skip the lines before offset.
			if lineNum-1 < offset {
				continue
			}

			// This break leaves the for loop (the select above is already closed).
			if linesRead >= limit {
				break
			}

			// cat -n format: line number + tab + content.
			fmt.Fprintf(&builder, "%6d\t%s\n", lineNum, line)
			linesRead++
		}

		// NOTE(review): bufio.Scanner.Err is documented to return the first
		// non-EOF error, so the io.EOF comparison below looks redundant.
		if err := scanner.Err(); err != nil {
			if err != io.EOF {
				return &tools.Result{
					Output:  fmt.Sprintf("error reading file: %v", err),
					IsError: true,
				}, nil
			}
		}
	}

	output := builder.String()
	if output == "" {
		// Nothing was emitted: either offset is past the end or there is no
		// content after the BOM.
		if offset > 0 {
			return &tools.Result{
				Output:  fmt.Sprintf("(file has fewer than %d lines)", offset+1),
				IsError: false,
			}, nil
		}
		return &tools.Result{
			Output:  "File exists but is empty",
			IsError: false,
		}, nil
	}

	// After a successful read, record the content in the file content cache.
	if collectFull && len(fullContent) > 0 {
		t.fileCache.Record(filePath, fullContent)
	}

	// Record the metadata in the file state cache.
	if t.stateCache != nil {
		isPartial := offset > 0
		// Hash the full content when it was collected, otherwise the formatted output.
		var contentForHash []byte
		if collectFull && len(fullContent) > 0 {
			contentForHash = fullContent
		} else {
			contentForHash = []byte(output)
		}
		t.stateCache.RecordState(filePath, FileStateCacheEntry{
			ContentHash:   computeContentHash(contentForHash),
			Size:          info.Size(),
			LineCount:     totalLines,
			ModTime:       info.ModTime(),
			IsPartialView: isPartial,
		})
	}

	return &tools.Result{
		Output:  output,
		IsError: false,
	}, nil
}

// ─────────────────────────────────────────────────────────────────────
// 图片文件处理
// ─────────────────────────────────────────────────────────────────────

// handleImageFile handles image files and returns their content as inline
// base64.
//
// CLEVER: read strategy -- load the file into memory once, detect the
// format, determine the dimensions. Files above 10MB are not inlined; only
// their metadata is returned, which keeps a huge base64 string from
// overflowing the model's context window.
//
// Why not always scale to a fixed size?
//   - Scaling small images (screenshots, icons) loses key detail.
//   - A large image is by itself a sign it does not fit a text conversation.
//   - The scaling quality available in the Go standard library is limited
//     (nearest neighbor / bilinear), unlike the sharp library of the
//     earlier implementation.
//   - Hence: small images are passed through, large ones are described.
//
// On success the Result carries a textual summary in Output and an
// *ImageResult (media type, base64 data, dimensions) in Data.
func (t *FileReadTool) handleImageFile(f *os.File, filePath string, fileSize int64) (*tools.Result, error) {
	const (
		// Inline threshold for large images: 10MB.
		maxInlineSize int64 = 10 * 1024 * 1024
		// SECURITY (CLEVER): converting formats such as HEIC/TIFF requires
		// the whole file in memory before decoding; the 100MB hard cap keeps
		// malicious or corrupt oversized files from triggering an OOM. The
		// 10MB inline threshold normally catches these first, so this is
		// defense in depth.
		maxReadSize int64 = 100 * 1024 * 1024
	)

	fileName := filepath.Base(filePath)

	if fileSize > maxInlineSize {
		// Too large to inline: try the image header for dimensions only.
		sizeNote := ""
		if headerDims := getImageDimensions(f); headerDims != nil {
			sizeNote = fmt.Sprintf(", dimensions: %dx%d", headerDims.Width, headerDims.Height)
		}
		return &tools.Result{
			Output:  fmt.Sprintf("Image file: %s (size: %s%s). File is too large for inline display. Consider resizing or using a different tool.", fileName, formatFileSize(fileSize), sizeNote),
			IsError: false,
		}, nil
	}

	if fileSize > maxReadSize {
		return &tools.Result{
			Output:  fmt.Sprintf("Image file %s is too large to process (%s, limit 100MB).", fileName, formatFileSize(fileSize)),
			IsError: true,
		}, nil
	}

	// Read the whole file from the beginning; a seek failure and a read
	// failure are reported with the same message.
	var data []byte
	_, err := f.Seek(0, io.SeekStart)
	if err == nil {
		data, err = io.ReadAll(f)
	}
	if err != nil {
		return &tools.Result{
			Output:  fmt.Sprintf("error reading image: %v", err),
			IsError: true,
		}, nil
	}

	// Detect the real image format from the magic bytes; the extension is
	// not trusted.
	mediaType := detectImageMediaType(data)

	// EXIF / format orientation correction.
	// Portrait phone photos (JPEG/HEIC/WebP) commonly carry Orientation=6
	// (90° clockwise); uncorrected, the vision API sees the image lying on
	// its side. TIFF is not accepted by the API and is converted to JPEG.
	//
	// ELEVATED: the earlier implementation noted "needs an external library,
	// not implemented yet". Four formats are covered here, per the original
	// author's notes:
	//   JPEG  - pure Go standard library
	//   TIFF  - minimal pure Go decoder (uncompressed/LZW/PackBits)
	//   WebP  - hand-written RIFF parsing + x/image/webp pixel decoding
	//   HEIC  - CGO + libheif (system library, handles HEVC + orientation)
	// fail-open: a failed correction returns the original data and does not
	// affect the main flow.
	switch mediaType {
	case "image/jpeg":
		data = correctJPEGOrientation(data)
	case "image/webp":
		data, mediaType = correctWebPOrientation(data)
	case "image/tiff":
		data, mediaType = correctTIFFImage(data)
	case "image/heic":
		data, mediaType = correctHEICToJPEG(data)
	}

	encoded := base64.StdEncoding.EncodeToString(data)

	// Structured image data so callers do not have to parse Output.
	// Width/Height stay zero when the dimensions cannot be determined.
	imgResult := &ImageResult{
		MediaType: mediaType,
		Base64:    encoded,
	}

	// Dimensions are taken from the corrected data.
	sizeNote := ""
	if dims := decodeImageDimensions(data); dims != nil {
		sizeNote = fmt.Sprintf(", dimensions: %dx%d", dims.Width, dims.Height)
		imgResult.Width = dims.Width
		imgResult.Height = dims.Height
	}

	summary := fmt.Sprintf("Image file: %s (size: %s, type: %s%s)\n[base64 image data: %d chars]", fileName, formatFileSize(fileSize), mediaType, sizeNote, len(encoded))
	return &tools.Result{
		Output:  summary,
		IsError: false,
		Data:    imgResult,
	}, nil
}

// ─────────────────────────────────────────────────────────────────────
// PDF 文件处理
// ─────────────────────────────────────────────────────────────────────

// handlePDFFile handles PDF files.
//
// LEGACY: the Go standard library cannot parse PDF. The original TS project
// relied on pdf.js and poppler-utils for full PDF processing; the Go version
// only does the basics:
//   - confirm via magic bytes that the file really is a PDF
//   - estimate the page count (estimatePDFPageCount)
//   - return file metadata and suggest Bash with an external PDF tool
//
// pages is echoed back in the output when it parses; a warning is added when
// its first page lies beyond the estimated page count.
func (t *FileReadTool) handlePDFFile(f *os.File, filePath string, fileSize int64, pages string) (*tools.Result, error) {
	fileName := filepath.Base(filePath)

	// Read the first 1024 bytes to check the PDF magic bytes.
	head := make([]byte, 1024)
	got, readErr := f.Read(head)
	if readErr != nil && readErr != io.EOF {
		return &tools.Result{
			Output:  fmt.Sprintf("error reading PDF: %v", readErr),
			IsError: true,
		}, nil
	}

	if !isPDFMagicBytes(head[:got]) {
		return &tools.Result{
			Output:  fmt.Sprintf("File has .pdf extension but does not appear to be a valid PDF: %s", fileName),
			IsError: true,
		}, nil
	}

	pageCount := estimatePDFPageCount(f, fileSize)

	// First line: name, size and, when known, the estimated page count.
	headline := fmt.Sprintf("PDF file: %s (size: %s", fileName, formatFileSize(fileSize))
	if pageCount > 0 {
		headline += fmt.Sprintf(", ~%d pages", pageCount)
	}
	headline += ")"
	lines := []string{headline}

	if pages != "" {
		if requested := parsePDFPageRange(pages); requested != nil {
			lines = append(lines, fmt.Sprintf("Requested pages: %s", pages))
			if pageCount > 0 && requested.FirstPage > pageCount {
				lines = append(lines, fmt.Sprintf("Warning: requested page %d exceeds estimated page count %d", requested.FirstPage, pageCount))
			}
		}
	}

	lines = append(lines, "Use Bash with pdftotext or similar tools to extract text content.")

	return &tools.Result{
		Output:  strings.Join(lines, "\n"),
		IsError: false,
	}, nil
}

// ─────────────────────────────────────────────────────────────────────
// Jupyter Notebook 处理
// ─────────────────────────────────────────────────────────────────────

// handleNotebookFile handles Jupyter Notebook files.
//
// CLEVER: an .ipynb file is JSON shaped like
// { "cells": [...], "metadata": {...}, "nbformat": N }
// where every cell has a cell_type (code/markdown/raw), source and outputs.
// The file is read completely and passed to formatNotebook, which produces
// the formatted output.
//
// Notebooks larger than 50MB are rejected with a hint on how to inspect them
// with jq. On success the raw file content is recorded in fileCache when one
// is configured.
func (t *FileReadTool) handleNotebookFile(f *os.File, filePath string, fileSize int64) (*tools.Result, error) {
	// The whole file is read into memory (notebooks are usually small), so
	// cap the size at 50MB.
	const maxNotebookSize int64 = 50 * 1024 * 1024
	if fileSize > maxNotebookSize {
		const tooLargeFormat = "Notebook file is too large (%s). Use Bash with jq to read specific portions:\n" +
			"  cat %q | jq '.cells[:20]'           # First 20 cells\n" +
			"  cat %q | jq '.cells | length'        # Count total cells"
		return &tools.Result{
			Output:  fmt.Sprintf(tooLargeFormat, formatFileSize(fileSize), filePath, filePath),
			IsError: true,
		}, nil
	}

	// Rewind and read everything; both failures share one message.
	var raw []byte
	_, err := f.Seek(0, io.SeekStart)
	if err == nil {
		raw, err = io.ReadAll(f)
	}
	if err != nil {
		return &tools.Result{
			Output:  fmt.Sprintf("error reading notebook: %v", err),
			IsError: true,
		}, nil
	}

	formatted, err := formatNotebook(raw)
	if err != nil {
		return &tools.Result{
			Output:  fmt.Sprintf("error parsing notebook: %v", err),
			IsError: true,
		}, nil
	}

	// Cache the notebook content only after it parsed successfully.
	if t.fileCache != nil {
		t.fileCache.Record(filePath, raw)
	}

	return &tools.Result{
		Output:  formatted,
		IsError: false,
	}, nil
}