package builtin

// Grep 工具 -- 正则表达式搜索文件内容(双引擎版).
//
// 这是 Agent 在代码库中搜索的核心能力:通过正则表达式模式
// 在文件内容中搜索匹配的行.
//
// 升华改进(ELEVATED): 双引擎架构 - 检测系统是否有 rg(ripgrep),
// 有就用(性能最优,SIMD 加速),没有就用纯 Go regexp 兜底(零依赖).
// 替代方案:只用纯 Go regexp(原始设计,零依赖但在大型代码库中慢 10-50 倍).
//
// 特性:
//   - Ripgrep 引擎:调用系统 rg 命令,支持 --type 文件类型过滤,多行匹配,PCRE2
//   - 内置引擎:纯 Go regexp 实现,手动 .gitignore 处理
//   - 支持 output_mode: content / files_with_matches / count
//   - 支持 -A/-B/-C 上下文行数(分别控制前后行数)
//   - 支持 case_insensitive 大小写不敏感搜索
//   - 支持 glob 参数过滤文件(逗号分割,花括号内逗号保护)
//   - 支持 type 参数按语言类型过滤
//   - 支持 multiline 跨行匹配
//   - 支持 offset 跳过前 N 条
//   - 尊重 .gitignore 规则
//   - 自动检测并跳过二进制文件
//   - 长行截断(超过 500 字符)
//   - head_limit caps the output (default 250; omitted, 0 or negative values all fall back to 250 - pass a larger value for more)
//   - 匹配文本用 >> << 标记高亮(内置引擎)
//   - VCS 目录排除(.git, .svn, .hg, .bzr, .jj, .sl)
//   - ConcurrencySafe: true,ReadOnly: true

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"regexp"

	"git.flytoex.net/yuanwei/flyto-agent/pkg/execenv"
	"git.flytoex.net/yuanwei/flyto-agent/pkg/permission"
	"git.flytoex.net/yuanwei/flyto-agent/pkg/tools"
)

// GrepTool is the regular-expression content search tool.
//
// It keeps the Executor supplied to NewGrepTool; Execute passes that
// executor to DetectGrepEngine when choosing a search engine.
type GrepTool struct {
	executor execenv.Executor
}

// NewGrepTool returns a Grep tool bound to executor.
//
// executor must not be nil: the constructor enforces strict dependency
// injection ("方案 β" in the project's notes) and panics otherwise, treating a
// nil executor as a programming error rather than a runtime condition.
// Per the original M1 note, the executor is held for the ripgrep engine's
// subprocess (the engine itself is not visible in this file section).
func NewGrepTool(executor execenv.Executor) *GrepTool {
	if executor == nil {
		panic("builtin.NewGrepTool: executor is nil (方案 β 严格 DI)")
	}
	tool := new(GrepTool)
	tool.executor = executor
	return tool
}

// grepInput is the JSON-decoded input of the Grep tool.
//
// Compared with a single "context" parameter, it offers separate -A/-B/-C
// controls plus file-type filtering, multiline matching and an offset.
// The simpler single-parameter design was the original alternative; it could
// not control the number of leading and trailing lines independently.
type grepInput struct {
	Pattern         string `json:"pattern"`                    // regular expression to search for (required by Execute)
	Path            string `json:"path,omitempty"`             // file or directory to search; Execute uses the working directory when empty
	Glob            string `json:"glob,omitempty"`             // glob filter, forwarded to the engine unchanged
	OutputMode      string `json:"output_mode,omitempty"`      // Execute defaults this to "files_with_matches" when empty
	Context         int    `json:"context,omitempty"`          // -C: N lines before and after each match
	ContextBefore   int    `json:"-B,omitempty"`               // -B: N lines before each match
	ContextAfter    int    `json:"-A,omitempty"`               // -A: N lines after each match
	CaseInsensitive bool   `json:"case_insensitive,omitempty"` // -i
	HeadLimit       int    `json:"head_limit,omitempty"`       // output cap; Execute substitutes 250 for any value <= 0
	FileType        string `json:"type,omitempty"`             // file type filter (e.g. go, js, py)
	Multiline       bool   `json:"multiline,omitempty"`        // allow matches to span lines
	Offset          int    `json:"offset,omitempty"`           // skip the first N entries
}

// Name returns the tool's name, the constant string "Grep".
func (t *GrepTool) Name() string {
	const toolName = "Grep"
	return toolName
}

// Description returns the human/model-facing description of the tool.
//
// ctx is accepted for interface compatibility and is not used.
//
// The head_limit sentence describes what Execute actually does: a missing,
// zero or negative head_limit is replaced by 250, so 0 does NOT mean
// "unlimited". Advertising "0 for unlimited" would make callers believe they
// received the full result set when it was in fact capped.
func (t *GrepTool) Description(ctx context.Context) string {
	return "Searches file contents using regular expressions. " +
		"Supports full regex syntax, file filtering with glob patterns, " +
		"and multiple output modes (content, files_with_matches, count). " +
		"Supports file type filtering (--type), multiline matching, " +
		"and separate before/after context control (-A, -B, -C). " +
		"Respects .gitignore rules. Automatically skips binary files. " +
		"Uses ripgrep when available, falls back to pure Go. " +
		"Use head_limit to control output size (default 250; 0 or a negative value also means 250, pass a larger number to see more)."
}

// InputSchema returns the JSON Schema that describes the tool's input.
//
// The property names mirror the json tags of grepInput; only "pattern" is
// required. The head_limit description states the behavior Execute really
// implements (any value <= 0 falls back to 250) instead of promising that 0
// disables the limit.
func (t *GrepTool) InputSchema() json.RawMessage {
	return json.RawMessage(`{
		"type": "object",
		"properties": {
			"pattern": {
				"type": "string",
				"description": "The regular expression pattern to search for in file contents"
			},
			"path": {
				"type": "string",
				"description": "File or directory to search in. Defaults to the current working directory."
			},
			"glob": {
				"type": "string",
				"description": "Glob pattern to filter files (e.g. \"*.js\", \"*.{ts,tsx}\"). Multiple patterns separated by commas."
			},
			"output_mode": {
				"type": "string",
				"description": "Output mode: \"content\" shows matching lines, \"files_with_matches\" shows only file paths (default), \"count\" shows match counts.",
				"enum": ["content", "files_with_matches", "count"]
			},
			"context": {
				"type": "integer",
				"description": "Number of lines to show before and after each match (like grep -C)"
			},
			"-B": {
				"type": "integer",
				"description": "Number of lines to show before each match (like grep -B)"
			},
			"-A": {
				"type": "integer",
				"description": "Number of lines to show after each match (like grep -A)"
			},
			"case_insensitive": {
				"type": "boolean",
				"description": "Case insensitive search"
			},
			"head_limit": {
				"type": "integer",
				"description": "Maximum number of output lines/entries (default 250; 0 or a negative value also means 250, pass a larger number to see more)"
			},
			"type": {
				"type": "string",
				"description": "File type to search (e.g. go, js, py, rust, java). Uses ripgrep type system."
			},
			"multiline": {
				"type": "boolean",
				"description": "Enable multiline mode where . matches newlines and patterns can span lines"
			},
			"offset": {
				"type": "integer",
				"description": "Skip first N entries before applying head_limit"
			}
		},
		"required": ["pattern"]
	}`)
}

// Metadata returns the static metadata of the tool: it is declared safe for
// concurrent use, read-only and non-destructive, is classified under the
// read-only permission class and is audited as a "read" operation.
func (t *GrepTool) Metadata() tools.Metadata {
	var md tools.Metadata

	// Safety characteristics.
	md.ConcurrencySafe = true
	md.ReadOnly = true
	md.Destructive = false

	// Discovery, permission and audit attributes.
	md.SearchHint = "grep search regex pattern content find"
	md.PermissionClass = permission.PermClassReadOnly
	md.AuditOperation = "read"

	return md
}

// grepMatch describes a single match.
//
// LEGACY: the type is retained so that older tests in grep_test.go that
// still reference it keep compiling. According to the original note, internal
// code has migrated to builtinMatch (not visible in this part of the file).
// NOTE(review): the type is unexported, so only code inside this package
// (including its in-package tests) can reference it.
type grepMatch struct {
	file    string
	line    int
	text    string
	context []string // surrounding context lines
}

// Execute runs one grep search described by the JSON-encoded input.
//
// Steps:
//  1. Decode input into grepInput and require a non-empty pattern.
//  2. Apply defaults: output_mode "files_with_matches", head_limit 250.
//  3. Resolve the search path (working directory when empty) and stat it.
//  4. Pre-validate the pattern with Go's regexp package.
//  5. Select an engine with DetectGrepEngine and delegate to its Search.
//
// Returns:
//   - (nil, error) only when input cannot be decoded into grepInput.
//   - (result with IsError=true, nil) for user-facing failures: empty
//     pattern, unusable path, invalid regex, or an engine error.
//   - (result with IsError=false, nil) carrying the engine output, or a
//     "No matches found" message when the engine reports no matches.
//
// progress is not used by this tool.
func (t *GrepTool) Execute(ctx context.Context, input json.RawMessage, progress tools.ProgressFunc) (*tools.Result, error) {
	var params grepInput
	if err := json.Unmarshal(input, &params); err != nil {
		return nil, fmt.Errorf("grep: invalid input: %w", err)
	}

	// fail wraps a user-facing error message in an error result.
	fail := func(msg string) (*tools.Result, error) {
		return &tools.Result{Output: msg, IsError: true}, nil
	}
	// nonNegative clamps caller-supplied counts; a negative line count or
	// offset has no meaning and must not reach the engine.
	nonNegative := func(n int) int {
		if n < 0 {
			return 0
		}
		return n
	}

	if params.Pattern == "" {
		return fail("error: pattern is required")
	}

	// Default output mode.
	outputMode := params.OutputMode
	if outputMode == "" {
		outputMode = "files_with_matches"
	}

	// head_limit: an int field cannot distinguish "not provided" from an
	// explicit 0, so every value <= 0 falls back to the default of 250.
	// Callers that want more output must pass a larger positive number.
	// (Using *int would allow telling the two cases apart.)
	headLimit := params.HeadLimit
	if headLimit <= 0 {
		headLimit = 250
	}

	// Resolve the search path.
	searchPath := params.Path
	if searchPath == "" {
		cwd, err := os.Getwd()
		if err != nil {
			return fail(fmt.Sprintf("error getting working directory: %v", err))
		}
		searchPath = cwd
	}

	info, err := os.Stat(searchPath)
	if err != nil {
		if os.IsNotExist(err) {
			return fail(fmt.Sprintf("error: path not found: %s", searchPath))
		}
		// Permission problems, I/O errors, etc. are not "not found";
		// report the real cause so the caller can act on it.
		return fail(fmt.Sprintf("error: cannot access path %s: %v", searchPath, err))
	}

	// When a single file is targeted, its parent directory is the search dir.
	searchDir := searchPath
	isFile := !info.IsDir()
	if isFile {
		searchDir = filepath.Dir(searchPath)
	}

	// Validate the pattern up front, for every engine, so that an invalid
	// regex produces the same error format regardless of which engine would
	// have run. This is done before engine detection so a doomed request
	// does no further work.
	// NOTE(review): this means a pattern rejected by Go's regexp is refused
	// even if the selected engine would have accepted it.
	if _, regexErr := compileGrepPattern(params.Pattern, params.CaseInsensitive, params.Multiline); regexErr != nil {
		return fail(fmt.Sprintf("error: invalid regex pattern: %v", regexErr))
	}

	// Assemble the engine parameters.
	grepParams := &GrepParams{
		Pattern:         params.Pattern,
		SearchPath:      searchPath,
		SearchDir:       searchDir,
		IsFile:          isFile,
		Glob:            params.Glob,
		OutputMode:      outputMode,
		ContextBefore:   nonNegative(params.ContextBefore),
		ContextAfter:    nonNegative(params.ContextAfter),
		ContextBoth:     nonNegative(params.Context),
		CaseInsensitive: params.CaseInsensitive,
		HeadLimit:       headLimit,
		FileType:        params.FileType,
		Multiline:       params.Multiline,
		Offset:          nonNegative(params.Offset),
	}

	// Pick the engine and run the search.
	engine := DetectGrepEngine(t.executor)
	result, err := engine.Search(ctx, grepParams)
	if err != nil {
		return fail(fmt.Sprintf("error: %v", err))
	}

	if result.TotalMatches == 0 || result.Output == "" {
		return &tools.Result{
			Output:  fmt.Sprintf("No matches found for pattern: %s", params.Pattern),
			IsError: false,
		}, nil
	}

	return &tools.Result{
		Output:  result.Output,
		IsError: false,
	}, nil
}

// compileGrepPattern compiles pattern with Go's regexp package; Execute uses
// it to validate a pattern before running a search.
//
// Inline flags are prepended to the pattern: "(?s)" when multiline is set
// (so '.' also matches newlines) followed by "(?i)" when caseInsensitive is
// set. It returns the compiled expression, or the error from regexp.Compile.
func compileGrepPattern(pattern string, caseInsensitive, multiline bool) (*regexp.Regexp, error) {
	var flags string
	if multiline {
		flags += "(?s)"
	}
	if caseInsensitive {
		flags += "(?i)"
	}
	return regexp.Compile(flags + pattern)
}

// truncateLine shortens line to at most maxLen bytes of content and appends
// "... [line truncated]" when anything was cut. Lines that already fit are
// returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte maxLen is a
// continuation byte, the cut moves back to the start of that rune, so the
// result stays valid UTF-8 whenever the input was. A negative maxLen is
// treated as 0 instead of panicking on the slice expression.
func truncateLine(line string, maxLen int) string {
	const (
		truncatedSuffix  = "... [line truncated]"
		maxContinuations = 3 // a UTF-8 rune has at most 3 continuation bytes
	)

	if maxLen < 0 {
		maxLen = 0
	}
	if len(line) <= maxLen {
		return line
	}

	// isContinuation reports whether b has the form 10xxxxxx.
	isContinuation := func(b byte) bool { return b&0xC0 == 0x80 }

	// line[cut] is the first byte that would be dropped. If it continues a
	// rune that started earlier, step back to that rune's first byte.
	cut := maxLen
	for steps := 0; steps < maxContinuations && cut > 0 && isContinuation(line[cut]); steps++ {
		cut--
	}
	if isContinuation(line[cut]) {
		// Not valid UTF-8 around the cut point; fall back to a plain byte cut.
		cut = maxLen
	}
	return line[:cut] + truncatedSuffix
}

// isBinaryExtension 判断是否是二进制文件扩展名.
// 精妙之处(CLEVER): 用 map 而非 switch - O(1) 查找,且易于扩展.
// 覆盖常见的可执行文件,图片,压缩包,文档,音视频,字体格式.
func isBinaryExtension(ext string) bool {
	ext = toLowerASCII(ext)
	return binaryExtMap[ext]
}

// binaryExtMap is the set of lower-case file extensions (with leading dot)
// that isBinaryExtension treats as binary.
//
// It lives at package level so the map is built once instead of on every
// call; the original design created it inside the function, allocating each
// time.
var binaryExtMap = map[string]bool{
	// Executables, libraries and object files.
	".exe": true, ".dll": true, ".so": true, ".dylib": true,
	".a": true, ".o": true, ".obj": true,

	// Generic binary/data blobs.
	".bin": true, ".dat": true,

	// Images.
	".png": true, ".jpg": true, ".jpeg": true, ".gif": true,
	".bmp": true, ".ico": true, ".svg": true, ".webp": true,

	// Archives and compressed files.
	".zip": true, ".tar": true, ".gz": true, ".bz2": true,
	".xz": true, ".7z": true, ".rar": true,

	// Documents.
	".pdf": true, ".doc": true, ".docx": true, ".xls": true, ".xlsx": true,

	// Compiled / bytecode artifacts.
	".wasm": true, ".pyc": true, ".class": true,

	// Audio and video.
	".mp3": true, ".mp4": true, ".avi": true, ".mov": true, ".wav": true,

	// Fonts.
	".ttf": true, ".otf": true, ".woff": true, ".woff2": true, ".eot": true,
}

// toLowerASCII returns s with the ASCII letters 'A'-'Z' converted to
// lower case; every other byte (including non-ASCII bytes) is left untouched.
//
// When s contains no upper-case ASCII letter it is returned as is, without
// allocating. Otherwise a new string is built from a converted byte copy.
func toLowerASCII(s string) string {
	// Locate the first byte that needs converting, if any.
	first := -1
	for i := 0; i < len(s); i++ {
		if 'A' <= s[i] && s[i] <= 'Z' {
			first = i
			break
		}
	}
	if first < 0 {
		return s
	}

	const caseDelta = 'a' - 'A'
	lowered := []byte(s)
	// Bytes before first are already known to need no change.
	for i := first; i < len(lowered); i++ {
		if ch := lowered[i]; 'A' <= ch && ch <= 'Z' {
			lowered[i] = ch + caseDelta
		}
	}
	return string(lowered)
}