package permission

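// Bash command safety analysis.
//
// This file provides the safety checks applied to Bash commands:
//   - splitting compound commands (&& || | ;)
//   - extracting the command name (skipping prefixes such as env)
//   - detecting dangerous commands
//   - detecting references to protected file paths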

import (
	"fmt"
	"strings"

	"git.flytoex.net/yuanwei/flyto-agent/internal/syslib/bash"
)

// dangerousCommands is the set of command names regarded as destructive on
// their own, i.e. able to cause irreversible damage. Keys are lower-case;
// IsDangerousCommandName lower-cases its argument before the lookup.
var dangerousCommands = map[string]bool{
	// Filesystem and block-device destruction.
	"dd":    true, // writes directly to a disk
	"mkfs":  true, // formats a disk
	"rm":    true, // deletes files
	"rmdir": true, // deletes directories

	// Process termination.
	"kill":    true, // kills a process
	"killall": true, // kills processes in bulk
	"pkill":   true, // kills processes by name

	// Machine lifecycle.
	"init":     true, // system initialisation
	"reboot":   true, // restarts the machine
	"shutdown": true, // powers the machine off
}

// dangerousPatterns lists commands that are dangerous only in combination
// with particular arguments (or, when Args is nil, dangerous by themselves).
//
// IsDangerousCommand and AnalyzeDanger compare Command case-insensitively
// against the prefix-stripped command name and then test every Args entry as
// a case-insensitive SUBSTRING of the command text. Entry order matters:
// AnalyzeDanger reports the first entry/argument that matches.
//
// NOTE(review): because matching is substring-based, short flags also match
// inside longer arguments (e.g. "-c" is contained in "--config", "-rf" in
// "my-rfile"). This errs on the side of false positives.
var dangerousPatterns = []struct {
	Command string
	Args    []string // any single matching entry marks the command dangerous
}{
	{"rm", []string{"-rf", "-fr", "--recursive", "--force"}},
	{"chmod", []string{"777", "000", "+s"}},
	{"chown", []string{"-R"}},
	{"git", []string{"push --force", "push -f", "reset --hard", "clean -f", "clean -fd", "clean -fdx"}},
	{"sudo", nil}, // sudo is dangerous by itself
	{"su", nil},   // su is dangerous by itself
	{"doas", nil}, // OpenBSD sudo replacement with the same privilege-escalation power
	// Inline code execution through script interpreters: something like
	// python -c "os.system('rm -rf /')" embeds an arbitrary command inside a
	// string and would otherwise slip past detection. The intent is to flag
	// only the inline-execution flags (-c / -e / ...), not plain script runs
	// such as `python foo.py`.
	{"python", []string{"-c"}},
	{"python3", []string{"-c"}},
	{"perl", []string{"-e", "-E"}},
	{"ruby", []string{"-e"}},
	{"node", []string{"-e", "--eval"}},
	{"php", []string{"-r"}},
	// expect can automate interactive commands, including su/sudo password entry
	{"expect", nil},
}

// dangerousSQLPatterns lists destructive SQL fragments. Entries are
// lower-case because IsDangerousCommand and AnalyzeDanger search for them as
// substrings of the lower-cased full command text. AnalyzeDanger reports the
// first entry that matches, so order is significant.
var dangerousSQLPatterns = []string{
	"drop table",
	"drop database",
	"truncate table",
	"delete from",
	"drop schema",
}

// dangerousEnvVarPrefixes is the deny-list of high-risk environment variable
// names that may appear in a command's `NAME=value cmd` assignment prefix.
//
// Despite the "Prefixes" in its name, lookups are exact: IsDangerousCommand
// and AnalyzeDanger upper-case each assignment name and test it as a map key,
// so "LD_PRELOAD" matches but "LD_PRELOAD_X" does not.
//
// A hit counts as an injection vector: these variables can alter the target
// program's behavior without changing the command name or its arguments
// (loading a malicious shared library, hijacking PATH lookup, redirecting an
// interpreter's import path). The list is not exhaustive but covers common
// post-exploitation tactics on Linux and macOS.
var dangerousEnvVarPrefixes = map[string]bool{
	// Dynamic linker hijacking (glibc / musl).
	"LD_PRELOAD":            true, // Load attacker shared object before libc.
	"LD_LIBRARY_PATH":       true, // Redirect library lookup.
	"LD_AUDIT":              true, // Attach audit module (glibc ≥ 2.4).
	"DYLD_INSERT_LIBRARIES": true, // macOS equivalent of LD_PRELOAD.
	"DYLD_LIBRARY_PATH":     true, // macOS library path override.
	// Shell initialization / parsing.
	"IFS":            true, // Redefine word splitting → arg confusion.
	"BASH_ENV":       true, // Path sourced before every non-interactive bash.
	"ENV":            true, // Same, for sh/dash/ash.
	"PROMPT_COMMAND": true, // Bash executes this before every prompt.
	// PATH hijacking — any target binary can be shadowed.
	"PATH": true,
	// Language-runtime hijacking: attacker code loaded at interpreter start.
	"PYTHONPATH":    true,
	"PYTHONSTARTUP": true,
	"PYTHONHOME":    true,
	"NODE_PATH":     true,
	"NODE_OPTIONS":  true, // Can specify --require attack module.
	"PERL5LIB":      true,
	"PERL5OPT":      true,
	"RUBYLIB":       true,
	"RUBYOPT":       true,
	// Git remote command injection.
	"GIT_SSH_COMMAND":   true,
	"GIT_EXTERNAL_DIFF": true,
}

// dangerousFiles is the list of protected file paths; modifying them can
// affect system behavior or security.
//
// IsDangerousCommand and AnalyzeDanger test each entry as a case-sensitive
// substring of the command text (plus heredoc bodies), in list order, and
// AnalyzeDanger reports the first entry that matches.
//
// NOTE(review): because matching is by substring and first hit wins, longer
// entries are shadowed by shorter ones that precede them (".env.local" and
// ".env.production" by ".env"; ".flyto/settings.json" and
// ".flyto/settings.local.json" by "settings.json" only when the shorter text
// is contained in the longer one) -- confirm whether that reporting order is
// intended.
var dangerousFiles = []string{
	// Shell configuration files
	".bashrc",
	".bash_profile",
	".bash_login",
	".bash_logout",
	".zshrc",
	".zprofile",
	".zshenv",
	".profile",
	".login",
	".cshrc",

	// Git configuration
	".gitconfig",
	".gitignore",

	// SSH configuration
	".ssh/config",
	".ssh/authorized_keys",
	".ssh/known_hosts",
	".ssh/id_rsa",
	".ssh/id_ed25519",

	// Editor and tool configuration
	"settings.json",
	".mcp.json",
	".env",
	".env.local",
	".env.production",
	".npmrc",
	".yarnrc",

	// Flyto configuration
	".flyto/settings.json",
	".flyto/settings.local.json",
}

// SplitCompoundCommand breaks a compound shell command into its individual
// sub-commands.
//
// The input is trimmed, parsed with bash.Parse, and flattened with
// bash.ExtractCommands; the result is the whitespace-trimmed RawText of every
// extracted command, with blank entries dropped. Splitting is therefore done
// by the AST parser rather than by string scanning, so separators that sit
// inside quotes, heredocs, or command substitutions are left to the parser
// to interpret.
//
// Examples carried over from the original documentation (behavior is
// determined by the bash package):
//   - "npm install && npm test" → ["npm install", "npm test"]
//   - "echo 'a && b'" → ["echo 'a && b'"] (no split inside quotes)
//   - "cat file | grep pattern | wc -l" → ["cat file", "grep pattern", "wc -l"]
//
// Returns nil when cmd is blank or the parser yields no commands.
func SplitCompoundCommand(cmd string) []string {
	trimmed := strings.TrimSpace(cmd)
	if trimmed == "" {
		return nil
	}

	parsed := bash.ExtractCommands(bash.Parse(trimmed))
	if len(parsed) == 0 {
		return nil
	}

	pieces := make([]string, 0, len(parsed))
	for i := range parsed {
		text := strings.TrimSpace(parsed[i].RawText)
		if text == "" {
			// Nothing printable for this node; skip it.
			continue
		}
		pieces = append(pieces, text)
	}
	return pieces
}

// ExtractCommandName returns the main command name and sub-command of the
// FIRST command found in cmd.
//
// The string is trimmed, parsed with bash.Parse, and flattened with
// bash.ExtractCommands; name extraction itself is delegated to
// bash.ExtractCommandName, which (per the original documentation) skips
// wrapper prefixes such as env VAR=value, sudo, nohup, time and nice.
//
// Examples carried over from the original documentation:
//   - "npm install"        → ("npm", "install")
//   - "env FOO=bar npm i"  → ("npm", "i")
//   - "sudo rm -rf /"      → ("rm", "-rf")
//   - "git push --force"   → ("git", "push")
//
// Both results are empty when cmd is blank or no command can be extracted.
func ExtractCommandName(cmd string) (command string, subcommand string) {
	trimmed := strings.TrimSpace(cmd)
	if trimmed == "" {
		return "", ""
	}

	if parsed := bash.ExtractCommands(bash.Parse(trimmed)); len(parsed) > 0 {
		// Only the first command is inspected.
		return bash.ExtractCommandName(parsed[0])
	}
	return "", ""
}

// tokenizeCommand splits cmd into whitespace-separated words while honouring
// shell-style quoting. It is a simple byte scanner, not a full shell parser:
//   - a backslash outside single quotes copies the following byte verbatim
//     (a trailing backslash with nothing after it is kept as a literal);
//   - single and double quotes toggle their quoting state and are removed;
//   - spaces and tabs outside quotes end the current word;
//   - empty words (including "" and '') are dropped.
func tokenizeCommand(cmd string) []string {
	var (
		words    []string
		word     strings.Builder
		inSingle bool
		inDouble bool
	)

	// flush emits the word collected so far, if any.
	flush := func() {
		if word.Len() == 0 {
			return
		}
		words = append(words, word.String())
		word.Reset()
	}

	n := len(cmd)
	for pos := 0; pos < n; pos++ {
		b := cmd[pos]
		switch {
		case b == '\\' && !inSingle && pos+1 < n:
			// Escape: take the next byte literally and skip over it.
			pos++
			word.WriteByte(cmd[pos])
		case b == '\'' && !inDouble:
			inSingle = !inSingle
		case b == '"' && !inSingle:
			inDouble = !inDouble
		case (b == ' ' || b == '\t') && !inSingle && !inDouble:
			flush()
		default:
			word.WriteByte(b)
		}
	}
	flush()

	return words
}

// IsDangerousCommand reports whether cmd trips any of this file's danger
// heuristics. A blank command is never dangerous.
//
// Checks, any one of which is sufficient:
//  1. the lower-cased full text contains an entry of dangerousSQLPatterns;
//  2. a command is sudo/su, judged on both the raw command name and the
//     prefix-stripped name returned by bash.ExtractCommandName;
//  3. a `NAME=value` assignment prefix uses a name in dangerousEnvVarPrefixes;
//  4. the prefix-stripped name matches a dangerousPatterns entry and either
//     the entry has nil Args or one of its Args is a case-insensitive
//     substring of the command text;
//  5. the command text, extended with its resolved heredoc bodies, contains
//     an entry of dangerousFiles (case-sensitive substring).
//
// Checks 2-5 run on every command returned by bash.ExtractAllCommands, which
// also yields commands nested in $(...) / backtick substitutions, so that
// `echo $(rm -rf /)` cannot hide the inner command. Note that the
// dangerousCommands table is NOT consulted here; it is only used by
// IsDangerousCommandName.
//
// When the parser yields no commands at all, the function falls back to a
// sudo/su test via ExtractCommandName and a dangerousFiles scan of the raw
// text.
func IsDangerousCommand(cmd string) bool {
	trimmed := strings.TrimSpace(cmd)
	if trimmed == "" {
		return false
	}

	// Cheap full-text SQL screen before any parsing.
	lowered := strings.ToLower(trimmed)
	for i := range dangerousSQLPatterns {
		if strings.Contains(lowered, dangerousSQLPatterns[i]) {
			return true
		}
	}

	// escalates reports whether a lower-cased command name is sudo or su.
	escalates := func(name string) bool {
		switch name {
		case "sudo", "su":
			return true
		}
		return false
	}

	extracted := bash.ExtractAllCommands(bash.Parse(trimmed))
	if len(extracted) == 0 {
		// Nothing extracted (parse failure or empty command): fall back to
		// checks on the raw string.
		first, _ := ExtractCommandName(trimmed)
		if escalates(strings.ToLower(first)) {
			return true
		}
		for i := range dangerousFiles {
			if strings.Contains(trimmed, dangerousFiles[i]) {
				return true
			}
		}
		return false
	}

	for _, c := range extracted {
		// bash.ExtractCommandName strips wrappers such as sudo, so the raw
		// name (c.Name) and the stripped name must both be examined.
		stripped, _ := bash.ExtractCommandName(c)
		strippedLower := strings.ToLower(stripped)
		if escalates(strings.ToLower(c.Name)) || escalates(strippedLower) {
			return true
		}

		// Assignment prefixes are invisible to every other check because
		// name extraction strips them.
		for _, assignment := range c.Assignments {
			if assignment != nil && dangerousEnvVarPrefixes[strings.ToUpper(assignment.Name)] {
				return true
			}
		}

		// Text used for argument and file matching. RawText may be empty
		// for commands found inside substitutions; rebuild it from the name
		// and arguments in that case.
		text := c.RawText
		if text == "" {
			words := make([]string, 0, 1+len(c.Args))
			words = append(words, c.Name)
			words = append(words, c.Args...)
			text = strings.Join(words, " ")
		}

		textLower := strings.ToLower(text)
		for _, rule := range dangerousPatterns {
			if strippedLower != strings.ToLower(rule.Command) {
				continue
			}
			if rule.Args == nil {
				return true
			}
			for _, fragment := range rule.Args {
				if strings.Contains(textLower, strings.ToLower(fragment)) {
					return true
				}
			}
		}

		// Heredoc bodies often carry the real payload. They are obtained
		// through bash.ResolveHeredocBody so that the scanned text is the
		// resolved body rather than the raw source.
		var haystack strings.Builder
		haystack.WriteString(text)
		for _, redir := range c.Redirections {
			body := bash.ResolveHeredocBody(redir)
			if body == "" {
				continue
			}
			haystack.WriteByte(' ')
			haystack.WriteString(body)
		}
		scanned := haystack.String()
		for i := range dangerousFiles {
			if strings.Contains(scanned, dangerousFiles[i]) {
				return true
			}
		}
	}

	return false
}

// IsDangerousCommandName reports whether command, compared
// case-insensitively, is listed in dangerousCommands.
func IsDangerousCommandName(command string) bool {
	flagged, known := dangerousCommands[strings.ToLower(command)]
	return known && flagged
}

// DangerInfo is the structured result of a danger analysis. It is meant to
// travel with the checkpoint_suggested event so the consuming layer can show
// the concrete reason for the risk.
//
// HeredocBodyStart / HeredocBodyEnd are non-zero only when the dangerous
// literal is attributed to a heredoc body; they carry the body's byte
// half-open interval [Start, End) within the ORIGINAL source. This lets
// audit / TUI / logs point precisely at the user-input segment that
// triggered the warning (e.g. "source bytes 42-67"), rather than just
// "there is danger in a heredoc".
type DangerInfo struct {
	Reason           string // Human-readable risk description (English, log-friendly).
	Pattern          string // The specific pattern that matched (e.g. "rm -rf" / "drop table" / ".ssh/authorized_keys").
	HeredocBodyStart int    // Start byte, in the original source, of the offending heredoc body (set only on heredoc attribution).
	HeredocBodyEnd   int    // End byte (exclusive), in the original source, of the offending heredoc body.
}

// amplifySyntaxContext decorates info with the shell-syntax surroundings of
// the offending command c, so a consumer sees e.g. "rm -rf detected (in
// subshell; pipe stage 1; preceded by &&)" instead of a bare finding.
//
// Six CommandInfo fields are read. Each one that is set adds a clause to
// Reason, in this order, joined by "; " and wrapped in " (...)":
//   - Background   → "backgrounded -- escapes foreground control"
//   - InSubshell   → "in subshell"
//   - InPipeline   → "pipe stage N" (N is PipePosition)
//   - Operator     → "preceded by OP" when non-empty
//   - Position     → "position N" when N > 0
//
// The three structural flags additionally prepend markers to Pattern; when
// several are set the resulting prefix reads "| " then "(subshell) " then
// "& ".
//
// info is returned unchanged when c is nil or none of the fields is set.
func amplifySyntaxContext(info DangerInfo, c *bash.CommandInfo) DangerInfo {
	if c == nil {
		return info
	}

	notes := make([]string, 0, 5)
	if c.Background {
		notes = append(notes, "backgrounded -- escapes foreground control")
	}
	if c.InSubshell {
		notes = append(notes, "in subshell")
	}
	if c.InPipeline {
		// InPipeline, not PipePosition, decides membership: PipePosition is
		// also 0 for commands that are not in a pipeline at all.
		notes = append(notes, fmt.Sprintf("pipe stage %d", c.PipePosition))
	}
	if op := c.Operator; op != "" {
		notes = append(notes, "preceded by "+op)
	}
	if c.Position > 0 {
		// Position 0 is the first (or only) command and carries no useful
		// location information, so only later commands are annotated.
		notes = append(notes, fmt.Sprintf("position %d", c.Position))
	}
	if len(notes) == 0 {
		return info
	}

	// Assemble the Pattern markers front to back.
	var marker strings.Builder
	if c.InPipeline {
		marker.WriteString("| ")
	}
	if c.InSubshell {
		marker.WriteString("(subshell) ")
	}
	if c.Background {
		marker.WriteString("& ")
	}

	info.Reason += " (" + strings.Join(notes, "; ") + ")"
	info.Pattern = marker.String() + info.Pattern
	return info
}

// AnalyzeDanger performs a structured danger analysis of cmd and returns
// whether it is dangerous together with a DangerInfo describing the first
// finding. A blank command yields (false, DangerInfo{}).
//
// It applies the same heuristics as IsDangerousCommand but additionally
// reports the matched pattern and a readable reason, so the consumer of a
// checkpoint_suggested event can show WHY the command is risky rather than
// just "high risk". Pattern and Reason are separate fields because they serve
// different purposes (machine-matchable token vs. human text).
//
// Order of checks (first hit wins):
//  1. destructive SQL fragment anywhere in the lower-cased text;
//  2. per command from bash.ExtractAllCommands (which includes commands
//     nested in command substitutions, so `$(rm -rf /)` cannot hide):
//     a. raw command name is sudo/su;
//     b. a dangerous `NAME=value` assignment prefix;
//     c. prefix-stripped command name is sudo/su;
//     d. a dangerousPatterns entry matches;
//     e. a dangerousFiles entry occurs in the command text or in one of its
//     resolved heredoc bodies.
//  3. if the parser produced no commands at all: a sudo/su test on the first
//     top-level command (mirroring IsDangerousCommand's fallback), then a
//     dangerousFiles scan of the raw text.
//
// Every per-command finding is passed through amplifySyntaxContext so that
// Reason/Pattern also describe the command's syntactic surroundings.
func AnalyzeDanger(cmd string) (bool, DangerInfo) {
	cmd = strings.TrimSpace(cmd)
	if cmd == "" {
		return false, DangerInfo{}
	}

	cmdLower := strings.ToLower(cmd)

	// Destructive SQL is checked first with a plain full-text match.
	for _, pattern := range dangerousSQLPatterns {
		if strings.Contains(cmdLower, pattern) {
			return true, DangerInfo{
				Reason:  "destructive SQL statement: " + pattern,
				Pattern: pattern,
			}
		}
	}

	// Recursively extract every command, including those nested inside
	// command substitutions.
	root := bash.Parse(cmd)
	allCmds := bash.ExtractAllCommands(root)

	for _, c := range allCmds {
		name, subcommand := bash.ExtractCommandName(c)
		nameLower := strings.ToLower(name)

		// Privilege escalation. bash.ExtractCommandName skips a sudo prefix
		// ("sudo rm -rf /" yields name "rm"), so the raw, unstripped c.Name
		// is tested here and the stripped name is tested further below.
		rawNameLower := strings.ToLower(c.Name)
		if rawNameLower == "sudo" || rawNameLower == "su" {
			return true, amplifySyntaxContext(DangerInfo{
				Reason:  "privilege escalation via " + rawNameLower,
				Pattern: rawNameLower,
			}, c)
		}

		// Dangerous env-var prefix analysis. The first hit is reported with
		// the exact NAME=VALUE in Pattern so a UI can render
		// "injection vector: LD_PRELOAD=..." rather than a generic warning.
		for _, assign := range c.Assignments {
			if assign == nil {
				continue
			}
			if dangerousEnvVarPrefixes[strings.ToUpper(assign.Name)] {
				return true, amplifySyntaxContext(DangerInfo{
					Reason:  "dangerous env-var prefix: " + assign.Name + " can alter target-program behavior",
					Pattern: assign.Name + "=" + assign.Value,
				}, c)
			}
		}

		if nameLower == "sudo" || nameLower == "su" {
			return true, amplifySyntaxContext(DangerInfo{
				Reason:  "privilege escalation via " + nameLower,
				Pattern: nameLower,
			}, c)
		}

		// Text used for argument and file matching. RawText may be empty for
		// commands parsed out of a substitution (no position information);
		// fall back to name + args so no check is skipped.
		baseText := c.RawText
		if baseText == "" {
			parts := make([]string, 0, 1+len(c.Args))
			parts = append(parts, c.Name)
			parts = append(parts, c.Args...)
			baseText = strings.Join(parts, " ")
		}
		cmdStr := strings.ToLower(baseText)

		// Dangerous command + argument combinations.
		for _, pattern := range dangerousPatterns {
			if nameLower != strings.ToLower(pattern.Command) {
				continue
			}
			if pattern.Args == nil {
				return true, amplifySyntaxContext(DangerInfo{
					Reason:  "dangerous command: " + nameLower,
					Pattern: nameLower,
				}, c)
			}
			for _, arg := range pattern.Args {
				// Both sides are lower-cased so that a table entry such as
				// "-R" also matches a user-typed "-r".
				if !strings.Contains(cmdStr, strings.ToLower(arg)) {
					continue
				}
				pat := nameLower + " " + arg
				desc := pat
				switch {
				case nameLower == "rm":
					desc = "recursive force delete (rm -rf)"
				case nameLower == "git" && subcommand != "":
					desc = "git " + subcommand + " (destructive)"
				}
				return true, amplifySyntaxContext(DangerInfo{
					Reason:  "dangerous pattern: " + desc,
					Pattern: pat,
				}, c)
			}
		}

		// Protected-file check on the command text plus heredoc bodies.
		//
		// Heredocs that contributed body text are remembered: when the
		// dangerous literal comes from a heredoc body, DangerInfo names the
		// heredoc tag and whether the body is subject to expansion.
		// Redirections for which bash.ResolveHeredocBody returns "" (e.g.
		// plain, non-heredoc redirections) contribute nothing.
		checkStr := baseText
		var heredocContribs []*bash.RedirectionInfo
		for _, redir := range c.Redirections {
			body := bash.ResolveHeredocBody(redir)
			if body == "" {
				continue
			}
			checkStr += " " + body
			heredocContribs = append(heredocContribs, redir)
		}
		for _, dangerousFile := range dangerousFiles {
			if !strings.Contains(checkStr, dangerousFile) {
				continue
			}
			reason := "operation on protected file: " + dangerousFile
			pattern := dangerousFile

			// Attribute to a heredoc body if one contains the literal
			// (contributors are scanned in append order; first hit wins).
			// The "operation on protected file:" prefix is preserved for
			// consumers that parse Reason.
			attributed := false
			var attributedBodyStart, attributedBodyEnd int
			for _, redir := range heredocContribs {
				body := bash.ResolveHeredocBody(redir)
				if !strings.Contains(body, dangerousFile) {
					continue
				}
				tag := redir.HeredocTag
				if tag == "" {
					tag = "?"
				}
				pattern = "<<" + tag + ": " + dangerousFile
				// posClause surfaces the body's byte half-open interval in
				// the ORIGINAL source, letting audit / TUI / logs pinpoint
				// the exact user-input segment.
				posClause := fmt.Sprintf(" at source bytes %d-%d", redir.HeredocBodyStart, redir.HeredocBodyEnd)
				if redir.HeredocQuoted {
					reason = "operation on protected file: " + dangerousFile +
						" (in heredoc <<" + tag + posClause + ", inert literal -- no runtime expansion)"
				} else {
					reason = "operation on protected file: " + dangerousFile +
						" (in heredoc <<" + tag + posClause + ", runtime expansion possible)"
				}
				attributedBodyStart = redir.HeredocBodyStart
				attributedBodyEnd = redir.HeredocBodyEnd
				attributed = true
				break
			}

			// Otherwise try arg-level attribution: find the first argument
			// containing the literal and consult c.ArgQuoted[i]. A true
			// entry is reported as an inert literal (no runtime expansion);
			// a false or missing entry is reported as possibly expanding at
			// runtime. This mirrors the heredoc branch so both kinds of hit
			// produce a consistently shaped Reason.
			if !attributed {
				for i, arg := range c.Args {
					if !strings.Contains(arg, dangerousFile) {
						continue
					}
					if i < len(c.ArgQuoted) && c.ArgQuoted[i] {
						reason = "operation on protected file: " + dangerousFile +
							" (literal arg -- no runtime expansion)"
					} else {
						reason = "operation on protected file: " + dangerousFile +
							" (arg may expand at runtime)"
					}
					break
				}
			}
			return true, amplifySyntaxContext(DangerInfo{
				Reason:           reason,
				Pattern:          pattern,
				HeredocBodyStart: attributedBodyStart,
				HeredocBodyEnd:   attributedBodyEnd,
			}, c)
		}
	}

	// Nothing was extracted (parse failure or empty command): fall back to
	// checks that do not depend on the recursive extraction.
	if len(allCmds) == 0 {
		// Keep the verdict in step with IsDangerousCommand, whose fallback
		// tests the first top-level command for sudo/su. The already parsed
		// root is reused instead of parsing cmd a second time.
		if top := bash.ExtractCommands(root); len(top) > 0 {
			first, _ := bash.ExtractCommandName(top[0])
			if firstLower := strings.ToLower(first); firstLower == "sudo" || firstLower == "su" {
				return true, amplifySyntaxContext(DangerInfo{
					Reason:  "privilege escalation via " + firstLower,
					Pattern: firstLower,
				}, top[0])
			}
		}
		for _, dangerousFile := range dangerousFiles {
			if strings.Contains(cmd, dangerousFile) {
				return true, DangerInfo{
					Reason:  "operation on protected file: " + dangerousFile,
					Pattern: dangerousFile,
				}
			}
		}
	}

	return false, DangerInfo{}
}

// GetCommandPrefixes returns every leading word sequence of cmd, shortest
// first, for use in rule matching. Words come from tokenizeCommand and are
// re-joined with single spaces, so quoting and extra whitespace in the input
// are not preserved.
//
// Example: "npm install lodash" → ["npm", "npm install", "npm install lodash"].
//
// Returns nil when cmd contains no words.
func GetCommandPrefixes(cmd string) []string {
	words := tokenizeCommand(strings.TrimSpace(cmd))
	if len(words) == 0 {
		return nil
	}

	var acc strings.Builder
	out := make([]string, 0, len(words))
	for idx := range words {
		if idx != 0 {
			acc.WriteByte(' ')
		}
		acc.WriteString(words[idx])
		// Snapshot the accumulated text as the next, longer prefix.
		out = append(out, acc.String())
	}
	return out
}

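// Note: tokenizeCommand and GetCommandPrefixes keep their original
// string-based tokenizer because they are still used on some paths that are
// not security-critical. SplitCompoundCommand and ExtractCommandName have
// already been upgraded to use the AST parser.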