// 从 AST 提取安全相关信息.
//
// 本模块遍历 Bash AST,提取命令,参数,重定向等信息,
// 供权限系统和安全检查使用.
//
// 关键设计:
//   - 递归遍历所有节点,包括子 shell,管道,列表中的命令
//   - 跳过 env,sudo 等前缀,提取真正的命令名
//   - 正确处理环境变量赋值前缀
//   - 标记每个命令的上下文(是否在子 shell 中,管道位置等)
package bash

import (
	"strings"
	"unicode/utf8"
)

// CommandInfo holds everything extracted for one simple command.
//
// Background=true means this command was placed on the left of an `&`
// connector at some ancestor level -- bash runs it in the background
// and returns the prompt immediately, so the command escapes the
// foreground shell's lifecycle (cancellation, timeout, parent signal).
// Safety consumers treat this as a risk amplifier -- a backgrounded
// `rm -rf` cannot be cancelled by aborting the interactive session.
type CommandInfo struct {
	Name         string             // First word of the command as written. Wrapper prefixes (env/sudo/...) are NOT stripped here; call ExtractCommandName for that.
	Args         []string           // Remaining words after Name.
	Assignments  []*Assignment      // Leading environment-variable assignments (copied from the AST node).
	Redirections []*RedirectionInfo // Redirections and heredocs attached to the command.
	// Position is the sequential position within the current shell context.
	// NodeSubshell starts a fresh context and resets Position to 0;
	// NodeList / NodePipeline accumulate outer ctx.position + inner index
	// (so a command inside a nested list sees the outer parent position
	// plus its own index). Position is NOT a global absolute index across
	// the whole command string -- use it to locate commands within a
	// single shell level, not to uniquely identify a command in nested
	// structures.
	Position int

	// Operator is the bash connector preceding this command (&& / || / | / ;).
	// The first command in a list / pipeline (i == 0) inherits the parent
	// ctx.operator; an outermost command has "" (no preceding operator).
	// Consumers: "" means "starting position with no prior connector";
	// a non-empty value is whatever the enclosing list node recorded as
	// its Operator, or "|" for pipeline stages.
	Operator string

	// InSubshell is true iff the command is enclosed in a NodeSubshell.
	// Remains true inside nested subshells.
	InSubshell bool

	// InPipeline is true iff the command is a stage of a NodePipeline.
	// **Always check this before reading PipePosition** -- non-pipe commands
	// have PipePosition == 0 (zero-value default), which collides with the
	// "first pipeline stage" value. InPipeline is the only reliable
	// discriminator.
	InPipeline bool

	// PipePosition is the stage index within a pipeline (0 = first stage).
	// For non-pipe commands this field defaults to 0 which carries no
	// meaning -- always gate reads behind InPipeline.
	PipePosition int
	Background   bool               // Left of an `&` connector at some ancestor level, i.e. backgrounded.
	RawText      string             // Value of the AST node the command was extracted from.

	// ArgQuoted is a parallel slice to Args (len(ArgQuoted) == len(Args))
	// recording per-arg quoting regime at parse time. true means the arg
	// was a fully-literal QuotedString (single quotes '...' or ANSI-C
	// $'...') whose body undergoes no parameter / command substitution.
	// false means the arg was either unquoted or double-quoted "...",
	// where $VAR / $(cmd) expand at runtime. Mirrors the heredoc Quoted
	// contract on RedirectionInfo so arg-level security analysis can
	// distinguish "inert literal" args from "possibly expanded" args --
	// a quoted `'$HOME/.ssh'` arg is *not* the same runtime target as
	// an unquoted `$HOME/.ssh`, and downstream consumers (permission
	// layer danger attribution) need the regime to render faithful
	// reasons without re-parsing the original source.
	// The slice is left nil when the command has no arguments.
	ArgQuoted []bool
}

// RedirectionInfo describes one redirection attached to a command.
//
// Heredoc fields (Tag/StripTabs/Quoted/Body) are populated only for
// NodeHeredoc redirections (Operator starting with "<<"). They surface
// the canonical heredoc semantics recorded by the parser so security
// consumers don't have to re-derive them from string shapes (e.g.
// consumers can read StripTabs directly rather than comparing
// Operator == "<<-"). Body is the raw heredoc content; Quoted=true
// means the delimiter was quoted (<<'EOF' / <<"EOF" / <<\EOF) so no
// parameter / command substitution expands inside Body -- a key gate
// for whether safety analysis needs to recursively parse Body.
type RedirectionInfo struct {
	Operator string // Redirection operator: >, >>, <, <<, 2>&1, ...
	Target   string // Target file or fd; for heredocs with no parser-supplied target this is the delimiter tag.
	IsStatic bool   // Result of IsStaticRedirectTarget for Target (no runtime expansion involved).

	HeredocTag       string // Heredoc delimiter tag, populated only for "<<*" operators.
	HeredocStripTabs bool   // <<- variant: leading tabs are stripped from the body at runtime.
	HeredocQuoted    bool   // Delimiter was quoted; the body does NOT expand variables / command substitutions.
	HeredocBody      string // Raw heredoc body text.

	// HeredocBodyStart / HeredocBodyEnd: byte offsets of the heredoc
	// body within the ORIGINAL source (half-open interval). Populated
	// only for "<<*" operators with a matched body. Lets security
	// consumers (AnalyzeDanger) point DangerInfo back to concrete
	// "source bytes N-M" for operators / TUI / audit.
	HeredocBodyStart int
	HeredocBodyEnd   int
}

// extractContext is the traversal state handed down (by value) while
// walking the AST; each recursion level works on its own copy.
//
// background is set when an ancestor NodeList has Background=true and
// we're traversing its left subtree. It propagates down so every
// SimpleCommand in the backgrounded subtree inherits the flag.
type extractContext struct {
	inSubshell   bool   // inside at least one NodeSubshell
	inPipeline   bool   // inside a NodePipeline
	pipePosition int    // stage index within the enclosing pipeline
	operator     string // connector preceding the current node ("" when none)
	position     int    // sequential position within the current shell context
	depth        int    // recursion depth, compared against MaxRecursionDepth
	background   bool   // see the type comment
}

// MaxRecursionDepth is the maximum recursion depth of the AST walk.
//
// It guards against stack exhaustion from adversarially deep nesting such
// as $($($(nested))). The traversal functions stop when ctx.depth is
// strictly greater than this value, so nodes at depth 20 are still visited
// and anything deeper is skipped without being extracted; callers are
// expected to fall back to another classifier or to user confirmation for
// such input.
const MaxRecursionDepth = 20

// ExtractCommands returns every simple command found in the AST rooted at
// root, including commands nested in subshells, pipelines and lists.
//
// The walk is bounded by MaxRecursionDepth; subtrees beyond that depth are
// skipped. A nil root yields a nil slice.
//
// This function does not look inside NodeCommandSubstitution nodes. Use
// ExtractAllCommands when the commands inside $(...) are needed as well.
func ExtractCommands(root *Node) []*CommandInfo {
	var found []*CommandInfo
	if root != nil {
		// The zero-value context is the top level: depth 0, no operator,
		// not in a subshell or pipeline.
		extractFromNode(root, extractContext{}, &found)
	}
	return found
}

// ExtractAllCommands returns every simple command in the AST rooted at
// root, and additionally descends into command substitutions ($(...) and
// backtick form).
//
// Difference from ExtractCommands:
//   - ExtractCommands never looks inside NodeCommandSubstitution.
//   - ExtractAllCommands strips the $(...) / backtick wrapper of each
//     substitution, re-parses the inner text and extracts from that too.
//
// Why it matters: in `echo $(rm -rf /)` ExtractCommands only reports echo,
// so a safety check built on it would consider the line harmless. Regular
// expressions were rejected for locating the inner text because they cannot
// match nested parentheses such as $(echo $(rm -rf /)).
//
// A nil root yields a nil slice.
func ExtractAllCommands(root *Node) []*CommandInfo {
	var found []*CommandInfo
	if root != nil {
		// Start from the zero-value (top-level, depth 0) context.
		extractAllFromNode(root, extractContext{}, &found)
	}
	return found
}

// extractAllFromNode walks the subtree rooted at node, appending every
// simple command to result, and re-parses the text of command
// substitutions, assignment values and unquoted heredoc bodies so that
// commands hidden inside them are reported too.
//
// ctx is the context that applies to node itself; every child (and every
// re-parsed fragment) is visited with depth+1. The walk stops silently once
// ctx.depth exceeds MaxRecursionDepth.
func extractAllFromNode(node *Node, ctx extractContext, result *[]*CommandInfo) {
	if node == nil || ctx.depth > MaxRecursionDepth {
		return
	}

	// Context shared by plain children and by re-parsed fragments: same as
	// ctx, one level deeper.
	deeper := ctx
	deeper.depth = ctx.depth + 1

	// reparse feeds a text fragment back through the parser and extracts
	// from the resulting tree. The depth increment is what bounds
	// pathological self-nesting input.
	reparse := func(src string) {
		if root := Parse(src); root != nil {
			extractAllFromNode(root, deeper, result)
		}
	}

	switch node.Type {
	case NodeCommandSubstitution:
		// node.Value is the raw text including its wrapper, e.g.
		// "$(rm -rf /)" or "`rm -rf /`". The wrapper has to be removed
		// before parsing, otherwise the parser would wrap the whole thing
		// into a substitution node again.
		if inner := stripCommandSubstitution(node.Value); inner != "" {
			reparse(inner)
		}
		return

	case NodeSimpleCommand:
		if info := extractSimpleCommand(node, ctx); info != nil {
			*result = append(*result, info)
		}

		// Visit every word/redirection child; substitutions among them are
		// unwrapped by the NodeCommandSubstitution arm above.
		for _, kid := range node.Children {
			extractAllFromNode(kid, deeper, result)
		}

		// VAR=`cmd` / VAR=$(cmd): the substitution lives in the assignment's
		// Value string rather than in Children, so it needs its own pass.
		// Only values that are entirely one substitution are unwrapped
		// (stripCommandSubstitution must change the text).
		for _, asg := range node.Assignments {
			if asg == nil {
				continue
			}
			val := asg.Value
			if !strings.Contains(val, "$(") && !strings.Contains(val, "`") {
				continue
			}
			if inner := stripCommandSubstitution(val); inner != "" && inner != val {
				reparse(inner)
			}
		}

		// Heredoc bodies can carry substitutions as well:
		//   cat <<EOF
		//   $(rm -rf /)
		//   EOF
		// With a quoted delimiter (<<'EOF' / <<"EOF" / <<\EOF) nothing is
		// expanded at runtime, so such bodies are inert and skipped to
		// avoid false positives.
		for _, kid := range node.Children {
			if kid == nil || kid.Type != NodeHeredoc || kid.HeredocQuoted {
				continue
			}
			body := kid.HeredocBody
			// Only bodies containing '$' or a backtick can expand anything.
			if body == "" || !strings.ContainsAny(body, "$`") {
				continue
			}
			reparse(body)
		}
		return
	}

	// Container nodes: visit each child with a context adjusted for the
	// kind of container. Control structures, function definitions and any
	// other node type only get the depth increment.
	for i, kid := range node.Children {
		kidCtx := deeper
		switch node.Type {
		case NodeProgram, NodeCompoundCommand:
			kidCtx.position = i

		case NodeSubshell:
			// A subshell opens a fresh positional context.
			kidCtx.inSubshell = true
			kidCtx.position = i

		case NodeList:
			kidCtx.position = ctx.position + i
			if i > 0 {
				kidCtx.operator = node.Operator
			}
			// `&` semantics: only the left side goes to the background.
			// When this list ends in `&` the first child gets the flag and
			// later children, being the foreground continuation, have any
			// inherited flag cleared. For other connectors the inherited
			// value passes through untouched, so an outer `&` still covers
			// the whole group.
			if node.Background {
				kidCtx.background = i == 0
			}

		case NodePipeline:
			kidCtx.inPipeline = true
			kidCtx.pipePosition = i
			kidCtx.position = ctx.position + i
			if i > 0 {
				kidCtx.operator = "|"
			}
		}
		extractAllFromNode(kid, kidCtx, result)
	}
}

// stripCommandSubstitution removes the outer wrapper of a command
// substitution and returns the inner command text, trimmed of surrounding
// whitespace.
//
// Recognised forms:
//   - $(cmd)  -> "cmd"
//   - `cmd`   -> "cmd"
//
// Input that is not wrapped in either form is returned as-is apart from the
// whitespace trim. Only the first and last characters are inspected; the
// function does not verify that the opening and closing delimiters actually
// belong to the same substitution.
func stripCommandSubstitution(s string) string {
	text := strings.TrimSpace(s)
	switch {
	case strings.HasPrefix(text, "$(") && strings.HasSuffix(text, ")"):
		return strings.TrimSpace(text[2 : len(text)-1])
	case len(text) >= 2 && strings.HasPrefix(text, "`") && strings.HasSuffix(text, "`"):
		// The length guard keeps a lone backtick from matching as both
		// opener and closer.
		return strings.TrimSpace(text[1 : len(text)-1])
	}
	return text
}

// extractFromNode walks the subtree rooted at node and appends every simple
// command it finds to result. Command substitutions are not unwrapped here;
// see extractAllFromNode for that.
//
// ctx is the context that applies to node itself; children are visited one
// level deeper. Once ctx.depth exceeds MaxRecursionDepth the walk stops
// silently. The limit is a defence against maliciously deep nesting rather
// than a performance measure.
func extractFromNode(node *Node, ctx extractContext, result *[]*CommandInfo) {
	if node == nil || ctx.depth > MaxRecursionDepth {
		return
	}

	// Simple commands are the leaves of this walk: record and stop.
	if node.Type == NodeSimpleCommand {
		if info := extractSimpleCommand(node, ctx); info != nil {
			*result = append(*result, info)
		}
		return
	}

	// Everything else is a container. Each child starts from the parent
	// context one level deeper, then receives the adjustments specific to
	// the container kind. Control structures, function definitions and
	// unknown node types only get the depth increment.
	for i, kid := range node.Children {
		kidCtx := ctx
		kidCtx.depth = ctx.depth + 1

		switch node.Type {
		case NodeProgram, NodeCompoundCommand:
			kidCtx.position = i

		case NodeSubshell:
			// A subshell opens a fresh positional context.
			kidCtx.inSubshell = true
			kidCtx.position = i

		case NodeList:
			kidCtx.position = ctx.position + i
			if i > 0 {
				kidCtx.operator = node.Operator
			}
			// Same `&` propagation as extractAllFromNode: on a
			// backgrounded list the first child gets the flag and later
			// children have it cleared; otherwise the inherited value is
			// left alone. The logic is duplicated on purpose because the
			// two traversals differ in command-substitution handling.
			if node.Background {
				kidCtx.background = i == 0
			}

		case NodePipeline:
			kidCtx.inPipeline = true
			kidCtx.pipePosition = i
			kidCtx.position = ctx.position + i
			if i > 0 {
				kidCtx.operator = "|"
			}
		}

		extractFromNode(kid, kidCtx, result)
	}
}

// extractSimpleCommand builds a CommandInfo from a NodeSimpleCommand node.
//
// Context-derived fields (Operator, InSubshell, InPipeline, PipePosition,
// Position, Background) are copied from ctx; RawText is node.Value and
// Assignments is node.Assignments. Children are classified as follows:
//
//   - NodeAssignment: ignored here, node.Assignments already carries them;
//   - NodeRedirection / NodeHeredoc: converted to a RedirectionInfo;
//   - anything else (word, quoted string, variable expansion, command
//     substitution): treated as a word. The first word becomes Name, the
//     rest become Args.
//
// nil children are skipped, matching the nil tolerance of the traversal
// functions and of resolveWordValue; previously a nil child caused a nil
// pointer dereference here.
//
// Returns nil when node is nil or is not a NodeSimpleCommand.
func extractSimpleCommand(node *Node, ctx extractContext) *CommandInfo {
	if node == nil || node.Type != NodeSimpleCommand {
		return nil
	}

	cmd := &CommandInfo{
		Operator:     ctx.operator,
		InSubshell:   ctx.inSubshell,
		InPipeline:   ctx.inPipeline,
		PipePosition: ctx.pipePosition,
		Position:     ctx.position,
		Background:   ctx.background,
		RawText:      node.Value,
	}

	// wordQuoted runs parallel to words, recording each word's
	// parser-side Quoted regime -- only a NodeQuotedString child's
	// child.Quoted is honored as literal-quoted; other node types
	// (NodeWord / NodeVariableExpansion / NodeCommandSubstitution)
	// are treated as unquoted (false). This is the sole production
	// read-site that flows the parser-written Node.Quoted through
	// to CommandInfo.ArgQuoted.
	var words []string
	var wordQuoted []bool

	for _, child := range node.Children {
		if child == nil {
			continue
		}
		switch child.Type {
		case NodeAssignment:
			// Already available through node.Assignments.
		case NodeRedirection, NodeHeredoc:
			redir := &RedirectionInfo{
				Operator: child.RedirectOp,
				Target:   child.RedirectTarget,
				IsStatic: IsStaticRedirectTarget(child.RedirectTarget),
			}
			if child.Type == NodeHeredoc {
				// Forward canonical heredoc fields so security consumers
				// (bash_security.go IsDangerousCommand / AnalyzeDanger)
				// can analyze Body and gate on Quoted.
				redir.HeredocTag = child.HeredocTag
				redir.HeredocStripTabs = child.HeredocStripTabs
				redir.HeredocQuoted = child.HeredocQuoted
				redir.HeredocBody = child.HeredocBody
				redir.HeredocBodyStart = child.HeredocBodyStart
				redir.HeredocBodyEnd = child.HeredocBodyEnd
				// Heredoc target semantically IS the tag. Parser leaves
				// RedirectTarget empty on NodeHeredoc; fill it here so
				// Target remains a single honest surface for all redirs.
				if redir.Target == "" {
					redir.Target = child.HeredocTag
					redir.IsStatic = IsStaticRedirectTarget(child.HeredocTag)
				}
			}
			cmd.Redirections = append(cmd.Redirections, redir)
		default:
			// Word / QuotedString / VariableExpansion / CommandSubstitution
			words = append(words, resolveWordValue(child))
			wordQuoted = append(wordQuoted, child.Type == NodeQuotedString && child.Quoted)
		}
	}

	cmd.Assignments = node.Assignments

	if len(words) > 0 {
		cmd.Name = words[0]
		cmd.Args = words[1:]
		// Parallel to Args (skip [0] name). Nil when no args so
		// consumers can treat nil and empty identically.
		if len(wordQuoted) > 1 {
			cmd.ArgQuoted = wordQuoted[1:]
		}
	}

	return cmd
}

// resolveWordValue returns the textual value of a word-like node.
//
// By node type:
//   - NodeWord with children (compound word): the concatenation of the
//     resolved children; without children, node.Value;
//   - NodeQuotedString: node.Value with the outer '...' or "..." removed, or
//     the decodeANSIC result of the body for the $'...' form; a value that
//     matches none of these shapes is returned unchanged;
//   - any other type (variable expansion, command substitution, ...):
//     node.Value as-is, since the real value is only known at runtime.
//
// A nil node resolves to "".
func resolveWordValue(node *Node) string {
	if node == nil {
		return ""
	}

	if node.Type == NodeWord && len(node.Children) > 0 {
		var sb strings.Builder
		for _, part := range node.Children {
			sb.WriteString(resolveWordValue(part))
		}
		return sb.String()
	}

	if node.Type != NodeQuotedString {
		return node.Value
	}

	raw := node.Value
	n := len(raw)
	if n < 2 {
		return raw
	}
	first, last := raw[0], raw[n-1]
	switch {
	case first == '\'' && last == '\'', first == '"' && last == '"':
		return raw[1 : n-1]
	case n >= 3 && first == '$' && raw[1] == '\'' && last == '\'':
		// ANSI-C quoting: the escape sequences are decoded as well.
		return decodeANSIC(raw[2 : n-1])
	}
	return raw
}

// decodeANSIC 解码 ANSI-C 转义序列(bash $'...' 引号内的内容).
//
// 支持的转义序列:
//
//	\\, \', \", \n, \t, \r, \a, \b, \f, \v, \0
//	\xNN (十六进制,1-2位)
//	\NNN (八进制,1-3位)
//	\uNNNN (Unicode 4位十六进制)
//	\UNNNNNNNN (Unicode 8位十六进制)
//
// 升华改进(ELEVATED): 早期方案只剥引号不解码,$'\\x72\\x6d' 绕过安全检查.
// decodeANSIC 将转义序列转换为实际字符,防止通过文字转义绕过危险命令检测.
func decodeANSIC(s string) string {
	var result []byte
	i := 0
	for i < len(s) {
		if s[i] != '\\' || i+1 >= len(s) {
			result = append(result, s[i])
			i++
			continue
		}
		// 转义序列
		escape := s[i+1]
		switch escape {
		case '\\':
			result = append(result, '\\')
			i += 2
		case '\'':
			result = append(result, '\'')
			i += 2
		case '"':
			result = append(result, '"')
			i += 2
		case 'n':
			result = append(result, '\n')
			i += 2
		case 't':
			result = append(result, '\t')
			i += 2
		case 'r':
			result = append(result, '\r')
			i += 2
		case 'a':
			result = append(result, '\a')
			i += 2
		case 'b':
			result = append(result, '\b')
			i += 2
		case 'f':
			result = append(result, '\f')
			i += 2
		case 'v':
			result = append(result, '\v')
			i += 2
		case '0':
			result = append(result, 0)
			i += 2
		case 'x':
			// \xNN:十六进制,1-2位
			if i+2 < len(s) {
				hex := s[i+2 : min(i+4, len(s))]
				val := parseHex(hex)
				if val >= 0 {
					result = append(result, byte(val))
					i += 2 + len(hex)
				} else {
					result = append(result, s[i])
					i++
				}
			} else {
				result = append(result, s[i])
				i++
			}
		case 'u', 'U':
			// \uNNNN 或 \UNNNNNNNN
			digits := 4
			if escape == 'U' {
				digits = 8
			}
			if i+2+digits <= len(s) {
				hex := s[i+2 : i+2+digits]
				val := parseHex(hex)
				if val >= 0 {
					result = append(result, utf8Rune(val)...)
					i += 2 + digits
				} else {
					result = append(result, s[i])
					i++
				}
			} else {
				result = append(result, s[i])
				i++
			}
		default:
			// 其他 \X:保留字符 X
			if escape >= '0' && escape <= '7' {
				// \NNN:八进制,1-3位
				oct := s[i+1 : min(i+4, len(s))]
				val := parseOct(oct)
				if val >= 0 {
					result = append(result, byte(val))
					i += 1 + len(oct)
				} else {
					result = append(result, s[i])
					i++
				}
			} else {
				result = append(result, s[i])
				i++
			}
		}
	}
	return string(result)
}

// parseHex converts a string of hexadecimal digits (either case) to its
// integer value. It returns -1 as soon as a non-hex character is seen.
// The empty string yields 0. No overflow check is performed, so callers
// should keep the input short (decodeANSIC passes at most 8 digits).
func parseHex(s string) int {
	acc := 0
	for i := 0; i < len(s); i++ {
		var nibble int
		switch b := s[i]; {
		case '0' <= b && b <= '9':
			nibble = int(b - '0')
		case 'a' <= b && b <= 'f':
			nibble = int(b-'a') + 10
		case 'A' <= b && b <= 'F':
			nibble = int(b-'A') + 10
		default:
			return -1
		}
		acc = acc<<4 | nibble
	}
	return acc
}

// parseOct converts a string of octal digits to its integer value. It
// returns -1 as soon as a character outside '0'..'7' is seen. The empty
// string yields 0.
func parseOct(s string) int {
	acc := 0
	for i := 0; i < len(s); i++ {
		b := s[i]
		if b < '0' || b > '7' {
			return -1
		}
		acc = acc*8 + int(b-'0')
	}
	return acc
}

// utf8Rune returns the UTF-8 encoding of the code point r. Values that are
// not valid code points (out of range or in the surrogate range) are
// encoded as U+FFFD, the replacement character, as done by the unicode/utf8
// package.
func utf8Rune(r int) []byte {
	return utf8.AppendRune(make([]byte, 0, utf8.UTFMax), rune(r))
}

// ExtractCommandName returns the command that is really executed, together
// with its subcommand, after skipping wrapper prefixes.
//
// Skipped prefixes:
//   - NAME=value words in front of the command
//   - env [options] [NAME=value]... cmd
//   - sudo [options] cmd, doas [options] cmd
//   - timeout [options] DURATION cmd
//   - nice / ionice / stdbuf [options] cmd
//   - nohup, time, strace, ltrace, run0, sg, expect, unbuffer, systemd-run
//     (the wrapper word only; their own options are not interpreted)
//
// The subcommand is the word right after the command unless that word
// starts with "-". For "git push --force" the result is ("git", "push").
// ("", "") is returned when cmd is nil, has an empty Name, or consists of
// prefixes only.
//
// This is a security-critical path: skipping too little reports the wrapper
// (or one of its options) as the command, skipping too much hides the real
// command. Options and operands of env, doas and timeout are therefore
// skipped explicitly -- without that, `env -i rm -rf /` reported "-i" and
// `timeout 10 rm -rf /` reported "10".
func ExtractCommandName(cmd *CommandInfo) (command string, subcommand string) {
	if cmd == nil || cmd.Name == "" {
		return "", ""
	}

	// Full word list: command name followed by its arguments.
	words := make([]string, 0, 1+len(cmd.Args))
	words = append(words, cmd.Name)
	words = append(words, cmd.Args...)

	idx := 0

	// skipOptions advances idx past a run of words starting with "-". An
	// option listed in withArg also swallows the word after it. A literal
	// "--" ends option processing.
	skipOptions := func(withArg ...string) {
		for idx < len(words) && strings.HasPrefix(words[idx], "-") {
			opt := words[idx]
			idx++
			if opt == "--" {
				return
			}
			for _, o := range withArg {
				if o == opt && idx < len(words) {
					idx++
					break
				}
			}
		}
	}

scan:
	for idx < len(words) {
		word := words[idx]

		// Environment assignments. Leading ones normally live in
		// Assignments, but they also appear after wrappers, as in
		// `env VAR=x cmd`.
		if strings.Contains(word, "=") && !strings.HasPrefix(word, "-") && !strings.HasPrefix(word, "/") {
			idx++
			continue
		}

		switch word {
		case "env":
			idx++
			// -S/--split-string is deliberately not given an argument: its
			// operand contains the command text and must stay visible.
			skipOptions("-u", "--unset", "-C", "--chdir")
		case "sudo":
			idx++
			skipOptions("-u", "-g", "-C", "-p")
		case "doas":
			idx++
			skipOptions("-u", "-C", "-a")
		case "timeout":
			idx++
			skipOptions("-s", "--signal", "-k", "--kill-after")
			// The first operand of timeout is always the DURATION.
			if idx < len(words) {
				idx++
			}
		case "nice":
			idx++
			skipOptions("-n")
		case "ionice":
			idx++
			skipOptions("-c", "-n", "-p")
		case "stdbuf":
			idx++
			// Fused forms such as -oL need no extra argument skipped.
			skipOptions()
		case "nohup", "time", "strace", "ltrace",
			"run0", "sg",
			"expect", "unbuffer",
			"systemd-run":
			idx++
		default:
			break scan
		}
	}

	if idx >= len(words) {
		return "", ""
	}

	command = words[idx]
	if idx+1 < len(words) {
		if next := words[idx+1]; !strings.HasPrefix(next, "-") {
			subcommand = next
		}
	}
	return command, subcommand
}

// IsStaticRedirectTarget reports whether a redirection target is static,
// i.e. free of anything the shell could expand or reinterpret at runtime,
// so that the path can be matched against security rules as written.
//
// The target is rejected (false) when it
//   - is empty;
//   - contains whitespace (space, tab, LF, CR) -- guards against a parser
//     that merged `out /etc/passwd` into one target, and against CR
//     injection used to disguise a file name on the terminal;
//   - contains a quote character -- a sign the parser did not unquote it;
//   - contains '$' or a backtick (variable / command substitution);
//   - contains '*', '?' or '[' (glob patterns);
//   - contains '&' (file descriptor reference), '<' or '>' (process
//     substitution), or '{' (brace expansion);
//   - starts with '#' (comment ambiguity), '!' (history expansion) or '='
//     (zsh =cmd expansion);
//   - starts with '~' in any form other than "~" or "~/..." -- ~user/path
//     cannot be resolved here.
//
// "~" and "~/path" are accepted because they expand to a definite path;
// checking the resolved path is left to the permission system's path rules.
// Missing any of these checks would let a rule be bypassed, which is why
// the list is deliberately exhaustive.
func IsStaticRedirectTarget(target string) bool {
	if target == "" {
		return false
	}

	// Characters that disqualify a target wherever they occur.
	const dynamicChars = " \t\n\r" + // whitespace
		"'\"" + // quotes
		"$`" + // variable / command substitution
		"*?[" + // glob
		"&" + // fd reference
		"<>" + // process substitution
		"{" // brace expansion
	if strings.ContainsAny(target, dynamicChars) {
		return false
	}

	// Characters that matter only in first position.
	switch target[0] {
	case '#', '!', '=':
		return false
	case '~':
		return target == "~" || strings.HasPrefix(target, "~/")
	}
	return true
}

// ResolveHeredocBody returns the heredoc body as bash sees it at runtime.
//
// For the <<- form (RedirectionInfo.HeredocStripTabs == true) bash removes
// all leading tabs -- tabs only, never spaces -- from every body line, so a
// safety consumer that matches literal paths or re-parses the body has to
// look at the stripped text rather than the raw one. For the plain << form
// the body is returned unchanged.
//
// Returns "" for a nil redir or one whose Operator does not start with
// "<<", so callers can invoke it without checking the operator first.
//
// Cross-package consumers (pkg/permission/bash_security.go) use this to
// run dangerous-file detection on the same text bash actually produces,
// closing the gap between analysis and runtime.
func ResolveHeredocBody(redir *RedirectionInfo) string {
	if redir == nil || !strings.HasPrefix(redir.Operator, "<<") {
		return ""
	}

	raw := redir.HeredocBody
	if raw == "" || !redir.HeredocStripTabs {
		return raw
	}

	// Single pass over the bytes: drop tabs for as long as we are at the
	// start of a line, copy everything else.
	var out strings.Builder
	out.Grow(len(raw))
	lineStart := true
	for i := 0; i < len(raw); i++ {
		c := raw[i]
		if lineStart && c == '\t' {
			continue
		}
		lineStart = c == '\n'
		out.WriteByte(c)
	}
	return out.String()
}

// GetCommandPrefixes returns every space-joined prefix of the command's
// words (Name followed by Args), shortest first, for matching against
// permission rules.
//
// For the command "npm install lodash" the result is
//
//	["npm", "npm install", "npm install lodash"]
//
// A nil cmd or an empty Name yields nil.
func GetCommandPrefixes(cmd *CommandInfo) []string {
	if cmd == nil || cmd.Name == "" {
		return nil
	}

	// Build the full command line once; each prefix is then a leading
	// slice of it, ending right after the corresponding word.
	all := make([]string, 0, 1+len(cmd.Args))
	all = append(all, cmd.Name)
	all = append(all, cmd.Args...)
	line := strings.Join(all, " ")

	out := make([]string, 0, len(all))
	end := len(cmd.Name)
	out = append(out, line[:end])
	for _, arg := range cmd.Args {
		end += 1 + len(arg) // separator + word
		out = append(out, line[:end])
	}
	return out
}