// Package bash: this file extracts security-relevant information from a
// Bash AST.
//
// It walks the Bash AST and extracts commands, arguments, redirections and
// related details for use by the permission system and security checks.
//
// Key design points:
//   - Recursively visit every node, including commands inside subshells,
//     pipelines and lists.
//   - Skip prefixes such as env and sudo to find the real command name.
//   - Handle environment-variable assignment prefixes correctly.
//   - Tag each command with its context (inside a subshell, position in a
//     pipeline, and so on).
package bash

import (
	"strings"
	"unicode/utf8"
)

// CommandInfo holds the complete information extracted for one command.
//
// Background=true means this command was placed on the left of an `&`
// connector at some ancestor level -- bash runs it in the background
// and returns the prompt immediately, so the command escapes the
// foreground shell's lifecycle (cancellation, timeout, parent signal).
// Safety consumers treat this as a risk amplifier -- a backgrounded
// `rm -rf` cannot be cancelled by aborting the interactive session.
type CommandInfo struct {
	// Name is the command name.
	// NOTE(review): the original comment says env/sudo-style prefixes are
	// already skipped, but extractSimpleCommand stores the first word as-is
	// and prefix skipping is done by ExtractCommandName -- confirm which
	// contract consumers rely on.
	Name         string
	Args         []string           // argument list
	Assignments  []*Assignment      // environment-variable prefix assignments
	Redirections []*RedirectionInfo // redirection list

	// Position is the sequential position within the current shell context.
	// Semantics verified against extract.go: NodeSubshell starts a fresh
	// context and resets Position to 0; NodeList / NodePipeline accumulate
	// outer ctx.position + inner index (so a command inside a nested list
	// sees outer parent position plus its own index). Position is NOT a
	// global absolute index across the whole command string -- use it to
	// locate commands within a single shell level, not to uniquely
	// identify a command in nested structures.
	Position int

	// Operator is the bash connector preceding this command (&& / || / | / ;).
	// The first command in a list / pipeline (i == 0) inherits the parent
	// ctx.operator; an outermost command has "" (no preceding operator).
	// Consumers: "" means "starting position with no prior connector";
	// non-empty is one of &&, ||, |, ;.
	Operator string

	// InSubshell is true iff the command is enclosed in a NodeSubshell.
	// Remains true inside nested subshells.
	InSubshell bool

	// InPipeline is true iff the command is a stage of a NodePipeline.
	// **Always check this before reading PipePosition** -- non-pipe commands
	// have PipePosition == 0 (zero-value default), which collides with the
	// "first pipeline stage" value. InPipeline is the only reliable
	// discriminator.
	InPipeline bool

	// PipePosition is the stage index within a pipeline (0 = first stage).
	// For non-pipe commands this field defaults to 0 which carries no
	// meaning -- always gate reads behind InPipeline.
	PipePosition int

	Background bool   // left of an `&` connector, backgrounded
	RawText    string // raw text of the command

	// ArgQuoted is a parallel slice to Args (len(ArgQuoted) == len(Args))
	// recording per-arg quoting regime at parse time. true means the arg
	// was a fully-literal QuotedString (single quotes '...' or ANSI-C
	// $'...') whose body undergoes no parameter / command substitution.
	// false means the arg was either unquoted or double-quoted "...",
	// where $VAR / $(cmd) expand at runtime. Mirrors the heredoc Quoted
	// contract on RedirectionInfo so arg-level security analysis can
	// distinguish "inert literal" args from "possibly expanded" args --
	// a quoted `'$HOME/.ssh'` arg is *not* the same runtime target as
	// an unquoted `$HOME/.ssh`, and downstream consumers (permission
	// layer danger attribution) need the regime to render faithful
	// reasons without re-parsing the original source.
	ArgQuoted []bool
}

// RedirectionInfo describes a single redirection.
//
// Heredoc fields (Tag/StripTabs/Quoted/Body) are populated only for
// NodeHeredoc redirections (Operator starting with "<<"). They surface
// the canonical heredoc semantics recorded by the parser so security
// consumers don't have to re-derive them from string shapes (e.g.
// consumers can read StripTabs directly rather than comparing
// Operator == "<<-"). Body is the raw heredoc content; Quoted=true
// means the delimiter was quoted (<<'EOF' / <<"EOF" / <<\EOF) so no
// parameter / command substitution expands inside Body -- a key gate
// for whether safety analysis needs to recursively parse Body.
type RedirectionInfo struct {
	Operator         string // operator: >, >>, <, <<, 2>&1, etc.
	Target           string // target file / fd
	IsStatic         bool   // whether the target is static (no variable expansion)
	HeredocTag       string // heredoc delimiter tag, populated only for "<<*" operators
	HeredocStripTabs bool   // <<- variant, strip leading tabs from body
	HeredocQuoted    bool   // delimiter was quoted; body does NOT expand variables / command substitutions
	HeredocBody      string // raw heredoc body text

	// HeredocBodyStart / HeredocBodyEnd: byte offsets of the heredoc
	// body within the ORIGINAL source (half-open interval). Populated
	// only for "<<*" operators with a matched body. Lets security
	// consumers (AnalyzeDanger) point DangerInfo back to concrete
	// "source bytes N-M" for operators / TUI / audit.
	HeredocBodyStart int
	HeredocBodyEnd   int
}

// extractContext carries contextual state down the AST traversal.
//
// background is set when an ancestor NodeList has Background=true and
// we're traversing its left subtree. It propagates down so every
// SimpleCommand in the backgrounded subtree inherits the flag.
type extractContext struct {
	inSubshell   bool
	inPipeline   bool
	pipePosition int
	operator     string
	position     int
	depth        int // recursion depth
	background   bool
}

// MaxRecursionDepth is the maximum recursion depth of the AST traversal.
//
// It guards against deeply nested input such as $($($(nested))) exhausting
// the stack. Once the limit is exceeded the traversal stops descending, so
// commands below that depth are not extracted (the original note says such
// commands are left to the AI classifier or user confirmation, which
// happens outside this file).
// Per the original author's note, 20 levels covers any normal script: the
// deepest real-world script they measured nested 8 levels.
const MaxRecursionDepth = 20

// ExtractCommands extracts all simple commands from the AST.
//
// It walks the tree recursively, finds every SimpleCommand node and
// extracts its command information. Commands nested inside subshells,
// pipelines and lists are all extracted.
// Subtrees deeper than MaxRecursionDepth are skipped.
//
// Note: this function does not look inside NodeCommandSubstitution nodes.
// 若需要递归提取命令替换($(...))内的命令,使用 ExtractAllCommands. func ExtractCommands(root *Node) []*CommandInfo { if root == nil { return nil } ctx := extractContext{depth: 0} var result []*CommandInfo extractFromNode(root, ctx, &result) return result } // ExtractAllCommands 从 AST 中递归提取所有命令,包括命令替换 $(...) 和反引号内的命令. // // 与 ExtractCommands 的区别: // - ExtractCommands 只提取顶层 AST 的命令,NodeCommandSubstitution 内部不递归 // - ExtractAllCommands 对每个 NodeCommandSubstitution 节点,剥去 $(...) 外壳后 // 重新 Parse 内部文本,再递归提取其中的命令 // // 精妙之处(CLEVER): 命令替换是安全检查最容易被绕过的地方-- // `echo $(rm -rf /)` 中 rm -rf / 嵌套在命令替换里,ExtractCommands 只看到 echo, // 安全检查会认为这条命令无害.ExtractAllCommands 递归展开所有嵌套层, // 让危险命令无处遁形. // 升华改进(ELEVATED): 反引号形式 `cmd` 和 $(...) 形式统一处理,覆盖全部命令替换语法. // 替代方案:用正则匹配 $(...) 内容--否决:正则无法处理嵌套括号, // 例如 $(echo $(rm -rf /)) 会导致括号匹配错误. func ExtractAllCommands(root *Node) []*CommandInfo { if root == nil { return nil } ctx := extractContext{depth: 0} var result []*CommandInfo extractAllFromNode(root, ctx, &result) return result } // extractAllFromNode 从指定节点开始递归提取命令,并进入 NodeCommandSubstitution 内部. func extractAllFromNode(node *Node, ctx extractContext, result *[]*CommandInfo) { if node == nil { return } if ctx.depth > MaxRecursionDepth { return } childDepth := ctx.depth + 1 switch node.Type { case NodeCommandSubstitution: // 剥去外层 $(...) 或反引号,重新解析内部内容 // 精妙之处(CLEVER): NodeCommandSubstitution 的 Value 是原始文本(含外壳), // 例如 "$(rm -rf /)" 或 "`rm -rf /`". // 必须先剥壳才能 Parse,否则解析器会把整段当作命令替换再次打包, // 导致无限循环或解析错误. 
inner := stripCommandSubstitution(node.Value) if inner == "" { return } // 递归深度传递:同一层节点调用,depth 不加 1 // 但内部解析出新 AST,使用 childDepth 防止无限递归 innerCtx := ctx innerCtx.depth = childDepth innerRoot := Parse(inner) if innerRoot != nil { extractAllFromNode(innerRoot, innerCtx, result) } case NodeProgram: for i, child := range node.Children { childCtx := ctx childCtx.position = i childCtx.depth = childDepth extractAllFromNode(child, childCtx, result) } case NodeList: for i, child := range node.Children { childCtx := ctx if i > 0 { childCtx.operator = node.Operator } childCtx.position = ctx.position + i childCtx.depth = childDepth // `&` semantics: only the left side goes background. // If this list terminates with `&` (Background=true), // left child inherits the flag; right child is the // foreground continuation and explicitly clears any // inherited background from ancestors. // For non-`&` lists (`&&` / `||` / `;`), ctx.background // propagates through unchanged -- a parent `&` that // backgrounded the whole group still reaches all // commands. // // `&` 语义: 只左侧入后台. 当前 list 以 `&` 收尾 // (Background=true) 时, 左 child 继承 flag, 右 child // 是 `&` 之后的前台延续, 显式清掉祖先继承的 background. // 非 `&` 连接符 (`&&` / `||` / `;`) 时 ctx.background // 原样传递 -- 把整组置后台的祖先 `&` 仍能覆盖到里面 // 的所有命令. 
if node.Background { childCtx.background = (i == 0) } extractAllFromNode(child, childCtx, result) } case NodePipeline: for i, child := range node.Children { childCtx := ctx childCtx.inPipeline = true childCtx.pipePosition = i childCtx.position = ctx.position + i childCtx.depth = childDepth if i > 0 { childCtx.operator = "|" } extractAllFromNode(child, childCtx, result) } case NodeSubshell: childCtx := ctx childCtx.inSubshell = true childCtx.depth = childDepth for i, child := range node.Children { childCtx.position = i extractAllFromNode(child, childCtx, result) } case NodeCompoundCommand: for i, child := range node.Children { childCtx := ctx childCtx.position = i childCtx.depth = childDepth extractAllFromNode(child, childCtx, result) } case NodeIf, NodeFor, NodeWhile, NodeCase: childCtx := ctx childCtx.depth = childDepth for _, child := range node.Children { extractAllFromNode(child, childCtx, result) } case NodeFunction: childCtx := ctx childCtx.depth = childDepth for _, child := range node.Children { extractAllFromNode(child, childCtx, result) } case NodeSimpleCommand: // 提取命令本身 cmd := extractSimpleCommand(node, ctx) if cmd != nil { *result = append(*result, cmd) } // 同时深入命令的参数节点,查找嵌套的命令替换 childCtx := ctx childCtx.depth = childDepth for _, child := range node.Children { // 无论子节点类型,都递归检查(NodeCommandSubstitution 会在递归时被剥壳处理) extractAllFromNode(child, childCtx, result) } // 精妙之处(CLEVER): 赋值语句 VAR=`cmd` 的命令替换藏在 Assignments 字段的 Value 里, // 不是 Children 节点,必须单独处理. // 例如 result=`sudo id` 解析后 node.Assignments[0].Value = "`sudo id`", // Children 里看不到命令替换节点. // 升华改进(ELEVATED): 支持赋值 RHS 里的任意命令替换,覆盖 VAR=$(cmd) 和 VAR=`cmd` 两种语法. for _, assign := range node.Assignments { if assign == nil { continue } v := assign.Value // 检测赋值值是否包含命令替换($(...) 
或反引号) if strings.Contains(v, "$(") || strings.Contains(v, "`") { inner := stripCommandSubstitution(v) if inner != "" && inner != v { innerRoot := Parse(inner) if innerRoot != nil { innerCtx := ctx innerCtx.depth = childDepth extractAllFromNode(innerRoot, innerCtx, result) } } } } // Heredoc body can carry nested command substitutions that // bypass top-level scans just like Assignments.Value does: // cat <= 3 && s[0] == '$' && s[1] == '(' && s[len(s)-1] == ')' { return strings.TrimSpace(s[2 : len(s)-1]) } // 反引号格式 `...` if len(s) >= 2 && s[0] == '`' && s[len(s)-1] == '`' { return strings.TrimSpace(s[1 : len(s)-1]) } // 已经是内部文本(无外壳),直接返回 return s } // extractFromNode 从指定节点开始递归提取命令. // // 精妙之处(CLEVER): depth 参数追踪递归深度,超过 MaxRecursionDepth 时停止递归. // 这不是性能优化--正常脚本不会到 20 层.这是对抗恶意构造的深层嵌套, // 例如 $($($(... 20层 ...))),防止栈溢出导致进程崩溃. func extractFromNode(node *Node, ctx extractContext, result *[]*CommandInfo) { if node == nil { return } if ctx.depth > MaxRecursionDepth { return // 超深度,停止递归 } // 每次递归子节点时 depth+1 childDepth := ctx.depth + 1 switch node.Type { case NodeProgram: // 遍历所有子语句 for i, child := range node.Children { childCtx := ctx childCtx.position = i childCtx.depth = childDepth extractFromNode(child, childCtx, result) } case NodeList: // 列表节点:左右两个子命令,通过操作符连接 for i, child := range node.Children { childCtx := ctx if i > 0 { childCtx.operator = node.Operator } childCtx.position = ctx.position + i childCtx.depth = childDepth // Same `&` background propagation as extractAllFromNode // (left inherits, right clears). Kept duplicated for now // because the two traversals diverge on CommandSubstitution // handling -- unifying would cost clarity. // // 与 extractAllFromNode 同形的 `&` 后台传播 (左继承, 右 // 清掉). 暂保留双份 -- 两条遍历在命令替换处理上有分歧, // 合并会牺牲清晰度. 
if node.Background { childCtx.background = (i == 0) } extractFromNode(child, childCtx, result) } case NodePipeline: // 管道节点:多个命令通过 | 连接 for i, child := range node.Children { childCtx := ctx childCtx.inPipeline = true childCtx.pipePosition = i childCtx.position = ctx.position + i childCtx.depth = childDepth if i > 0 { childCtx.operator = "|" } extractFromNode(child, childCtx, result) } case NodeSubshell: // 子 shell:标记上下文 childCtx := ctx childCtx.inSubshell = true childCtx.depth = childDepth for i, child := range node.Children { childCtx.position = i extractFromNode(child, childCtx, result) } case NodeCompoundCommand: // 花括号复合命令:遍历子命令 for i, child := range node.Children { childCtx := ctx childCtx.position = i childCtx.depth = childDepth extractFromNode(child, childCtx, result) } case NodeIf, NodeFor, NodeWhile, NodeCase: // 控制结构:遍历所有子节点 childCtx := ctx childCtx.depth = childDepth for _, child := range node.Children { extractFromNode(child, childCtx, result) } case NodeFunction: // 函数定义:遍历函数体 childCtx := ctx childCtx.depth = childDepth for _, child := range node.Children { extractFromNode(child, childCtx, result) } case NodeSimpleCommand: // 提取命令信息 cmd := extractSimpleCommand(node, ctx) if cmd != nil { *result = append(*result, cmd) } default: // 其他节点类型:递归遍历子节点 childCtx := ctx childCtx.depth = childDepth for _, child := range node.Children { extractFromNode(child, childCtx, result) } } } // extractSimpleCommand 从 SimpleCommand 节点提取命令信息. func extractSimpleCommand(node *Node, ctx extractContext) *CommandInfo { if node == nil || node.Type != NodeSimpleCommand { return nil } cmd := &CommandInfo{ Operator: ctx.operator, InSubshell: ctx.inSubshell, InPipeline: ctx.inPipeline, PipePosition: ctx.pipePosition, Position: ctx.position, Background: ctx.background, RawText: node.Value, } // 收集赋值,命令名,参数,重定向. 
// wordQuoted 与 words 同长度平行, 记录每个 word 的 parser 端 // Quoted 语义 -- 仅 NodeQuotedString 的子节点 (child.Quoted) 才 // 视作 literal quoted, 其他 (NodeWord / NodeVariableExpansion / // NodeCommandSubstitution) 视为未加引号 (false). 这一步是让 // parser 写入的 Node.Quoted 流到 CommandInfo.ArgQuoted 的唯一 // 产线读点. // // wordQuoted runs parallel to words, recording each word's // parser-side Quoted regime -- only a NodeQuotedString child's // child.Quoted is honored as literal-quoted; other node types // (NodeWord / NodeVariableExpansion / NodeCommandSubstitution) // are treated as unquoted (false). This is the sole production // read-site that flows the parser-written Node.Quoted through // to CommandInfo.ArgQuoted. var words []string var wordQuoted []bool for _, child := range node.Children { switch child.Type { case NodeAssignment: // 已经在 node.Assignments 中 case NodeRedirection, NodeHeredoc: redir := &RedirectionInfo{ Operator: child.RedirectOp, Target: child.RedirectTarget, IsStatic: IsStaticRedirectTarget(child.RedirectTarget), } if child.Type == NodeHeredoc { // Forward canonical heredoc fields so security consumers // (bash_security.go IsDangerousCommand / AnalyzeDanger) // can analyze Body and gate on Quoted. // // 把 canonical heredoc 字段透传给安全消费者 // (bash_security.go IsDangerousCommand / AnalyzeDanger), // 让它们能分析 Body 并用 Quoted 门控展开. redir.HeredocTag = child.HeredocTag redir.HeredocStripTabs = child.HeredocStripTabs redir.HeredocQuoted = child.HeredocQuoted redir.HeredocBody = child.HeredocBody redir.HeredocBodyStart = child.HeredocBodyStart redir.HeredocBodyEnd = child.HeredocBodyEnd // Heredoc target semantically IS the tag. Parser leaves // RedirectTarget empty on NodeHeredoc; fill it here so // Target remains a single honest surface for all redirs. // // Heredoc 的 target 语义上就是 tag. parser 在 NodeHeredoc // 上没设 RedirectTarget, 这里补上, 让 Target 字段对所有 // 重定向保持统一语义. 
if redir.Target == "" { redir.Target = child.HeredocTag redir.IsStatic = IsStaticRedirectTarget(child.HeredocTag) } } cmd.Redirections = append(cmd.Redirections, redir) default: // Word / QuotedString / VariableExpansion / CommandSubstitution words = append(words, resolveWordValue(child)) wordQuoted = append(wordQuoted, child.Type == NodeQuotedString && child.Quoted) } } cmd.Assignments = node.Assignments if len(words) > 0 { cmd.Name = words[0] cmd.Args = words[1:] // Parallel to Args (skip [0] name). Nil when no args so // consumers can treat nil and empty identically. // // 与 Args 平行 (跳过 [0] 命令名). 无参数时保持 nil, 消费方 // 可统一视作空切片. if len(wordQuoted) > 1 { cmd.ArgQuoted = wordQuoted[1:] } } return cmd } // resolveWordValue 解析 word 节点的值(去除引号等). // // 对于不同类型的节点: // - NodeWord: 返回原始文本 // - NodeQuotedString: 去除外层引号 // - NodeVariableExpansion: 返回原始文本(运行时才知道值) // - NodeCommandSubstitution: 返回原始文本 func resolveWordValue(node *Node) string { if node == nil { return "" } switch node.Type { case NodeQuotedString: v := node.Value // 去除外层引号 if len(v) >= 2 { if v[0] == '\'' && v[len(v)-1] == '\'' { return v[1 : len(v)-1] } if v[0] == '"' && v[len(v)-1] == '"' { return v[1 : len(v)-1] } // ANSI-C: $'...' if len(v) >= 3 && v[0] == '$' && v[1] == '\'' && v[len(v)-1] == '\'' { return decodeANSIC(v[2 : len(v)-1]) } } return v case NodeWord: // 如果有子节点(复合 word),递归解析 if len(node.Children) > 0 { var parts []string for _, child := range node.Children { parts = append(parts, resolveWordValue(child)) } return strings.Join(parts, "") } return node.Value default: return node.Value } } // decodeANSIC 解码 ANSI-C 转义序列(bash $'...' 引号内的内容). // // 支持的转义序列: // // \\, \', \", \n, \t, \r, \a, \b, \f, \v, \0 // \xNN (十六进制,1-2位) // \NNN (八进制,1-3位) // \uNNNN (Unicode 4位十六进制) // \UNNNNNNNN (Unicode 8位十六进制) // // 升华改进(ELEVATED): 早期方案只剥引号不解码,$'\\x72\\x6d' 绕过安全检查. // decodeANSIC 将转义序列转换为实际字符,防止通过文字转义绕过危险命令检测. 
func decodeANSIC(s string) string {
	// decodeANSIC decodes the body of a bash $'...' string (the text between
	// the quotes) into the bytes bash would actually produce, so that
	// escapes cannot be used to hide a dangerous word from later checks.
	//
	// Supported escapes (following the bash manual, "ANSI-C Quoting"):
	//
	//	\\ \' \" \?               the character itself
	//	\a \b \e \E \f \n \r \t \v the usual control characters
	//	\nnn                      octal value, ONE to three digits
	//	\xHH                      hex value, ONE or two digits
	//	\uHHHH                    Unicode code point, one to four hex digits
	//	\UHHHHHHHH                Unicode code point, one to eight hex digits
	//
	// Numeric escapes consume the longest run of valid digits up to their
	// limit; the character after the run is ordinary text. (An earlier
	// version grabbed a fixed-width window and gave up if any character in
	// it was not a digit, so `\57bin` or `\x9z` stayed undecoded, and `\057`
	// decoded as NUL followed by "57".)
	//
	// Anything else -- an unknown escape such as `\q`, `\x` with no hex
	// digit after it, or a lone trailing backslash -- is kept literally,
	// backslash included.
	out := make([]byte, 0, len(s))
	for i := 0; i < len(s); {
		// Ordinary byte, or a backslash with nothing after it.
		if s[i] != '\\' || i+1 >= len(s) {
			out = append(out, s[i])
			i++
			continue
		}

		esc := s[i+1]
		if b, ok := ansiCSimpleEscape(esc); ok {
			out = append(out, b)
			i += 2
			continue
		}

		switch {
		case esc == 'x':
			n := ansiCDigitRun(s[i+2:], 2, 16)
			if n == 0 {
				// `\x` without digits is not an escape: keep the backslash.
				out = append(out, '\\')
				i++
				continue
			}
			out = append(out, byte(parseHex(s[i+2:i+2+n])))
			i += 2 + n

		case esc == 'u' || esc == 'U':
			limit := 4
			if esc == 'U' {
				limit = 8
			}
			n := ansiCDigitRun(s[i+2:], limit, 16)
			if n == 0 {
				out = append(out, '\\')
				i++
				continue
			}
			// AppendRune writes U+FFFD for values that are not valid
			// code points, so out-of-range input cannot corrupt the output.
			out = utf8.AppendRune(out, rune(parseHex(s[i+2:i+2+n])))
			i += 2 + n

		case esc >= '0' && esc <= '7':
			// The run starts at the escape character itself, so n >= 1.
			n := ansiCDigitRun(s[i+1:], 3, 8)
			val := 0
			for k := i + 1; k < i+1+n; k++ {
				val = val*8 + int(s[k]-'0')
			}
			// Values above 0o377 are truncated to eight bits.
			out = append(out, byte(val))
			i += 1 + n

		default:
			// Unknown escape: emit the backslash and let the next
			// iteration copy the following character unchanged.
			out = append(out, '\\')
			i++
		}
	}
	return string(out)
}

// ansiCSimpleEscape maps the character following a backslash to the single
// byte it stands for. ok is false when c does not introduce a
// fixed, single-character escape.
func ansiCSimpleEscape(c byte) (b byte, ok bool) {
	switch c {
	case '\\', '\'', '"', '?':
		return c, true
	case 'n':
		return '\n', true
	case 't':
		return '\t', true
	case 'r':
		return '\r', true
	case 'a':
		return '\a', true
	case 'b':
		return '\b', true
	case 'f':
		return '\f', true
	case 'v':
		return '\v', true
	case 'e', 'E':
		return 0x1b, true
	}
	return 0, false
}

// ansiCDigitRun returns how many leading bytes of s (at most limit) are
// digits in the given base. base must be 8 or 16.
func ansiCDigitRun(s string, limit, base int) int {
	n := 0
	for n < len(s) && n < limit {
		c := s[n]
		valid := c >= '0' && c <= '7'
		if base == 16 {
			valid = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
		}
		if !valid {
			break
		}
		n++
	}
	return n
}

// parseHex parses a string of hexadecimal digits (either case) and returns
// its value, or -1 if any character is not a hex digit. An empty string
// yields 0. Callers in this file pass at most eight digits.
func parseHex(s string) int {
	val := 0
	for _, c := range s {
		var d int
		switch {
		case c >= '0' && c <= '9':
			d = int(c - '0')
		case c >= 'a' && c <= 'f':
			d = int(c-'a') + 10
		case c >= 'A' && c <= 'F':
			d = int(c-'A') + 10
		default:
			return -1
		}
		val = val<<4 + d
	}
	return val
}

// parseOct parses a string of octal digits (1-3 characters) and returns
// its value, or -1 if any character is not an octal digit.
func parseOct(s string) int { val := 0 for _, c := range s { if c < '0' || c > '7' { return -1 } val = val*8 + int(c-'0') } return val } // utf8Rune 将一个 Unicode 码点转换为 UTF-8 编码的字节序列. func utf8Rune(r int) []byte { // 将 int 作为 rune 处理 ru := rune(r) var buf [4]byte n := utf8.EncodeRune(buf[:], ru) return buf[:n] } // ExtractCommandName 从命令中提取真正的命令名和子命令. // // 跳过以下前缀: // - env [VAR=value]* cmd → cmd // - sudo [-u user] cmd → cmd // - nohup cmd → cmd // - time cmd → cmd // - nice / ionice / strace 等 → cmd // - stdbuf 等 → cmd // // 返回 (command, subcommand). // 对于 "git push --force",返回 ("git", "push"). func ExtractCommandName(cmd *CommandInfo) (command string, subcommand string) { if cmd == nil || cmd.Name == "" { return "", "" } // 构建完整的 word 列表(命令名 + 参数) words := make([]string, 0, 1+len(cmd.Args)) words = append(words, cmd.Name) words = append(words, cmd.Args...) // 跳过前缀 idx := 0 for idx < len(words) { word := words[idx] // 跳过环境变量赋值(已经在 Assignments 中处理, // 但有些赋值可能出现在命令名后面--如 env VAR=x cmd) if strings.Contains(word, "=") && !strings.HasPrefix(word, "-") && !strings.HasPrefix(word, "/") { idx++ continue } // 精妙之处(CLEVER): 逐层剥洋葱式跳过命令前缀--env/sudo/nohup/nice/ionice/stdbuf // 每种前缀的选项语法不同(sudo 有 -u user,nice 有 -n N),必须精确跳过参数. // 这是安全分析的关键路径:跳过少了会把 sudo 当命令名,跳过多了会漏掉真正的命令. switch word { case "env": idx++ continue case "sudo": idx++ // sudo 可能有选项(-u user, -E 等) for idx < len(words) && strings.HasPrefix(words[idx], "-") { opt := words[idx] idx++ // 带参数的选项 if (opt == "-u" || opt == "-g" || opt == "-C" || opt == "-p") && idx < len(words) { idx++ } } continue case "nohup", "time", "strace", "ltrace", // 升华改进(ELEVATED): 补全常见提权/脚本执行前缀-- // 早期方案缺少 doas(OpenBSD sudo 替代品),run0(systemd 权限提升), // sg(切换组执行),expect(TCL 脚本运行器,常用于自动化提权). // 攻击者可用 "doas rm -rf /" 绕过只检查 sudo 的防护. 
"doas", "run0", "sg", // expect/unbuffer:脚本自动化工具,可包裹任意命令 "expect", "unbuffer", // 时间测量工具(同 time 语义) "timeout", "systemd-run": idx++ continue case "nice": idx++ // nice 可能有 -n N 选项 for idx < len(words) && strings.HasPrefix(words[idx], "-") { opt := words[idx] idx++ // -n 需要跳过后面的参数 if opt == "-n" && idx < len(words) { idx++ } } continue case "ionice": idx++ // ionice 可能有 -c N -n N 选项 for idx < len(words) && strings.HasPrefix(words[idx], "-") { opt := words[idx] idx++ if (opt == "-c" || opt == "-n" || opt == "-p") && idx < len(words) { idx++ } } continue case "stdbuf": idx++ // stdbuf 有 -i/-o/-e 选项 for idx < len(words) && strings.HasPrefix(words[idx], "-") { idx++ // -oL 等融合形式不需要跳过额外参数 } continue } break } if idx >= len(words) { return "", "" } command = words[idx] if idx+1 < len(words) { sub := words[idx+1] if !strings.HasPrefix(sub, "-") { subcommand = sub } } return command, subcommand } // IsStaticRedirectTarget 检查重定向目标是否是静态的(无变量/命令替换). // // 升华改进(ELEVATED): 增强重定向目标的静态性检查,移植早期方案的安全细节. // 替代方案:只检查 $ 和 `(简单版,覆盖 90% 场景但有安全盲点). // // 静态目标可以在编译时确定文件路径,用于安全规则匹配. // 动态目标(含 $,`,* 等)可能在运行时展开为任意值. // 精妙之处(CLEVER): 防御性检查链--逐一排除所有可能在运行时展开为意外值的字符. // 这个函数看似简单,但遗漏任何一项都可能导致安全规则被绕过, // 例如漏掉 ~ 检查就会让 `>~/.bashrc` 逃过路径权限检测. 
func IsStaticRedirectTarget(target string) bool { if target == "" { return false } // 空格防护:cat > out /etc/passwd 被解析器合并为 "out /etc/passwd" // \r 也要拦截--CR 注入可以在终端上伪装文件名显示 if strings.ContainsAny(target, " \t\n\r") { return false } // 包含引号--引号在重定向目标中说明解析器没有正确去壳,不信任 if strings.ContainsAny(target, "'\"") { return false } // # 差异防护:shell-quote 把 #foo 解析为注释,bash 中 > #file 是语法错误 if strings.HasPrefix(target, "#") { return false } // 历史展开防护:!!, !-1, !foo if strings.HasPrefix(target, "!") { return false } // Zsh = 展开防护:=cmd 展开为 /path/to/cmd if strings.HasPrefix(target, "=") { return false } // 变量展开 if strings.Contains(target, "$") { return false } // 命令替换 if strings.Contains(target, "`") { return false } // Glob 模式 if strings.ContainsAny(target, "*?[") { return false } // 升华改进(ELEVATED): 波浪号不直接拒绝,而是解析为绝对路径后视为静态. // ~/output.txt 展开后是 /home/user/output.txt,这是一个确定的路径. // 但 ~otheruser/file 我们无法确定,拒绝. // 替代方案:直接拒绝所有 ~ 开头的目标(更保守,但 ~/file 是常见合法用法). if strings.HasPrefix(target, "~") { if target == "~" || strings.HasPrefix(target, "~/") { // ~/path 可以解析为绝对路径,视为静态 // 实际路径检查由权限系统的路径规则负责 } else { // ~otheruser/path 无法确定,拒绝 return false } } // & 文件描述符 if strings.Contains(target, "&") { return false } // 包含进程替换 if strings.ContainsAny(target, "<>") { return false } // 花括号展开 {a,b} if strings.Contains(target, "{") { return false } return true } // ResolveHeredocBody returns the runtime-effective heredoc body. // When RedirectionInfo.HeredocStripTabs is true (the <<- form), bash // strips leading tabs from every body line at runtime -- so a safety // consumer matching literal paths or rerunning Parse on the body must // look at the stripped content, not the raw one. For the plain << // form the body is returned unchanged. // // Returns "" for non-heredoc or nil input so callers can unconditionally // call it without checking Operator first. 
// // Cross-package consumers (pkg/permission/bash_security.go) use this // to feed dangerous-file detection with the same string bash actually // writes to a redirect target, closing the analyze-vs-runtime gap. // // 返回 heredoc body 的运行时有效内容. 当 RedirectionInfo.HeredocStripTabs // 为 true (<<- 形式) 时, bash 运行时会去掉 body 每行的 leading tab, // 安全消费者做字面路径匹配或对 body 再 Parse 必须看 strip 后的内容, // 否则分析和运行时错位. 普通 << 形式 body 原样返回. // // 非 heredoc 或 nil 输入返回 "", 调用方无需先判断 Operator 即可调用. // // 跨包消费者 (pkg/permission/bash_security.go) 通过它喂 dangerous-file // 检测, 让分析看到的 body 和 bash 写到重定向目标的一致, 消除分析与 // 运行时不一致的盲区. func ResolveHeredocBody(redir *RedirectionInfo) string { if redir == nil { return "" } if !strings.HasPrefix(redir.Operator, "<<") { return "" } body := redir.HeredocBody if !redir.HeredocStripTabs || body == "" { return body } // <<- strips one-or-more leading tabs from each line (matches bash // behavior: only tabs, never spaces). // // <<- 去掉每行的连续行首 tab (匹配 bash: 只吃 tab, 不吃空格). lines := strings.Split(body, "\n") for i, line := range lines { lines[i] = strings.TrimLeft(line, "\t") } return strings.Join(lines, "\n") } // GetCommandPrefixes 获取命令的前缀列表,用于权限规则匹配. // // 对于命令 "npm install lodash",返回: // // ["npm", "npm install", "npm install lodash"] func GetCommandPrefixes(cmd *CommandInfo) []string { if cmd == nil || cmd.Name == "" { return nil } // 构建完整的 word 列表 words := make([]string, 0, 1+len(cmd.Args)) words = append(words, cmd.Name) words = append(words, cmd.Args...) prefixes := make([]string, 0, len(words)) var builder strings.Builder for i, word := range words { if i > 0 { builder.WriteByte(' ') } builder.WriteString(word) prefixes = append(prefixes, builder.String()) } return prefixes }