// patch.go -- 从编辑脚本生成结构化 patch(多 hunk 支持).
//
// 核心抽象:Hunk(差异块).一个 patch 由多个 hunk 组成,
// 每个 hunk 代表文件中一个连续的变更区域(含上下文行).
//
// 精妙之处(CLEVER): hunk 合并策略 -- 相距 <= 2*contextLines 的变更区域
// 合并为同一个 hunk.这避免了大量碎片化的小 hunk,让 diff 输出更紧凑可读.
// 2*contextLines 的阈值来源:两个相邻变更各自需要 contextLines 行的
// 后文/前文上下文,如果这两段上下文会重叠,不如合并成一个 hunk.
package diff

import (
	"fmt"
	"sort"
	"strings"
)

// Hunk 是一个差异块,包含变更行及其上下文.
type Hunk struct {
	OldStart int      // 旧文本中的起始行号(从 1 开始)
	OldLines int      // 旧文本中的行数(含上下文)
	NewStart int      // 新文本中的起始行号(从 1 开始)
	NewLines int      // 新文本中的行数(含上下文)
	Lines    []string // 差异行:" context" / "-deleted" / "+added"
}

// changeRegion 表示一个连续的变更区域(在编辑脚本中的索引范围).
type changeRegion struct {
	startIdx int // 编辑脚本中第一个变更的索引
	endIdx   int // 编辑脚本中最后一个变更的索引(包含)
}

// StructuredPatch 生成两段文本之间的结构化差异.
// contextLines 是每个 hunk 前后的上下文行数(通常 3).
//
// 算法步骤:
// 1. 调用 Diff 生成编辑脚本
// 2. 从编辑脚本中找出所有变更区域(连续的非 EditEqual 操作)
// 3. 合并相距 <= 2*contextLines 的变更区域
// 4. 为每个合并后的区域生成 hunk(含前后上下文)
// maxContextLines 是 contextLines 的硬性上限,防止调用方传入超大值导致内存耗尽.
// 100 行上下文已足够所有实际使用场景;超大值通常是调用方的 bug.
const maxContextLines = 100

func StructuredPatch(oldText, newText string, contextLines int) []Hunk {
	if contextLines < 0 {
		contextLines = 0
	}
	// 历史包袱(LEGACY): 早期方案无上限,用户传入 9999 可生成巨大 unified diff 导致 OOM.
	if contextLines > maxContextLines {
		contextLines = maxContextLines
	}

	oldLines := splitLines(oldText)
	newLines := splitLines(newText)

	edits := Diff(oldLines, newLines)

	if len(edits) == 0 {
		return nil
	}

	// 检查是否有实际变更
	hasChange := false
	for _, e := range edits {
		if e.Type != EditEqual {
			hasChange = true
			break
		}
	}
	if !hasChange {
		return nil
	}

	// 找出所有变更区域
	regions := findChangeRegions(edits)
	if len(regions) == 0 {
		return nil
	}

	// 合并相距过近的变更区域
	// 精妙之处(CLEVER): 合并条件是两个区域之间的 Equal 行数 <= 2*contextLines.
	// 因为区域 A 的后上下文和区域 B 的前上下文会重叠,合并后更紧凑.
	merged := mergeRegions(edits, regions, contextLines)

	// 为每个合并后的区域生成 hunk
	hunks := make([]Hunk, 0, len(merged))
	for _, region := range merged {
		hunk := buildHunk(edits, region, contextLines)
		if len(hunk.Lines) > 0 {
			hunks = append(hunks, hunk)
		}
	}

	return hunks
}

// splitLines 将文本按换行符分割为行切片.
// 空文本返回空切片(不是 [""]).
func splitLines(text string) []string {
	if text == "" {
		return []string{}
	}
	return strings.Split(text, "\n")
}

// findChangeRegions 从编辑脚本中提取所有连续的变更区域.
// 变更区域是编辑脚本中一段连续的非 EditEqual 操作.
func findChangeRegions(edits []Edit) []changeRegion {
	var regions []changeRegion
	inChange := false
	var current changeRegion

	for i, e := range edits {
		if e.Type != EditEqual {
			if !inChange {
				current.startIdx = i
				inChange = true
			}
			current.endIdx = i
		} else {
			if inChange {
				regions = append(regions, current)
				inChange = false
			}
		}
	}

	// 处理末尾的变更区域
	if inChange {
		regions = append(regions, current)
	}

	return regions
}

// mergeRegions 合并相距 <= 2*contextLines 的变更区域.
// 距离通过两个区域之间的 Equal 行数来衡量.
func mergeRegions(edits []Edit, regions []changeRegion, contextLines int) []changeRegion {
	if len(regions) <= 1 {
		return regions
	}

	mergeThreshold := 2 * contextLines

	merged := []changeRegion{regions[0]}

	for i := 1; i < len(regions); i++ {
		prev := &merged[len(merged)-1]
		curr := regions[i]

		// 计算两个区域之间的 Equal 行数
		gap := 0
		for j := prev.endIdx + 1; j < curr.startIdx; j++ {
			if edits[j].Type == EditEqual {
				gap++
			}
		}

		if gap <= mergeThreshold {
			// 合并:扩展前一个区域到当前区域的末尾
			prev.endIdx = curr.endIdx
		} else {
			// 不合并:新增一个区域
			merged = append(merged, curr)
		}
	}

	return merged
}

// buildHunk 为一个变更区域(可能是合并后的)构建 Hunk.
// 包含 contextLines 行的前后上下文.
func buildHunk(edits []Edit, region changeRegion, contextLines int) Hunk {
	// 计算上下文的起止索引
	ctxStart := region.startIdx - contextLines
	if ctxStart < 0 {
		ctxStart = 0
	}
	ctxEnd := region.endIdx + contextLines
	if ctxEnd >= len(edits) {
		ctxEnd = len(edits) - 1
	}

	// 确保上下文边界只包含 Equal 行(不越过其他变更区域的边界)
	// ctxStart 向前扩展时已经由 mergeRegions 保证不会越界

	var hunk Hunk
	hunk.Lines = make([]string, 0, ctxEnd-ctxStart+1)

	// 统计行号
	oldStart := 0
	newStart := 0
	oldCount := 0
	newCount := 0
	startSet := false

	for i := ctxStart; i <= ctxEnd; i++ {
		e := edits[i]

		if !startSet {
			switch e.Type {
			case EditEqual:
				oldStart = e.OldLine
				newStart = e.NewLine
			case EditDelete:
				oldStart = e.OldLine
				newStart = e.NewLine
				// 删除行的 NewLine 为 0,需要推算
				// 从前面的 edit 推算当前新文本位置
				newStart = inferNewLine(edits, i)
			case EditInsert:
				oldStart = inferOldLine(edits, i)
				newStart = e.NewLine
			}
			startSet = true
		}

		switch e.Type {
		case EditEqual:
			hunk.Lines = append(hunk.Lines, " "+e.Content)
			oldCount++
			newCount++
		case EditDelete:
			hunk.Lines = append(hunk.Lines, "-"+e.Content)
			oldCount++
		case EditInsert:
			hunk.Lines = append(hunk.Lines, "+"+e.Content)
			newCount++
		}
	}

	// 边界修正:如果 oldStart 或 newStart 为 0,设为 1
	if oldStart == 0 {
		oldStart = 1
	}
	if newStart == 0 {
		newStart = 1
	}

	hunk.OldStart = oldStart
	hunk.OldLines = oldCount
	hunk.NewStart = newStart
	hunk.NewLines = newCount

	return hunk
}

// inferNewLine 推算编辑脚本中某个位置对应的新文本行号.
// 用于 Delete 行(其 NewLine 为 0)的 hunk 起始行号推算.
func inferNewLine(edits []Edit, idx int) int {
	// 向前搜索最近的有 NewLine 信息的 edit
	for i := idx - 1; i >= 0; i-- {
		if edits[i].NewLine > 0 {
			// 计算从那个位置到 idx 之间的插入/相同行数
			count := 0
			for j := i; j < idx; j++ {
				if edits[j].Type == EditInsert || edits[j].Type == EditEqual {
					count++
				}
			}
			return edits[i].NewLine + count
		}
	}
	// 没有前序信息,从 1 开始
	return 1
}

// inferOldLine 推算编辑脚本中某个位置对应的旧文本行号.
// 用于 Insert 行(其 OldLine 为 0)的 hunk 起始行号推算.
func inferOldLine(edits []Edit, idx int) int {
	// 向前搜索最近的有 OldLine 信息的 edit
	for i := idx - 1; i >= 0; i-- {
		if edits[i].OldLine > 0 {
			count := 0
			for j := i; j < idx; j++ {
				if edits[j].Type == EditDelete || edits[j].Type == EditEqual {
					count++
				}
			}
			return edits[i].OldLine + count
		}
	}
	return 1
}

// FormatUnified 将 hunks 格式化为 unified diff 文本.
// 输出格式遵循 GNU diff 的 unified format 标准.
//
// 精妙之处(CLEVER): 空 hunks 返回空字符串(不是带 header 的空 diff),
// 避免消费方看到只有 header 没有内容的无意义 diff.
func FormatUnified(filename string, hunks []Hunk) string {
	if len(hunks) == 0 {
		return ""
	}

	var sb strings.Builder

	// 文件头
	sb.WriteString(fmt.Sprintf("--- %s\n", filename))
	sb.WriteString(fmt.Sprintf("+++ %s\n", filename))

	// 各 hunk
	for _, h := range hunks {
		// hunk header: @@ -oldStart,oldLines +newStart,newLines @@
		sb.WriteString(fmt.Sprintf("@@ -%d,%d +%d,%d @@\n",
			h.OldStart, h.OldLines,
			h.NewStart, h.NewLines))

		// hunk 内容
		for _, line := range h.Lines {
			sb.WriteString(line)
			sb.WriteByte('\n')
		}
	}

	return sb.String()
}

// ─────────────────────────────────────────────────────────────────────
// Patch 应用 -- StructuredPatch 的逆操作
// ─────────────────────────────────────────────────────────────────────

// ApplyPatch 将结构化 hunks 应用到 oldText,生成修补后的新文本.
//
// 这是 StructuredPatch 的逆操作:
//   ApplyPatch(old, StructuredPatch(old, new, ctx)) == new
//
// 算法:
//  1. 将 oldText 按行拆分
//  2. 按 OldStart 排序 hunks(防御乱序输入)
//  3. 逐 hunk 顺序处理:
//     - 复制 hunk 前的不变行
//     - " " 上下文行: 校验必须匹配 oldText 对应行
//     - "-" 删除行: 校验必须匹配, 从结果中跳过
//     - "+" 插入行: 写入结果
//  4. 复制最后一个 hunk 后的剩余行
//
// 精妙之处(CLEVER): 上下文行校验不是多余的防御 -- 它是"这个 patch 还能用吗"
// 的运行时检测. 原文被其他人改过? 上下文失配, 立即报错, 不会静默产生垃圾.
// 这与 git apply --check 的理念一致: 宁可拒绝, 不可误用.
//
// ELEVATED: 不只是代码 patching -- 这是变更管理的"应用"原语.
// 法律合同修订/医疗处方变更/财务报表修正, 任何需要"可验证地应用一组结构化变更"
// 的领域都是这个函数的消费者. Diff 是"描述变更", ApplyPatch 是"执行变更".
func ApplyPatch(oldText string, hunks []Hunk) (string, error) {
	if len(hunks) == 0 {
		return oldText, nil
	}

	oldLines := splitLines(oldText)

	// 按 OldStart 排序, 防御乱序输入.
	// 复制切片避免修改调用方的数据.
	sorted := make([]Hunk, len(hunks))
	copy(sorted, hunks)
	sort.Slice(sorted, func(i, j int) bool {
		return sorted[i].OldStart < sorted[j].OldStart
	})

	// 预分配: 最坏情况不超过 oldLines + 所有 hunk 的插入行.
	maxLines := len(oldLines)
	for _, h := range sorted {
		maxLines += h.NewLines
	}
	result := make([]string, 0, maxLines)

	oldIdx := 0 // 0-based 游标, 指向 oldLines 中下一个未消费的行

	for hi, hunk := range sorted {
		hunkStart := hunk.OldStart - 1 // 转为 0-based

		// 重叠检测: 如果这个 hunk 的起点在游标之前, 说明 hunks 有重叠.
		if hunkStart < oldIdx {
			return "", fmt.Errorf("applypatch: hunk %d (OldStart=%d) overlaps -- "+
				"cursor already at line %d", hi+1, hunk.OldStart, oldIdx+1)
		}

		// 复制 hunk 前的不变行.
		if hunkStart > len(oldLines) {
			return "", fmt.Errorf("applypatch: hunk %d (OldStart=%d) starts beyond "+
				"file end (file has %d lines)", hi+1, hunk.OldStart, len(oldLines))
		}
		result = append(result, oldLines[oldIdx:hunkStart]...)
		oldIdx = hunkStart

		// 逐行应用 hunk.
		for li, line := range hunk.Lines {
			if len(line) == 0 {
				return "", fmt.Errorf("applypatch: hunk %d line %d is empty "+
					"(missing prefix)", hi+1, li+1)
			}

			prefix := line[0]
			content := line[1:]

			switch prefix {
			case ' ': // 上下文: 校验 + 输出
				if oldIdx >= len(oldLines) {
					return "", fmt.Errorf("applypatch: hunk %d context at line %d "+
						"extends beyond file end (%d lines)",
						hi+1, oldIdx+1, len(oldLines))
				}
				if oldLines[oldIdx] != content {
					return "", fmt.Errorf("applypatch: hunk %d context mismatch at "+
						"line %d: patch expects %q, file has %q",
						hi+1, oldIdx+1, content, oldLines[oldIdx])
				}
				result = append(result, content)
				oldIdx++

			case '-': // 删除: 校验 + 跳过
				if oldIdx >= len(oldLines) {
					return "", fmt.Errorf("applypatch: hunk %d delete at line %d "+
						"extends beyond file end (%d lines)",
						hi+1, oldIdx+1, len(oldLines))
				}
				if oldLines[oldIdx] != content {
					return "", fmt.Errorf("applypatch: hunk %d delete mismatch at "+
						"line %d: patch expects %q, file has %q",
						hi+1, oldIdx+1, content, oldLines[oldIdx])
				}
				oldIdx++ // 消费但不输出

			case '+': // 插入: 输出, 不消费 oldLines
				result = append(result, content)

			default:
				return "", fmt.Errorf("applypatch: hunk %d line %d has unknown "+
					"prefix %q", hi+1, li+1, string(prefix))
			}
		}
	}

	// 复制最后一个 hunk 之后的剩余行.
	if oldIdx < len(oldLines) {
		result = append(result, oldLines[oldIdx:]...)
	}

	return strings.Join(result, "\n"), nil
}

// ReverseHunks 反转 hunks 的方向: "+" 变 "-", "-" 变 "+",
// OldStart/OldLines 与 NewStart/NewLines 互换.
//
// 精妙之处(CLEVER): 结合 ApplyPatch 实现对称 undo, 无需存储完整快照:
//   ApplyPatch(newText, ReverseHunks(hunks)) == oldText
//
// 原方案(LEGACY): fileedit 当前用完整快照做 undo(存整个旧文件内容).
// 快照 undo 简单但 O(fileSize) 空间; patch-based undo 是 O(diffSize) 空间.
// 对于"改了一行的万行文件", patch undo 比快照节省 99.99% 存储.
func ReverseHunks(hunks []Hunk) []Hunk {
	reversed := make([]Hunk, len(hunks))
	for i, h := range hunks {
		rh := Hunk{
			OldStart: h.NewStart,
			OldLines: h.NewLines,
			NewStart: h.OldStart,
			NewLines: h.OldLines,
			Lines:    make([]string, len(h.Lines)),
		}
		for j, line := range h.Lines {
			if len(line) == 0 {
				rh.Lines[j] = line
				continue
			}
			switch line[0] {
			case '+':
				rh.Lines[j] = "-" + line[1:]
			case '-':
				rh.Lines[j] = "+" + line[1:]
			default:
				rh.Lines[j] = line // 上下文行不变
			}
		}
		reversed[i] = rh
	}
	return reversed
}