package engine

// filecache.go - 文件状态缓存
//
// 定位:跟踪 Agent 读过哪些文件及其内容哈希,大小,修改时间.
//
// 核心用途:
//   - 避免重复读取同一文件(FileRead 工具检查缓存)
//   - 检测文件被外部修改(mtime 比较)
//   - 压缩后恢复时知道哪些文件需要重新注入
//   - 统计已读文件列表(用于系统提醒)
//
// 核心设计决策:
//   - O(1) LRU:container/list 双向链表 + map[string]*list.Element 双结构
//     Get/Record/Evict 全部 O(1),替代早期方案 O(n) 线性扫描
//   - Peek vs Get 分离:Peek 不更新 LRU 顺序,用于权限检查/后台扫描等"观察不污染"场景
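//   - Thread safety: sync.RWMutex. Peek/Info/Stats/RecentFiles hold only RLock.
//     Get takes the write lock because it reorders the LRU list and updates
//     the hit/miss counters. IsModified looks the entry up under RLock and
//     takes the write lock only when it needs to set WasModified.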
//
// 升华改进(ELEVATED): 早期方案无 Peek--所有查询都走 Get,会污染 LRU 顺序.
// 权限系统检查"文件是否被读过"时不应该把该文件蹭到 LRU 顶端,
// 否则后台权限扫描会把真正频繁使用的文件驱逐出缓存.
// 替代方案:<统一用 Get,不区分观察/使用> -
// 否决原因:后台扫描(秘密检测,审计,新鲜度检查)会系统性污染 LRU,
// 导致工作集中频繁使用的文件反而被淘汰.

import (
	"container/list"
	"crypto/sha256"
	"fmt"
	"os"
	"sync"
	"time"
)

// FileCacheEntry is a single entry in the file state cache.
//
// Consumption shape: pull API. Fields surface via:
//   - FileStateCache.Info(path) (FileCacheEntry, bool) -- value copy,
//     safe for external consumers (SDK / platform / diagnostics)
//   - FileStateCache.Get/Peek(path) (*FileCacheEntry, bool) -- pointer,
//     for internal consumers (reminders.go CheckFileModifications)
//
// Field usage:
//   - Path: read directly by RecentFiles(). It is the single source of
//     truth for an entry's path; lruEntry does not keep a second copy.
//   - ContentHash / Size / LineCount / ReadAt / WasModified: consumed by
//     reminders.go CheckFileModifications, whose reminder text tells the
//     model what the Agent saw at read time (N bytes / M lines / hash XX /
//     read T ago).
//   - ModTime: read by IsModified(path) for its mtime comparison.
//
// All seven fields (Path / ContentHash / Size / LineCount / ReadAt /
// WasModified / ModTime) are read by real production code paths; none
// are formal-only.
type FileCacheEntry struct {
	Path        string    // file path, also the cache key (documented as an absolute path)
	ContentHash string    // first 16 hex characters of the content's SHA-256
	Size        int64     // content size in bytes
	LineCount   int       // number of lines in the content
	ReadAt      time.Time // when the entry was recorded
	ModTime     time.Time // file mtime sampled when the entry was recorded (zero if stat failed)
	WasModified bool      // set once IsModified observes a different mtime on disk
}

// CacheStats is a value snapshot of cache hit-rate and capacity stats.
//
// Consumption shape (pull): returned by FileStateCache.Stats(); consumers
// (CLI diagnostic commands / SaaS dashboards / test harness / external
// SDK) actively read the fields. Structurally identical to
// Session.Stats() / DenialTracker.Stats() -- the absence of an internal
// reader within the scanner's view is expected. Stats snapshots are
// meant for the consumption layer; forcing an internal reader just to
// satisfy a linter would be dishonest. See docs/api-reference.md
// "API consumption shapes" for the pull-shape catalogue.
//
// Reading the fields:
//   - Entries: current entry count; Entries/MaxSize gives the occupancy.
//   - MaxSize: configured upper bound, an input for capacity planning.
//   - Hits/Misses: raw lookup counters; HitRate() is derived from them.
//   - Evictions: eviction count; a high value suggests MaxSize is too
//     small for the working set.
type CacheStats struct {
	Entries   int // number of entries currently cached
	MaxSize   int // maximum number of entries
	Hits      int // lookups that found an entry (Get hits)
	Misses    int // lookups that found nothing (Get misses)
	Evictions int // number of evictions
}

// HitRate returns Hits divided by the total number of counted lookups
// (Hits + Misses), a value between 0.0 and 1.0 for non-negative counters.
// It returns 0 when no lookups have been counted, avoiding a division by
// zero.
func (s CacheStats) HitRate() float64 {
	if lookups := s.Hits + s.Misses; lookups != 0 {
		return float64(s.Hits) / float64(lookups)
	}
	return 0
}

// lruEntry is the value stored in each LRU list node; it holds only a
// *FileCacheEntry.
//
// CLEVER: the list node carries a pointer to the entry instead of the
// entry itself. The map[string]*list.Element gives the list position in
// O(1), and list.Element.Value.(*lruEntry) gives the entry pointer. If the
// map stored *FileCacheEntry directly, a second map would be needed to
// find the list element.
//
// History: an earlier version also stored a path string here, duplicating
// entry.Path. The path is both the cache key and an attribute of the
// entry, and the two copies were always equal. Fixed 2026-04-20:
// entry.Path is the single source of truth and lruEntry holds only the
// pointer, which makes FileCacheEntry.Path a genuinely consumed field
// (RecentFiles reads it directly).
type lruEntry struct {
	entry *FileCacheEntry
}

// FileStateCache tracks which files the Agent has read, together with
// their content hash and size.
//
// CLEVER: O(1) LRU built from two structures, a container/list doubly
// linked list plus a map[string]*list.Element. The list keeps recency
// order (front = newest, back = oldest) and the map gives O(1)
// path -> node lookup.
// Moving a node to the front: list.MoveToFront(elem), O(1).
// Evicting the oldest node: list.Back() + list.Remove(), O(1).
//
// Compared with the earlier O(n) design, which scanned a []string LRU
// queue on every Get/Record to find the old position: per the original
// author's notes the gap is small at maxSize=1000 (about 1µs) but the old
// design slows down noticeably at 10K+ entries. The O(1) form is also
// simply the more correct LRU, not over-engineering.
type FileStateCache struct {
	mu      sync.RWMutex
	entries map[string]*list.Element // path -> list node (node value is *lruEntry)
	lru     *list.List               // doubly linked list; front = most recently used, back = least recently used
	maxSize int                      // maximum number of entries (NewFileStateCache defaults it to 1000)

	// Counters. hits and misses are updated only by Get (Peek does not
	// count); evictions is incremented by evictIfNeeded.
	hits      int
	misses    int
	evictions int
}

// NewFileStateCache builds an empty file state cache.
//
// maxSize is the maximum number of entries the cache may hold; any value
// <= 0 selects the default capacity of 1000.
func NewFileStateCache(maxSize int) *FileStateCache {
	const defaultMaxSize = 1000

	cache := &FileStateCache{
		entries: make(map[string]*list.Element),
		lru:     list.New(),
		maxSize: defaultMaxSize,
	}
	if maxSize > 0 {
		cache.maxSize = maxSize
	}
	return cache
}

// Record stores the state of a file in the cache after it has been read.
//
// path is the cache key and the path that is stat'ed for the modification
// time. content is the complete file content; it is used to compute the
// hash, the size, and the line count, and is not retained.
//
// An existing entry for path is replaced by a freshly built one (with
// WasModified reset to false) and moved to the most-recently-used
// position; a new path is inserted at that position. Afterwards the cache
// evicts least-recently-used entries if it exceeds its capacity.
func (c *FileStateCache) Record(path string, content []byte) {
	// Nothing up to the Lock call below depends on cache state, so it runs
	// without the mutex: hashing and line counting are linear in
	// len(content) and os.Stat is a syscall, and holding the write lock
	// across them would stall every concurrent reader for no benefit.

	// SHA-256 always produces 32 bytes, so formatting the first 8 bytes
	// yields the 16-hex-character prefix with no length check needed.
	sum := sha256.Sum256(content)
	hashStr := fmt.Sprintf("%x", sum[:8])

	// Count newline-terminated lines.
	lineCount := 0
	for _, b := range content {
		if b == '\n' {
			lineCount++
		}
	}
	// A non-empty file whose last line lacks a trailing newline still has
	// that final line.
	if n := len(content); n > 0 && content[n-1] != '\n' {
		lineCount++
	}

	// Best effort: if the file cannot be stat'ed the mtime stays at its
	// zero value. The mtime is sampled here rather than when the caller
	// read content, so a write landing in between is attributed to this
	// read.
	var modTime time.Time
	if info, err := os.Stat(path); err == nil {
		modTime = info.ModTime()
	}

	entry := &FileCacheEntry{
		Path:        path,
		ContentHash: hashStr,
		Size:        int64(len(content)),
		LineCount:   lineCount,
		ReadAt:      time.Now(),
		ModTime:     modTime,
		WasModified: false,
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	if elem, ok := c.entries[path]; ok {
		// Known path: swap in the new entry and mark it most recently used.
		elem.Value.(*lruEntry).entry = entry
		c.lru.MoveToFront(elem)
	} else {
		// New path: insert at the front of the list and index it in the map.
		c.entries[path] = c.lru.PushFront(&lruEntry{entry: entry})
	}

	c.evictIfNeeded()
}

// Get looks up the cache entry for path and marks it as most recently
// used. It returns the entry pointer and true on a hit, or nil and false
// on a miss. Every call is counted in the hit/miss statistics.
//
// CLEVER: Get actively maintains LRU order by moving the entry to the
// front of the list on every access, which gives classic LRU semantics:
// the most recently accessed entry is evicted last. Because it mutates
// the list and the counters it takes the write lock. Use Peek when you
// only want to observe an entry without affecting eviction order (for
// example a permission check).
func (c *FileStateCache) Get(path string) (*FileCacheEntry, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()

	elem, found := c.entries[path]
	if !found {
		c.misses++
		return nil, false
	}

	c.hits++
	c.lru.MoveToFront(elem)
	return elem.Value.(*lruEntry).entry, true
}

// Peek looks up the cache entry for path WITHOUT updating LRU order and
// WITHOUT counting toward the hit/miss statistics. It returns the entry
// pointer and true if present, otherwise nil and false.
//
// ELEVATED: the earlier design had no Peek, so every lookup went through
// Get and reordered the LRU list. "Observe only" callers would then push
// the files they scan to the front and could evict the files that are
// actually in frequent use. Peek provides "observe without polluting"
// semantics, intended for:
//   - the permission classifier checking whether a file has been read
//   - SecretGuard background scans over cached files
//   - DreamEngine / FreshnessChecker inspecting cache state
//
// Rejected alternative: use Get everywhere. Periodic background scans
// would systematically move their whole scan set to the front of the
// list, filling the cache with files outside the working set and evicting
// the ones inside it.
func (c *FileStateCache) Peek(path string) (*FileCacheEntry, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	elem, found := c.entries[path]
	if !found {
		return nil, false
	}
	return elem.Value.(*lruEntry).entry, true
}

// IsModified reports whether the file at path has changed on disk since
// it was last recorded, by comparing the file's current mtime with the
// mtime stored in the cache entry.
//
// It returns true (the conservative answer) when path is not cached or
// the file cannot be stat'ed. When a differing mtime is detected, the
// cached entry's WasModified flag is set.
//
// CLEVER: IsModified has Peek semantics: it neither reorders the LRU list
// nor touches the hit/miss counters. Modification detection is an
// observation, not a use; going through Get would push every checked file
// to the front of the list and let checked-but-unused files crowd out the
// working set.
func (c *FileStateCache) IsModified(path string) bool {
	// Capture the entry under the read lock, then release it so the stat
	// syscall below does not run while holding the mutex.
	c.mu.RLock()
	elem, ok := c.entries[path]
	var entry *FileCacheEntry
	if ok {
		entry = elem.Value.(*lruEntry).entry
	}
	c.mu.RUnlock()

	if !ok {
		return true // not cached: treat as modified (conservative)
	}

	info, err := os.Stat(path)
	if err != nil {
		return true // file inaccessible: treat as modified
	}

	if info.ModTime().Equal(entry.ModTime) {
		return false
	}

	// Setting the flag needs the write lock. No lock was held during the
	// stat, so Record may have replaced the entry with one describing a
	// fresh read (or it may have been evicted). Flag only the entry that
	// was actually compared, so a newer entry does not inherit a stale
	// "modified" mark.
	// NOTE(review): the flag is written in place, so code still holding a
	// pointer from Get/Peek sees the change; confirm such readers do not
	// access WasModified concurrently without synchronization.
	c.mu.Lock()
	if current, present := c.entries[path]; present {
		if le := current.Value.(*lruEntry); le.entry == entry {
			le.entry.WasModified = true
		}
	}
	c.mu.Unlock()

	return true
}

// RecentFiles returns the paths of cached files ordered by recency of
// use, most recent first.
//
// limit caps the number of paths returned; a value <= 0 returns all of
// them. The result is a non-nil slice even when the cache is empty.
func (c *FileStateCache) RecentFiles(limit int) []string {
	c.mu.RLock()
	defer c.mu.RUnlock()

	want := c.lru.Len()
	if limit > 0 && limit < want {
		want = limit
	}

	// The front of the list is the most recently used entry, so walk
	// forward from there. entry.Path is the authoritative path: lruEntry
	// keeps no separate copy.
	paths := make([]string, 0, want)
	node := c.lru.Front()
	for node != nil && len(paths) < want {
		paths = append(paths, node.Value.(*lruEntry).entry.Path)
		node = node.Next()
	}
	return paths
}

// Clear drops every cached entry and resets the hit, miss, and eviction
// counters to zero. The configured capacity is left unchanged.
func (c *FileStateCache) Clear() {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Reset the statistics.
	c.hits, c.misses, c.evictions = 0, 0, 0

	// Empty both halves of the LRU structure so they stay consistent.
	c.lru.Init()
	c.entries = make(map[string]*list.Element)
}

// Info returns a value snapshot of the cache entry for path, with no
// LRU reordering and no hit-stat impact. The boolean is false, and the
// returned struct is the zero value, when path is not cached.
//
// It differs from Peek in what it hands out: Peek returns a
// *FileCacheEntry for internal reads, through which a caller could
// accidentally mutate cache state, whereas Info returns a copy that is
// safe to expose to external consumers (SDK / platform / diagnostics).
//
// This is the real pull API for FileCacheEntry metadata. Previously the
// struct was exported but unreachable from outside the package, leaving
// Path / ContentHash / Size / LineCount / ReadAt / WasModified declared
// yet never read. Through Info, external code can read:
//   - the content hash: diagnose "version the Agent saw" vs "version on disk"
//   - size / line count: audit how large a file the Agent read
//   - read time: audit when the file was last read
//   - modified flag: show that the file changed externally after the read
func (c *FileStateCache) Info(path string) (FileCacheEntry, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	if elem, found := c.entries[path]; found {
		// Copy while the read lock is held so the snapshot is consistent.
		snapshot := *elem.Value.(*lruEntry).entry
		return snapshot, true
	}
	return FileCacheEntry{}, false
}

// Stats returns a snapshot of the cache's size, capacity, and
// hit/miss/eviction counters, all read under a single read lock.
func (c *FileStateCache) Stats() CacheStats {
	c.mu.RLock()
	defer c.mu.RUnlock()

	var snap CacheStats
	snap.Entries = len(c.entries)
	snap.MaxSize = c.maxSize
	snap.Hits = c.hits
	snap.Misses = c.misses
	snap.Evictions = c.evictions
	return snap
}

// evictIfNeeded removes least-recently-used entries until the cache is
// back within maxSize. It is an internal helper: the caller must already
// hold the write lock.
//
// CLEVER: eviction works from the back of the list. list.Back() returns
// the least recently used node and list.Remove() unlinks it, both O(1).
// The matching map key is deleted in the same step so the list and the
// map stay consistent.
func (c *FileStateCache) evictIfNeeded() {
	for len(c.entries) > c.maxSize {
		tail := c.lru.Back()
		if tail == nil {
			// List already empty; nothing more can be evicted.
			return
		}
		victim := tail.Value.(*lruEntry).entry
		c.lru.Remove(tail)
		delete(c.entries, victim.Path)
		c.evictions++
	}
}