package daemon

import (
	"context"
	"sync/atomic"
	"testing"
	"time"
)

// TestDaemonManager_HeartbeatDisabled_NoService locks sub-claim (a-neg):
// when DaemonConfig.HeartbeatInterval is zero, no HeartbeatService is
// constructed. This guards against "wire always active" regressions
// that would leak a goroutine even when the operator explicitly
// disabled heartbeat via interval=0.
func TestDaemonManager_HeartbeatDisabled_NoService(t *testing.T) {
	dm := NewDaemonManager(nil, nil, DaemonConfig{})
	if dm.heartbeat != nil {
		t.Fatalf("dm.heartbeat = %v, want nil when HeartbeatInterval=0", dm.heartbeat)
	}
}

// TestDaemonManager_HeartbeatEnabled_ForwardsConfig locks sub-claim (a):
// cfg.HeartbeatInterval AND cfg.HeartbeatTimeout are both read and
// forwarded into the HeartbeatConfig carried by the constructed
// HeartbeatService. This is the single production read-site for both
// DaemonConfig fields -- if the values drift here, the runtime scan
// frequency and staleness threshold drift silently.
func TestDaemonManager_HeartbeatEnabled_ForwardsConfig(t *testing.T) {
	const (
		wantInterval = 7 * time.Second
		wantTimeout  = 21 * time.Second
	)
	dm := NewDaemonManager(nil, nil, DaemonConfig{
		HeartbeatInterval: wantInterval,
		HeartbeatTimeout:  wantTimeout,
	})
	if dm.heartbeat == nil {
		t.Fatalf("dm.heartbeat = nil, want non-nil when HeartbeatInterval>0")
	}
	if got := dm.heartbeat.cfg.Interval; got != wantInterval {
		t.Errorf("dm.heartbeat.cfg.Interval = %v, want %v (cfg.HeartbeatInterval not forwarded)", got, wantInterval)
	}
	if got := dm.heartbeat.cfg.Timeout; got != wantTimeout {
		t.Errorf("dm.heartbeat.cfg.Timeout = %v, want %v (cfg.HeartbeatTimeout not forwarded)", got, wantTimeout)
	}
}
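
// For orientation: the construction gate the two tests above lock
// plausibly reads like the sketch below. Illustrative only -- the
// NewHeartbeatService name and exact wiring are assumptions; the tests
// pin only the nil/non-nil outcome and the forwarded cfg values:
//
//	if cfg.HeartbeatInterval > 0 {
//		dm.heartbeat = NewHeartbeatService(HeartbeatConfig{
//			Interval: cfg.HeartbeatInterval,
//			Timeout:  cfg.HeartbeatTimeout,
//		})
//	}
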
// TestDaemonManager_CloseSession_UnregistersFromHeartbeat locks the
// sub-claim that DaemonManager lifecycle hooks actually flow through
// the heartbeat surface: closeSession must Unregister, or the scan
// goroutine will fire closeSession again on the next tick for a
// session that no longer exists (benign but noisy). We seed the
// heartbeat map directly via the public Register API (as DaemonManager
// does in getOrCreateSession) and a stub sessions entry so closeSession
// can navigate without a real engine/transport, then assert the
// heartbeat no longer tracks the ID.
func TestDaemonManager_CloseSession_UnregistersFromHeartbeat(t *testing.T) {
	dm := NewDaemonManager(nil, nil, DaemonConfig{
		HeartbeatInterval: 1 * time.Second,
		HeartbeatTimeout:  3 * time.Second,
	})
	if dm.heartbeat == nil {
		t.Fatalf("prerequisite: heartbeat should be non-nil")
	}
	const sid = "test-session-close"

	// Seed as if getOrCreateSession had run: the minimum state closeSession
	// can traverse without a real engine / transport / isolation.
	dm.sessions[sid] = &ManagedSession{isolation: SharedIsolation{}}
	dm.heartbeat.Register(sid)
	if n := countHeartbeatSessions(dm.heartbeat); n != 1 {
		t.Fatalf("heartbeat should track 1 session before close, got %d", n)
	}

	dm.closeSession(sid)

	if n := countHeartbeatSessions(dm.heartbeat); n != 0 {
		t.Errorf("heartbeat should track 0 sessions after closeSession, got %d", n)
	}
}

// countHeartbeatSessions is a white-box helper for _test.go only --
// the package-internal sessions map is not part of the public API.
func countHeartbeatSessions(hs *HeartbeatService) int {
	hs.mu.RLock()
	defer hs.mu.RUnlock()
	return len(hs.sessions)
}

// TestDaemonManager_CrashRecoveryDisabled_NoWrapper locks sub-claim
// (a-neg): when DaemonConfig.CrashRecovery is nil, no CrashRecovery
// wrapper is constructed. Mirrors the Heartbeat-disabled invariant:
// the operator opts in via a non-nil pointer; a zero-value config must
// not silently engage a retry policy the operator didn't choose.
func TestDaemonManager_CrashRecoveryDisabled_NoWrapper(t *testing.T) {
	dm := NewDaemonManager(nil, nil, DaemonConfig{})
	if dm.crashRecovery != nil {
		t.Fatalf("dm.crashRecovery = %v, want nil when cfg.CrashRecovery=nil", dm.crashRecovery)
	}
}

// TestDaemonManager_CrashRecoveryEnabled_ForwardsConfig locks
// sub-claim (a): NewDaemonManager dereferences cfg.CrashRecovery and
// forwards its fields (MaxRetries, InitialDelay, MaxDelay, and
// Multiplier) into the CrashRecovery constructor. If any field drifts
// here, the runtime retry policy drifts silently -- the single
// production read-site must faithfully carry operator intent.
func TestDaemonManager_CrashRecoveryEnabled_ForwardsConfig(t *testing.T) {
	policy := &CrashRecoveryConfig{
		MaxRetries:   5,
		InitialDelay: 2 * time.Second,
		MaxDelay:     30 * time.Second,
		Multiplier:   3.0,
	}
	dm := NewDaemonManager(nil, nil, DaemonConfig{CrashRecovery: policy})
	if dm.crashRecovery == nil {
		t.Fatalf("dm.crashRecovery = nil, want non-nil when cfg.CrashRecovery != nil")
	}
	got := dm.crashRecovery.cfg
	if got.MaxRetries != policy.MaxRetries {
		t.Errorf("crashRecovery.cfg.MaxRetries = %d, want %d", got.MaxRetries, policy.MaxRetries)
	}
	if got.InitialDelay != policy.InitialDelay {
		t.Errorf("crashRecovery.cfg.InitialDelay = %v, want %v", got.InitialDelay, policy.InitialDelay)
	}
	if got.MaxDelay != policy.MaxDelay {
		t.Errorf("crashRecovery.cfg.MaxDelay = %v, want %v", got.MaxDelay, policy.MaxDelay)
	}
	if got.Multiplier != policy.Multiplier {
		t.Errorf("crashRecovery.cfg.Multiplier = %v, want %v", got.Multiplier, policy.Multiplier)
	}
}
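
// For orientation: the crash-recovery gate mirrors the heartbeat one.
// A plausible sketch (illustrative only -- the NewCrashRecovery name is
// an assumption; the tests pin only the nil/non-nil outcome and the
// forwarded policy fields):
//
//	if cfg.CrashRecovery != nil {
//		dm.crashRecovery = NewCrashRecovery(*cfg.CrashRecovery)
//	}
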
// TestRunWithRecover_NormalReturnsNil locks the happy path of the
// panic-to-error bridge: a fn that completes normally must return nil,
// so CrashRecovery does not retry a perfectly successful prompt run.
// This guards against the "every completed prompt triggers 3 bonus
// retries" regression.
func TestRunWithRecover_NormalReturnsNil(t *testing.T) {
	called := false
	err := runWithRecover(func() { called = true })
	if err != nil {
		t.Errorf("runWithRecover returned err=%v for normal fn, want nil", err)
	}
	if !called {
		t.Errorf("runWithRecover did not invoke fn")
	}
}

// TestRunWithRecover_PanicToError locks the crash-reification side:
// a panicking fn must surface as a non-nil error, otherwise
// CrashRecovery can never fire. The error is intentionally opaque
// (no special type) because it is consumed only by CrashRecovery's
// generic error-means-retry logic.
func TestRunWithRecover_PanicToError(t *testing.T) {
	err := runWithRecover(func() { panic("boom") })
	if err == nil {
		t.Fatalf("runWithRecover returned nil for panicking fn, want non-nil")
	}
}

// TestCrashRecovery_WrapsPanickingPromptRun locks the end-to-end
// sub-claim: runWithRecover + CrashRecovery.RunWithRecovery compose
// into a real retry loop when a prompt handler panics. We use the
// same CrashRecovery the daemon would build (via NewDaemonManager)
// and assert OnCrash fires once per attempt, OnGiveUp fires once at
// the end, and the panicking fn is called exactly MaxRetries+1 times
// (first attempt + retries). This is the critical evidence that
// dm.crashRecovery is *consumed*, not merely stored.
func TestCrashRecovery_WrapsPanickingPromptRun(t *testing.T) {
	var crashCount, giveUpCount, invokeCount atomic.Int32
	policy := &CrashRecoveryConfig{
		MaxRetries:   2,
		InitialDelay: 1 * time.Millisecond,
		MaxDelay:     2 * time.Millisecond,
		Multiplier:   2.0,
		OnCrash:      func(_ string, _ int, _ error) { crashCount.Add(1) },
		OnGiveUp:     func(_ string, _ int) { giveUpCount.Add(1) },
	}
	dm := NewDaemonManager(nil, nil, DaemonConfig{CrashRecovery: policy})
	if dm.crashRecovery == nil {
		t.Fatalf("prerequisite: dm.crashRecovery should be non-nil")
	}

	runFn := func() error {
		return runWithRecover(func() {
			invokeCount.Add(1)
			panic("simulated prompt crash")
		})
	}
	err := dm.crashRecovery.RunWithRecovery(context.Background(), "test-session", runFn)
	if err == nil {
		t.Fatalf("RunWithRecovery returned nil, want non-nil (fn always panics)")
	}

	// MaxRetries=2 means first attempt + 2 retries = 3 total invocations,
	// 3 OnCrash callbacks, and 1 OnGiveUp at the end.
	if got := invokeCount.Load(); got != 3 {
		t.Errorf("invokeCount = %d, want 3 (first attempt + MaxRetries=2 retries)", got)
	}
	if got := crashCount.Load(); got != 3 {
		t.Errorf("crashCount = %d, want 3 (one per attempt)", got)
	}
	if got := giveUpCount.Load(); got != 1 {
		t.Errorf("giveUpCount = %d, want 1", got)
	}
}
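
// For orientation: the panic-to-error bridge locked by the two
// TestRunWithRecover_* tests plausibly reads like the sketch below.
// Illustrative only -- the tests pin the signature and the nil/non-nil
// contract, not the exact body or error text:
//
//	func runWithRecover(fn func()) (err error) {
//		defer func() {
//			if r := recover(); r != nil {
//				// Reify the crash as an opaque error so CrashRecovery's
//				// generic error-means-retry logic can consume it.
//				err = fmt.Errorf("recovered panic: %v", r)
//			}
//		}()
//		fn()
//		return nil
//	}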