// Package tokenizer benchmarks for the token-estimation helpers.
package tokenizer

import (
	"strconv"
	"strings"
	"testing"
)

// BenchmarkEstimateTokens measures EstimateTokens across representative
// input classes: short and ~1KB English prose, short and ~1KB CJK text,
// a realistic Go source snippet, and mixed CJK/English content.
// b.Loop (Go 1.24+) keeps setup out of the timed region and prevents
// the compiler from dead-code-eliminating the call.
func BenchmarkEstimateTokens(b *testing.B) {
	b.Run("English_short", func(b *testing.B) {
		text := "The quick brown fox jumps over the lazy dog"
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
	b.Run("English_1KB", func(b *testing.B) {
		text := strings.Repeat("This is a moderately long sentence for benchmarking token estimation. ", 20)
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
	b.Run("CJK_short", func(b *testing.B) {
		text := "你好世界这是一个测试"
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
	b.Run("CJK_1KB", func(b *testing.B) {
		text := strings.Repeat("这是一段用于性能基准测试的中文文本内容需要足够长才有意义", 15)
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
	b.Run("Code_Go", func(b *testing.B) {
		text := `func (p *NormalizePipeline) Run(messages []query.Message) []query.Message { if len(messages) == 0 { return messages } sort.SliceStable(p.steps, func(i, j int) bool { return p.steps[i].Priority() < p.steps[j].Priority() }) result := messages for _, step := range p.steps { result = step.Normalize(result) } return result }`
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
	b.Run("Mixed_CJK_English", func(b *testing.B) {
		text := strings.Repeat("flyto-agent 是一个 Go 引擎项目, providing ModelProvider interface for AI models. ", 10)
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
}

// BenchmarkEstimateMessageTokens measures EstimateMessageTokens over
// message slices of increasing length (5, 20, 100).
func BenchmarkEstimateMessageTokens(b *testing.B) {
	for _, n := range []int{5, 20, 100} {
		msgs := make([]Message, n)
		for i := range msgs {
			msgs[i] = Message{Role: "user", Content: "Hello, this is a test message for benchmarking."}
		}
		b.Run(toString(n), func(b *testing.B) {
			b.ReportAllocs()
			for b.Loop() {
				EstimateMessageTokens(msgs)
			}
		})
	}
}

// toString formats a message count as a benchmark sub-name, e.g. "5_msgs".
//
// The previous implementation mapped n to a fixed label through hard-coded
// range thresholds (n < 10, n < 50), which only coincidentally matched the
// benchmark table and would mislabel any other count (e.g. 30 -> "20_msgs").
// Deriving the label from n directly yields identical output for the current
// inputs and is correct for any n.
func toString(n int) string {
	return strconv.Itoa(n) + "_msgs"
}