// Package tokenizer benchmarks for the token-estimation helpers.
package tokenizer

import (
	"strconv"
	"strings"
	"testing"
)

// BenchmarkEstimateTokens measures EstimateTokens across representative
// input classes: short and ~1KB English prose, short and ~1KB CJK text,
// a realistic Go source snippet, and mixed CJK/English content.
// b.Loop (Go 1.24+) keeps setup out of the timed region and prevents
// the compiler from dead-code-eliminating the call.
func BenchmarkEstimateTokens(b *testing.B) {
	b.Run("English_short", func(b *testing.B) {
		text := "The quick brown fox jumps over the lazy dog"
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
	b.Run("English_1KB", func(b *testing.B) {
		text := strings.Repeat("This is a moderately long sentence for benchmarking token estimation. ", 20)
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
	b.Run("CJK_short", func(b *testing.B) {
		text := "你好世界这是一个测试"
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
	b.Run("CJK_1KB", func(b *testing.B) {
		text := strings.Repeat("这是一段用于性能基准测试的中文文本内容需要足够长才有意义", 15)
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
	b.Run("Code_Go", func(b *testing.B) {
		text := `func (p *NormalizePipeline) Run(messages []query.Message) []query.Message { if len(messages) == 0 { return messages } sort.SliceStable(p.steps, func(i, j int) bool { return p.steps[i].Priority() < p.steps[j].Priority() }) result := messages for _, step := range p.steps { result = step.Normalize(result) } return result }`
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
	b.Run("Mixed_CJK_English", func(b *testing.B) {
		text := strings.Repeat("flyto-agent 是一个 Go 引擎项目, providing ModelProvider interface for AI models. ", 10)
		b.ReportAllocs()
		for b.Loop() {
			EstimateTokens(text)
		}
	})
}

// BenchmarkEstimateMessageTokens measures EstimateMessageTokens over
// message slices of increasing length (5, 20, 100).
func BenchmarkEstimateMessageTokens(b *testing.B) {
	for _, n := range []int{5, 20, 100} {
		msgs := make([]Message, n)
		for i := range msgs {
			msgs[i] = Message{Role: "user", Content: "Hello, this is a test message for benchmarking."}
		}
		b.Run(toString(n), func(b *testing.B) {
			b.ReportAllocs()
			for b.Loop() {
				EstimateMessageTokens(msgs)
			}
		})
	}
}

// toString formats a message count as a benchmark sub-name, e.g. "5_msgs".
//
// The previous implementation mapped n to a fixed label through hard-coded
// range thresholds (n < 10, n < 50), which only coincidentally matched the
// benchmark table and would mislabel any other count (e.g. 30 -> "20_msgs").
// Deriving the label from n directly yields identical output for the current
// inputs and is correct for any n.
func toString(n int) string {
	return strconv.Itoa(n) + "_msgs"
}