package engine

// mcp_proxy_tool.go implements a tools.Tool wrapper around a tool exposed by
// a plugin-owned MCP server. Each instance is a thin proxy: Execute forwards
// the call to mcp.Manager.CallTool, which routes it to the correct server
// subprocess via JSON-RPC.
//
// Lifecycle: these proxies are constructed and registered by
// Engine.syncPluginMCPServers after a plugin's MCP server is connected and
// its tool list is discovered. They are unregistered when the owning plugin
// is disabled (DisablePlugin) or when the engine re-runs syncPluginMCPServers
// (clean-slate rebuild).
//
// Naming:
//   - mgrServerKey is the key used inside mcp.Manager (e.g. "plugin.foo.bar")
//     and is an implementation detail not exposed to LLMs or permission rules.
//   - Name() returns the agent-facing tool name in the form
//     "pluginName:serverName/toolName", mirroring pluginShellTool's
//     "pluginName:toolName" convention so permission globs stay consistent.
//
// Why a proxy type instead of reusing pluginShellTool: shell tools exec a
// subprocess per call, while MCP tools reuse a persistent JSON-RPC session
// managed by mcp.Manager. The two execution models have nothing to share
// beyond the tools.Tool interface itself.

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"

	"git.flytoex.net/yuanwei/flyto-agent/internal/mcp"
	"git.flytoex.net/yuanwei/flyto-agent/pkg/tools"
)

// mcpProxyTool forwards tool calls to a plugin-owned MCP server via
// mcp.Manager.
type mcpProxyTool struct {
	// name is the agent-facing name, "pluginName:serverName/toolName".
	name string

	// description comes from the MCP server's tool listing.
	description string

	// schema is the inputSchema from the MCP server's tool listing. It may be
	// empty; InputSchema substitutes a permissive object schema in that case.
	schema json.RawMessage

	// mgrServerKey is the server key inside mcp.Manager
	// (e.g. "plugin.foo.bar").
	mgrServerKey string

	// toolName is the raw tool name inside the MCP server.
	toolName string

	// pluginName is the owning plugin, kept for audit/permission source
	// tagging.
	pluginName string

	// mgr is the shared engine-level manager that owns the server sessions.
	mgr *mcp.Manager
}

// Name returns the agent-facing tool name.
func (t *mcpProxyTool) Name() string { return t.name }

// Description returns the description advertised by the MCP server. The
// context is accepted to satisfy the method signature and is not used.
func (t *mcpProxyTool) Description(ctx context.Context) string { return t.description }

// InputSchema returns the schema advertised by the MCP server, or
// emptyObjectSchema when the server declared none.
func (t *mcpProxyTool) InputSchema() json.RawMessage {
	if len(t.schema) > 0 {
		return t.schema
	}
	return emptyObjectSchema
}

// Metadata declares conservative defaults: MCP tools can do anything the
// server subprocess does, so we mark them non-concurrency-safe, non-readonly
// and non-destructive by default. PermissionClass "generic" routes through
// the default rule-matching path (no bash/file fast paths).
//
// Why not read these from the MCP server: the MCP protocol has no standard
// way to declare concurrency/read-only semantics per tool. Being conservative
// means the orchestrator serializes MCP calls, which is slower but safe.
// A future extension could honor an annotations field if MCP adds one.
func (t *mcpProxyTool) Metadata() tools.Metadata {
	meta := tools.Metadata{PermissionClass: "generic"}
	// Spelled out although false is the zero value: these three flags are the
	// deliberate, conservative part of the declaration.
	meta.ConcurrencySafe = false
	meta.ReadOnly = false
	meta.Destructive = false
	return meta
}

// Execute forwards the call to mcp.Manager.CallTool and translates the
// MCP ToolCallResult into a tools.Result.
//
// Argument decoding: MCP expects args as a JSON object (map[string]any).
// We Unmarshal directly; if the schema says "object" but the LLM sends a
// primitive (unlikely given the schema is echoed back to it), Unmarshal
// fails and we return an IsError Result rather than a Go error, so the
// agent can self-correct on the next turn.
func (t *mcpProxyTool) Execute(ctx context.Context, input json.RawMessage, _ tools.ProgressFunc) (*tools.Result, error) { var args map[string]any if len(input) > 0 { if err := json.Unmarshal(input, &args); err != nil { return &tools.Result{ Output: fmt.Sprintf("mcp tool %q: invalid arguments (expected JSON object): %v", t.name, err), IsError: true, }, nil } } // mcp.Manager.CallTool is blocking. The client inside the manager has its // own request timeout (SetRequestTimeout), so ctx cancellation is advisory // here - we cannot interrupt an in-flight JSON-RPC request mid-read // without also killing the server subprocess, which would hurt other // plugins sharing the same server key. Fail-safe: accept the latency. result, err := t.mgr.CallTool(t.mgrServerKey, t.toolName, args) if err != nil { return nil, fmt.Errorf("mcp tool %q: %w", t.name, err) } return mcpResultToToolResult(result), nil } // mcpResultToToolResult flattens a multi-content MCP ToolCallResult into the // single-string Output field of tools.Result. // // Why flatten: tools.Result.Output is a string for simplicity, but MCP // supports an array of content items (text / image / resource). The common // case by far is a single text item, which maps 1:1. For mixed content we // join text items with "\n" and annotate non-text items inline. Full // structured passthrough (images as data URIs etc.) is a future extension - // the agent loop currently has no renderer for image tool results anyway. 
func mcpResultToToolResult(r *mcp.ToolCallResult) *tools.Result { if r == nil { return &tools.Result{} } var parts []string for _, c := range r.Content { switch c.Type { case "text": parts = append(parts, c.Text) case "image": parts = append(parts, fmt.Sprintf("[image %s, %d bytes base64]", c.MimeType, len(c.Data))) case "resource": parts = append(parts, fmt.Sprintf("[resource %s]", c.URI)) default: parts = append(parts, fmt.Sprintf("[unknown content type %q]", c.Type)) } } return &tools.Result{ Output: strings.Join(parts, "\n"), IsError: r.IsError, } } // emptyObjectSchema is the fallback InputSchema when an MCP server declares // no inputSchema for a tool. Matches pluginShellTool's default for symmetry. var emptyObjectSchema = json.RawMessage(`{"type":"object","properties":{},"additionalProperties":true}`) // pluginMCPServerKeyPrefix is the prefix used for mcp.Manager server keys // that are owned by the plugin subsystem. Keys not starting with this // prefix belong to user-configured servers (from settings.json) and must // not be touched by syncPluginMCPServers. // // Why a prefix rather than a separate manager: one Manager per engine keeps // reconnect loops, elicitation handlers and tool caches in one place. // Namespacing via prefix is a minimal change to support co-existence of // plugin-owned and user-configured MCP servers in the same manager. const pluginMCPServerKeyPrefix = "plugin." // pluginMCPServerKey builds the mcp.Manager key for a plugin-owned server. // Format: "plugin..". The plugin and server names // are already validated by manifest loader (no dots allowed in plugin name; // server name is a map key with no restrictions, so a dot there would // make parsePluginMCPServerKey ambiguous - see note there). func pluginMCPServerKey(pluginName, serverName string) string { return pluginMCPServerKeyPrefix + pluginName + "." + serverName } // parsePluginMCPServerKey splits a key back into (pluginName, serverName). 
// Returns ok=false if the key does not start with pluginMCPServerKeyPrefix. // // Limitation: if a plugin's serverName contains a dot the split is // ambiguous. We split on the *first* dot after the prefix, treating // everything after as the serverName. Plugin names are validated to be // alphanumeric+dash+underscore by manifest validator, so the first dot // after the prefix is always the plugin/server separator. func parsePluginMCPServerKey(key string) (pluginName, serverName string, ok bool) { if !strings.HasPrefix(key, pluginMCPServerKeyPrefix) { return "", "", false } rest := key[len(pluginMCPServerKeyPrefix):] dot := strings.IndexByte(rest, '.') if dot < 0 { return "", "", false } return rest[:dot], rest[dot+1:], true } // mcpProxyToolName builds the agent-facing tool name. // Format: "pluginName:serverName/toolName" - colon separates plugin from // server (matching pluginShellTool's "plugin:tool" convention) and slash // separates server from tool (matching filesystem path intuition). // For config-owned servers (engine.Config.MCPServers) the convention is // to pass pluginName="config" so the name reads "config:/", // symmetric with the plugin path. // // mcpProxyToolName 构造 Agent 侧看到的 tool 名. 格式 // "pluginName:serverName/toolName". 对 config-owned server // (engine.Config.MCPServers) 约定 pluginName 传 "config", 得到 // "config:/", 跟 plugin 路径的命名对称. func mcpProxyToolName(pluginName, serverName, toolName string) string { return pluginName + ":" + serverName + "/" + toolName } // configMCPServerKeyPrefix namespaces mcp.Manager keys for servers declared // via engine.Config.MCPServers, so they coexist with plugin-owned servers // ("plugin.") in a single Manager without either path touching the other. // syncPluginMCPServers / shutdownPluginMCPServers only recognise the // "plugin." prefix and leave "config." keys alone; symmetrically, the // config path never touches "plugin." keys. 
//
// In short: "config." is the key prefix of the engine.Config.MCPServers path
// inside mcp.Manager. It sits side by side with the "plugin." prefix; both
// paths share one Manager and neither touches the other's keys.
const configMCPServerKeyPrefix = "config."

// configMCPServerKey builds the mcp.Manager key for a server declared via
// engine.Config.MCPServers. Server name comes verbatim from the caller's
// config.MCPServerConfig.Name; the engine does not parse or validate it
// beyond "non-empty" (empty Name is skipped with an observer event). That
// check happens in the caller, not in this function.
func configMCPServerKey(serverName string) string {
	return configMCPServerKeyPrefix + serverName
}

// parseConfigMCPServerKey returns the serverName when the key was produced
// by configMCPServerKey. ok=false means the key belongs to another subsystem
// (plugin-owned or user-configured via settings.json), in which case the
// returned name is empty. Only the prefix is checked: a key consisting of
// the bare prefix yields ("", true).
func parseConfigMCPServerKey(key string) (serverName string, ok bool) {
	if !strings.HasPrefix(key, configMCPServerKeyPrefix) {
		return "", false
	}
	serverName = strings.TrimPrefix(key, configMCPServerKeyPrefix)
	return serverName, true
}