diff --git a/.gitignore b/.gitignore index 92341b1ee..6bec4111a 100644 --- a/.gitignore +++ b/.gitignore @@ -48,4 +48,8 @@ logs/ # GHA credentials gha-creds-*.json -QWEN.md \ No newline at end of file +QWEN.md + +__pycache__/ +*.py[codz] +*$py.class \ No newline at end of file diff --git a/docs/cli/index.md b/docs/cli/index.md index e32eca146..8c60afce6 100644 --- a/docs/cli/index.md +++ b/docs/cli/index.md @@ -27,3 +27,42 @@ Qwen Code executes the command and prints the output to your terminal. Note that ```bash qwen -p "What is fine tuning?" ``` + +### Structured stream-json mode + +For programmatic integrations, Qwen Code supports structured JSON Lines input and output: + +- `--output-format stream-json` switches stdout to emit Claude-compatible envelopes (`user`, `assistant`, `result`, etc.). +- `--input-format stream-json` lets you pipe newline-delimited JSON requests into stdin (e.g., control requests and user messages). +- `--include-partial-messages` enables incremental `stream_event` deltas alongside the final assistant message. + +Example one-shot invocation: + +```bash +echo '{"type":"user","message":{"content":"List supported flags"}}' \ + | qwen --input-format stream-json --output-format stream-json +``` + +When run in this mode, every stdout line is a standalone JSON object you can parse reliably. Control responses (for example, acknowledging `control_request.initialize`) are also written using the same envelope schema. + +To keep a session open for multiple messages, omit `--prompt` and keep stdin open (for example, by running the CLI directly and typing JSON lines): + +```bash +npm run stream-json-session +``` + +The process will remain active until EOF (`Ctrl+D`) or an explicit `control_request.interrupt`, making it suitable for SDK transports that maintain a persistent subprocess connection. + +The repository also provides a minimal Python client sample at +`docs/examples/stream-json/simple_stream_json_client.py`. The script is adapted +from `third-party/anthropics/claude-agent-sdk-python/examples/quick_start.py` +and illustrates how to drive the session lifecycle with `control_request`, while +showcasing a short multi-turn exchange (sending several `user` messages in a +row): + +```bash +python docs/examples/stream-json/simple_stream_json_client.py +``` + +Each log entry is separated with `------` and prefixed with `[client]` or `[cli]` +to make debugging the JSON stream easier. diff --git a/docs/examples/stream-json/README.md b/docs/examples/stream-json/README.md new file mode 100644 index 000000000..8e545754e --- /dev/null +++ b/docs/examples/stream-json/README.md @@ -0,0 +1,176 @@ +# Stream JSON Interaction Example + +This example demonstrates how to drive the Qwen Code CLI directly via the JSON Lines protocol and provides a mock SDK client script to facilitate integration testing. + +## Prerequisites + +- `qwen code` is installed locally and ready to run (or run `npx tsx packages/cli/index.ts` inside the repo). +- It is recommended to enable `--include-partial-messages` to experience incremental events. + +## Quick CLI Walkthrough + +1. 
Prepare an input stream (write it to `docs/examples/stream-json/request.jsonl`):

   ```bash
   cat <<'EOF' > docs/examples/stream-json/request.jsonl
   {"type":"control_request","request_id":"req-init-1","request":{"subtype":"initialize","hooks":null}}
   {"type":"user","message":{"role":"user","content":[{"type":"text","text":"请阅读 README.md 并总结三个关键特性。"}]}}
   {"type":"control_request","request_id":"req-interrupt-1","request":{"subtype":"interrupt"}}
   EOF
   ```

2. Run the CLI and pipe the JSONL into standard input. Inside the repo, `npm run qwen` is recommended (parameters for the CLI itself go after `--`):

   ```bash
   npm run qwen -- \
     --input-format stream-json \
     --output-format stream-json \
     --include-partial-messages \
     --model glm-4.6 \
     < docs/examples/stream-json/request.jsonl
   ```

3. Observe standard output: when the CLI initializes successfully, it prints `system/init`, `control_response`, and `stream_event` entries containing `thinking_delta` and `text_delta`, and finally ends with a `result` event.

> Note: If the CLI emits events like `control_request.can_use_tool`, the caller must promptly write back a `control_response`; otherwise, the process waits for the acknowledgement.

## Mock SDK Script

- Location: `docs/examples/stream-json/simple_stream_json_client.py`
- Functionality: launches a CLI subprocess, automatically completes the `initialize` handshake, sends user messages, and provides sample responses for control requests such as `can_use_tool` and `hook_callback`. Once `result` is received, the script ends the session with an `interrupt`.

Usage example:

```bash
python docs/examples/stream-json/simple_stream_json_client.py
```

By default the script launches the CLI via `npm run qwen -- …`. To replace the command, set the `QWEN_STREAM_JSON_COMMAND` environment variable, for example:

```bash
# export QWEN_STREAM_JSON_COMMAND="npm run qwen -- --input-format stream-json --output-format stream-json --include-partial-messages --model glm-4.6"
export QWEN_STREAM_JSON_COMMAND="npm run qwen -- --input-format stream-json --output-format stream-json --include-partial-messages"
python docs/examples/stream-json/simple_stream_json_client.py
```

The script exposes a `StreamJsonClient` class that can be customized with `StreamJsonClientOptions` to define commands, initialization requests, user messages, and control handlers. For example:

```python
import asyncio
import sys

sys.path.append("docs/examples/stream-json")
from simple_stream_json_client import StreamJsonClient, StreamJsonClientOptions

options = StreamJsonClientOptions(
    command=["npm", "run", "qwen", "--", "--input-format", "stream-json", "--output-format", "stream-json"],
    user_messages=[
        {
            "type": "user",
            "message": {
                "role": "user",
                "content": [{"type": "text", "text": "举例说明 stream-json 协议用途"}],
            },
        }
    ],
    extra_handlers={
        # Reject sensitive tools
        "can_use_tool": lambda client, request_id, payload: client.send_control_response(
            request_id,
            success=True,
            response={"behavior": "deny", "message": "demo client blocks all tools"},
        ),
    },
)

client = StreamJsonClient(options)
asyncio.run(client.run())
```

## Automated Tests

New unit tests validate the Stream JSON functionality.
Run them with:

```bash
npx vitest run \
  packages/cli/src/streamJson/writer.test.ts \
  packages/cli/src/streamJson/session.test.ts \
  packages/cli/src/nonInteractiveCli.test.ts
```

The tests cover:

- `system/init` events and session metadata
- Incremental messages (`thinking_delta`, `input_json_delta`, etc.)
- `usage` and `duration_api_ms` fields in the `result` event
- Control request bridging logic for `can_use_tool`

Following these steps quickly verifies the Stream JSON protocol’s output and control-channel behavior in the CLI.

### Sample Run Log (Excerpt)

The table below excerpts key log entries from a script run and notes the capability each entry demonstrates:

| Log Snippet | Description |
| --- | --- |
| `{"type":"control_response","request_id":"demo-init","success":true,...}`<br>
`{"type":"system","subtype":"init",...}` | Script writes back `initialize`, and the CLI outputs session metadata in the `system/init` event (control channel + system event). +| `{"type":"user","message":{"role":"user","content":"请阅读 README.md..."}}` | The client sends a `user` message to trigger a single-turn session. +| `{"type":"stream_event","event":{"type":"message_start",...}}`
`{"type":"stream_event","event":{"type":"content_block_delta","delta":{"type":"text_delta","text":"..."}}}`
`{"type":"stream_event","event":{"type":"content_block_delta","delta":{"type":"input_json_delta",...}}}` | Incremental event streaming: text `text_delta` and tool input `input_json_delta`. +| `{"type":"assistant","message":{"content":[{"type":"tool_use",...}]}}`
`{"type":"user","message":{"content":[{"type":"tool_result",...}]},...}` | The CLI initiates a tool call; the script writes back `tool_result`, demonstrating bridging for `can_use_tool` and tool-result propagation. +| Continuous `content_block_delta` entries printing the README summary
`{"type":"assistant","message":{"content":[{"type":"text","text":"..."}]}}` | Incremental reasoning/text events align with the final complete `assistant` message. +| `{"type":"result","subtype":"session_summary","duration_api_ms":..., ...}` | The `result` event includes statistics such as `duration_api_ms` and `num_turns`. +| `{"type":"control_response","request_id":"demo-interrupt","success":true,...}` | The script sends an `interrupt` to conclude, and the CLI returns a success response. + +## Manual Verification Guide + +Use the following commands to manually check that the core capabilities introduced in this implementation are present: + +1. **Initialization and Incremental Events** (verify that the `system/init` and `stream_event` lifecycle matches the Claude protocol) + ```bash + npm run qwen -- \ + --input-format stream-json \ + --output-format stream-json \ + --include-partial-messages \ + --model glm-4.6 \ + < docs/examples/stream-json/request.jsonl + ``` + Expected output: initialization includes the complete `system/init` fields; during the assistant response, `message_start`, `content_block_start/delta/stop`, and `message_stop` events appear. + +2. **Real-Time Control Channel** (verify advanced subtypes like `can_use_tool` and `hook_callback`) + ```bash + npm run qwen -- --input-format stream-json --output-format stream-json --model glm-4.6 + # Enter sequentially: + {"type":"control_request","request_id":"req-init","request":{"subtype":"initialize"}} + {"type":"user","message":{"role":"user","content":[{"type":"text","text":"请执行 ls"}]}} + ``` + Expected output: the CLI responds to the initialization request with `control_response.success`. When a tool invocation is triggered, it sends `control_request.can_use_tool`, and hook acknowledgements surface as `system hook_callback` messages. + +3. **MCP Message Bridging** (verify `mcp_message` passthrough) + ```bash + npm run qwen -- --input-format stream-json --output-format stream-json --model glm-4.6 + {"type":"control_request","request_id":"req-mcp","request":{"subtype":"mcp_message","server_name":"default","message":{"jsonrpc":"2.0","id":"1","method":"tools/list"}}} + ``` + Expected output: `control_response.success` contains `mcp_response.result.tools` listing the registered MCP tools. If MCP is not configured, the structured fields in `control_response.error` can be inspected instead. + +4. **Tool Result Linking** (verify `tool_result` carrying and `parent_tool_use_id`) + ```bash + npm run qwen -- --input-format stream-json --output-format stream-json --model glm-4.6 + {"type":"user","message":{"content":[{"type":"tool_result","tool_use_id":"demo-tool","content":[{"type":"text","text":"手动工具结果"}]}]},"parent_tool_use_id":"demo-tool"} + ``` + Expected output: during the subsequent `runNonInteractive` call the CLI reads this `tool_result`, continues the dialog when necessary, and suppresses redundant user echoes. + +These manual steps cover the advanced control requests, system initialization fields, incremental event completion, user-envelope extensions, and error semantics introduced by the remaining OpenSpec tasks. 
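The manual steps above can also be scripted with nothing beyond the Python standard library, which is handy for a quick spot check before reaching for the full validation harness described in the next section. The sketch below is illustrative rather than part of the repository tooling (the launch command mirrors the examples above, and the `spot-check-init` request id is arbitrary): it starts the CLI, sends `control_request.initialize`, and reads stdout until the matching `control_response` arrives.

```python
import json
import subprocess

# Launch command taken from the examples above; adjust it to your environment
# (for example, a globally installed `qwen` binary).
command = [
    "npm", "run", "qwen", "--",
    "--input-format", "stream-json",
    "--output-format", "stream-json",
]

proc = subprocess.Popen(
    command,
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    text=True,
)

# Ask the CLI to initialize; request_id is an arbitrary correlation token.
init = {
    "type": "control_request",
    "request_id": "spot-check-init",
    "request": {"subtype": "initialize", "hooks": None},
}
proc.stdin.write(json.dumps(init) + "\n")
proc.stdin.flush()

# Every stdout line is a standalone JSON envelope; stop once the CLI
# acknowledges the initialize request.
for line in proc.stdout:
    try:
        envelope = json.loads(line)
    except json.JSONDecodeError:
        continue
    print(envelope.get("type"), envelope.get("request_id", ""))
    if (
        envelope.get("type") == "control_response"
        and envelope.get("request_id") == "spot-check-init"
    ):
        break

proc.terminate()
proc.wait()
```

The same loop generalizes to the other manual steps: write the corresponding JSON line to stdin and watch for the envelope named in the expected output.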
+ +## Automated Verification Script + +To execute all of the above control-channel scenarios at once, run: + +```bash +python docs/examples/stream-json/validate_stream_json_cli.py +``` + +The script automatically launches `qwen code`, triggers events such as `initialize`, `hook_callback`, `can_use_tool`, and `mcp_message` in sequence, prints every request/response JSON line in real time, and finally summarizes the pass/fail status of each assertion in a table. Adjust behavior with these environment variables: + +- `QWEN_STREAM_JSON_COMMAND`: override the CLI launch command. +- `STREAM_JSON_VALIDATE_VERBOSE=0`: set to `0` to suppress line-by-line logging. +- `STREAM_JSON_VALIDATE_PROMPT=1`: enable the additional “user prompt triggers tool invocation” scenario (requires a usable model or proxy credentials). +- `STREAM_JSON_VALIDATE_TOOL_RESULT=1`: enable the additional scenario for the `tool_result + parent_tool_use_id` flow. diff --git a/docs/examples/stream-json/README_cn.md b/docs/examples/stream-json/README_cn.md new file mode 100644 index 000000000..02cfa1821 --- /dev/null +++ b/docs/examples/stream-json/README_cn.md @@ -0,0 +1,212 @@ +# Stream JSON 交互示例 + +本示例展示如何直接以 JSON Lines 协议驱动 Qwen Code CLI,并提供一个伪 SDK 客户端脚本,方便对接测试。 + +## 先决条件 + +- 本地已安装并可执行 `qwen code`(或在仓库内通过 `npx tsx packages/cli/index.ts` 运行)。 +- 建议使用 `--include-partial-messages` 以体验增量事件。 + +## CLI 快速体验 + +1. 准备一份输入流(写入 `docs/examples/stream-json/request.jsonl`): + + ```bash + cat <<'EOF' > docs/examples/stream-json/request.jsonl + {"type":"control_request","request_id":"req-init-1","request":{"subtype":"initialize","hooks":null}} + {"type":"user","message":{"role":"user","content":[{"type":"text","text":"请阅读 README.md 并总结三个关键特性。"}]}} + EOF + ``` + +2. 运行 CLI,并使用 `cat … -` 方式保留标准输入,便于后续继续发送控制消息或工具回执。仓库内推荐通过 `npm run qwen` 启动(注意 `--` 之后才是 CLI 自身参数): + + ```bash + cat docs/examples/stream-json/request.jsonl - | \ + npm run qwen -- \ + --input-format stream-json \ + --output-format stream-json \ + --include-partial-messages \ + --model glm-4.6 + ``` + + 日志请看 @./logs/1.log + +3. 观察标准输出:当 CLI 初始化成功时会输出 `system/init`、`control_response` 以及含 `thinking_delta`、`text_delta` 的 `stream_event`,最后以 `result` 事件结束。 + + 若需要在会话结束后主动终止 CLI,可在上一步命令的终端中手动输入: + + ``` + {"type":"control_request","request_id":"req-interrupt-1","request":{"subtype":"interrupt"}} + ``` + +4. 
若更希望脚本化演示“打断”能力,请另建一份输入流(命名为 `request_interrupt.jsonl`,指令与上例不同): + + ```bash + cat <<'EOF' > docs/examples/stream-json/request_interrupt.jsonl + {"type":"control_request","request_id":"req-init-interrupt","request":{"subtype":"initialize","hooks":null}} + {"type":"user","message":{"role":"user","content":[{"type":"text","text":"请输出当前工作目录,并等待后续指令。"}]}} + {"type":"user","message":{"role":"user","content":[{"type":"text","text":"请列出当前目录中的所有文件。"}]}} + {"type":"user","message":{"role":"user","content":[{"type":"text","text":"若无更多操作,请准备结束对话。"}]}} + {"type":"control_request","request_id":"req-interrupt-final","request":{"subtype":"interrupt"}} + EOF + ``` + + 随后结合 `timeout` 或额外的 `interrupt` 输出来模拟外部终止: + + ```bash + timeout 10 sh -c \ + "cat docs/examples/stream-json/request_interrupt.jsonl - | \ + npm run qwen -- \ + --input-format stream-json \ + --output-format stream-json \ + --include-partial-messages \ + --model glm-4.6" + ``` + + 日志请看 @./logs/2.log + 该命令会自动发送两轮用户消息,并在最后推送 `interrupt`。当 `timeout` 到期或 CLI 处理完最后一个请求时,会立刻返回 `control_response/interrupt`,随后进程以 “OpenAI API Streaming Error: Request was aborted.” 类似日志结束——这正是预期的打断表现。 + +> 注意:若 CLI 发出 `control_request.can_use_tool` 等事件,需要调用方实时回写 `control_response`,否则进程会等待回执。 + +## 伪 SDK 脚本 + +- 位置:`docs/examples/stream-json/simple_stream_json_client.py` +- 功能:启动 CLI 子进程,自动完成 `initialize` 握手、发送用户消息,并对 `can_use_tool`、`hook_callback` 等控制请求给出示例响应。脚本会在收到 `result` 后通过 `interrupt` 结束会话。 + +运行示例: + +```bash +python docs/examples/stream-json/simple_stream_json_client.py +``` + +日志请看 @./logs/3.log + +默认脚本使用 `npm run qwen -- …` 启动 CLI,如需替换命令可设置环境变量 `QWEN_STREAM_JSON_COMMAND`,例如: + +```bash +# export QWEN_STREAM_JSON_COMMAND="npm run qwen -- --input-format stream-json --output-format stream-json --include-partial-messages --model glm-4.6" +export QWEN_STREAM_JSON_COMMAND="npm run qwen -- --input-format stream-json --output-format stream-json --include-partial-messages" +python docs/examples/stream-json/simple_stream_json_client.py +``` + +脚本内部提供了 `StreamJsonClient` 类,可通过 `StreamJsonClientOptions` 自定义命令、初始化请求、用户消息、控制处理函数等,例如: + +```python +import asyncio +import sys + +sys.path.append("docs/examples/stream-json") +from simple_stream_json_client import StreamJsonClient, StreamJsonClientOptions + +options = StreamJsonClientOptions( + command=["npm", "run", "qwen", "--", "--input-format", "stream-json", "--output-format", "stream-json"], + user_messages=[ + { + "type": "user", + "message": { + "role": "user", + "content": [{"type": "text", "text": "举例说明 stream-json 协议用途"}], + }, + } + ], + extra_handlers={ + # 拒绝敏感工具 + "can_use_tool": lambda client, request_id, payload: client.send_control_response( + request_id, + success=True, + response={"behavior": "deny", "message": "demo client blocks all tools"}, + ), + }, +) + +client = StreamJsonClient(options) +asyncio.run(client.run()) +``` + +## 自动化测试 + +新增单元测试验证了 stream-json 相关功能,可通过以下命令运行: + +```bash +npx vitest run \ + packages/cli/src/streamJson/writer.test.ts \ + packages/cli/src/streamJson/session.test.ts \ + packages/cli/src/nonInteractiveCli.test.ts +``` + +测试涵盖: +- `system/init` 事件与会话元数据 +- 部分消息增量(`thinking_delta`、`input_json_delta` 等) +- `result` 事件中的 `usage`、`duration_api_ms` 字段 +- `can_use_tool` 控制请求桥接逻辑 + +通过上述步骤即可快速验证 Stream JSON 协议在 CLI 中的输出与控制通道行为。 + +### 运行日志示例(节选) + +下表摘录脚本运行过程中的关键日志,并标注演示的能力: + +| 日志片段 | 功能说明 | +| --- | --- | +| `{"type":"control_response","request_id":"demo-init","success":true,...}`
`{"type":"system","subtype":"init",...}` | 脚本回写 `initialize`,CLI 输出 `system/init` 会话元数据(控制通道 + 系统事件)。 | +| `{"type":"user","message":{"role":"user","content":"请阅读 README.md..."}}` | 客户端发送 `user` 消息,触发单轮会话。 | +| `{"type":"stream_event","event":{"type":"message_start",...}}`
`{"type":"stream_event","event":{"type":"content_block_delta","delta":{"type":"text_delta","text":"..."}}}`
`{"type":"stream_event","event":{"type":"content_block_delta","delta":{"type":"input_json_delta",...}}}` | 增量事件补全:文本 `text_delta` 与工具输入 `input_json_delta`。 | +| `{"type":"assistant","message":{"content":[{"type":"tool_use",...}]}}`
`{"type":"user","message":{"content":[{"type":"tool_result",...}]},...}` | CLI 发起工具调用,脚本回写 `tool_result`,展示 `can_use_tool` 桥接及工具结果回传。 | +| 连续 `content_block_delta` 输出 README 摘要
`{"type":"assistant","message":{"content":[{"type":"text","text":"..."}]}}` | 思考/文本增量与最终完整 `assistant` 消息一致。 | +| `{"type":"result","subtype":"session_summary","duration_api_ms":..., ...}` | `result` 事件包含 `duration_api_ms`、`num_turns` 等统计字段。 | +| `{"type":"control_response","request_id":"demo-interrupt","success":true,...}` | 脚本发送 `interrupt` 收尾,CLI 返回成功响应。 | + +## 手工验证指南 + +如下命令便于人工对照本次实现的关键能力是否落实: + +1. **初始化与增量事件**(验证 `system/init`、`stream_event` 生命周期对齐 Claude 协议) + ```bash + npm run qwen -- \ + --input-format stream-json \ + --output-format stream-json \ + --include-partial-messages \ + --model glm-4.6 \ + < docs/examples/stream-json/request.jsonl + ``` + 期望输出:初始化时包含完整 `system/init` 字段;助手回复过程中出现 `message_start`、`content_block_start/delta/stop` 及 `message_stop`。 + +2. **实时控制通道**(验证 `can_use_tool`、`hook_callback` 等高级子类型) + ```bash + npm run qwen -- --input-format stream-json --output-format stream-json --model glm-4.6 + # 依次输入: + {"type":"control_request","request_id":"req-init","request":{"subtype":"initialize"}} + {"type":"user","message":{"role":"user","content":[{"type":"text","text":"请执行 ls"}]}} + ``` + 期望输出:CLI 对初始化请求返回 `control_response.success`,在触发工具调用时向上游发送 `control_request.can_use_tool`,同时可看到钩子回执产生的 `system hook_callback` 消息。 + +3. **MCP 消息桥接**(验证 `mcp_message` 透传) + ```bash + npm run qwen -- --input-format stream-json --output-format stream-json --model glm-4.6 + {"type":"control_request","request_id":"req-mcp","request":{"subtype":"mcp_message","server_name":"default","message":{"jsonrpc":"2.0","id":"1","method":"tools/list"}}} + ``` + 期望输出:`control_response.success` 中的 `mcp_response.result.tools` 列出已注册的 MCP 工具。若未配置 MCP,可用于验证错误回执 `control_response.error` 的结构化字段。 + +4. **工具结果回链**(验证 `user` 信封携带 `tool_result` 与 `parent_tool_use_id`) + ```bash + npm run qwen -- --input-format stream-json --output-format stream-json --model glm-4.6 + {"type":"user","message":{"content":[{"type":"tool_result","tool_use_id":"demo-tool","content":[{"type":"text","text":"手动工具结果"}]}]},"parent_tool_use_id":"demo-tool"} + ``` + 期望输出:CLI 会在后续调用 `runNonInteractive` 时读取该 `tool_result`,并在需要时继续衍生对话,不再记录用户回响。 + +上述手工步骤覆盖了 OpenSpec 剩余任务中新增的高级控制请求、系统初始化字段、增量事件补全、用户信封扩展与错误语义规范化等核心能力。 + +## 自动化验证脚本 + +若需一次性跑通上述控制通道场景,可执行: + +```bash +python docs/examples/stream-json/validate_stream_json_cli.py +``` + +脚本将自动启动 `qwen code`,依次触发 `initialize`/`hook_callback`/`can_use_tool`/`mcp_message` 等事件,并把所有请求/响应 JSON 行实时打印出来,最终以表格形式汇总每个断言的通过状态。可通过以下环境变量调整行为: + +- `QWEN_STREAM_JSON_COMMAND`:自定义 CLI 启动命令。 +- `STREAM_JSON_VALIDATE_VERBOSE=0`:如需关闭逐行日志,可将该变量设为 `0`。 +- `STREAM_JSON_VALIDATE_PROMPT=1`:启用“用户 prompt 触发工具调用”的附加场景(需要可用模型或代理凭据)。 +- `STREAM_JSON_VALIDATE_TOOL_RESULT=1`:启用“tool_result + parent_tool_use_id 链路”的附加场景。 diff --git a/docs/examples/stream-json/request.jsonl b/docs/examples/stream-json/request.jsonl new file mode 100644 index 000000000..b563cafc7 --- /dev/null +++ b/docs/examples/stream-json/request.jsonl @@ -0,0 +1,2 @@ +{"type":"control_request","request_id":"req-init-1","request":{"subtype":"initialize","hooks":null}} +{"type":"user","message":{"role":"user","content":[{"type":"text","text":"请阅读 README.md 并总结三个关键特性。"}]}} diff --git a/docs/examples/stream-json/request_interrupt.jsonl b/docs/examples/stream-json/request_interrupt.jsonl new file mode 100644 index 000000000..8071b652e --- /dev/null +++ b/docs/examples/stream-json/request_interrupt.jsonl @@ -0,0 +1,5 @@ +{"type":"control_request","request_id":"req-init-interrupt","request":{"subtype":"initialize","hooks":null}} 
+{"type":"user","message":{"role":"user","content":[{"type":"text","text":"请输出当前工作目录,并等待后续指令。"}]}} +{"type":"user","message":{"role":"user","content":[{"type":"text","text":"请列出当前目录中的所有文件。"}]}} +{"type":"user","message":{"role":"user","content":[{"type":"text","text":"若无更多操作,请准备结束对话。"}]}} +{"type":"control_request","request_id":"req-interrupt-final","request":{"subtype":"interrupt"}} diff --git a/docs/examples/stream-json/sdk.py b/docs/examples/stream-json/sdk.py new file mode 100644 index 000000000..b5cac7876 --- /dev/null +++ b/docs/examples/stream-json/sdk.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 +"""Lightweight stream-json pseudo SDK for Qwen Code CLI. + +This module mimics the structure of Anthropic's agent SDK examples by exposing +an easy-to-use `StreamJsonClient` class. The client handles: + +* spawning the CLI subprocess; +* sending initialization & user messages; +* dispatching control requests to pluggable handlers; +* emitting helper responses (e.g. can_use_tool, hook callbacks); +* optional auto-interrupt once a `result` envelope is observed. + +The goal is to keep the transport simple and synchronous enough for demos +while illustrating how one could wrap the stream-json protocol in a SDK-like +interface. +""" + +from __future__ import annotations + +import asyncio +import json +import os +from dataclasses import dataclass, field +from typing import Any, Awaitable, Callable, Mapping, Sequence + + +DEFAULT_COMMAND = ( + "npm run qwen -- --input-format stream-json --output-format stream-json " + "--include-partial-messages --model glm-4.6" +) + + +def _split_command(raw: str) -> list[str]: + return raw.split() + + +ControlHandler = Callable[["StreamJsonClient", str, dict[str, Any]], Awaitable[None]] + + +@dataclass +class StreamJsonClientOptions: + """Configuration used to bootstrap the pseudo SDK client.""" + + command: Sequence[str] = field( + default_factory=lambda: _split_command( + os.environ.get("QWEN_STREAM_JSON_COMMAND", DEFAULT_COMMAND) + ) + ) + initialize_request: dict[str, Any] = field( + default_factory=lambda: { + "type": "control_request", + "request_id": "demo-init", + "request": {"subtype": "initialize", "hooks": None}, + } + ) + user_messages: Sequence[dict[str, Any]] = field( + default_factory=lambda: [ + { + "type": "user", + "message": { + "role": "user", + "content": [ + { + "type": "text", + "text": "请阅读 README.md,概述项目目标并列出两个关键命令。", + } + ], + }, + "options": {"temporary_model": "glm-4.6"}, + } + ] + ) + auto_interrupt_on_result: bool = True + sleep_before_interrupt_ms: int | None = None + stdout_printer: Callable[[str], None] = print + extra_handlers: Mapping[str, ControlHandler] | None = None + + +class StreamJsonClient: + """Minimal pseudo SDK around the stream-json protocol.""" + + def __init__(self, options: StreamJsonClientOptions | None = None): + self.options = options or StreamJsonClientOptions() + self._process: asyncio.subprocess.Process | None = None + + self._handlers: dict[str, ControlHandler] = { + **self._default_handlers(), + **(self.options.extra_handlers or {}), + } + + async def run(self) -> None: + command = list(self.options.command) + self._log(f"[spawn] {' '.join(command)}") + process = await asyncio.create_subprocess_exec( + *command, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + ) + self._process = process + assert process.stdout is not None + + reader_task = asyncio.create_task(self._read_stdout(process.stdout)) + + await self._send(self.options.initialize_request) + 
        for message in self.options.user_messages:
            await self._send(message)

        await reader_task
        await process.wait()
        self._log(f"CLI exited with return code {process.returncode}")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    async def _send(self, payload: dict[str, Any]) -> None:
        assert self._process is not None and self._process.stdin is not None
        line = json.dumps(payload, ensure_ascii=False)
        self._log(f"[client] {line}")
        self._process.stdin.write((line + "\n").encode("utf-8"))
        await self._process.stdin.drain()

    async def _read_stdout(self, stdout: asyncio.StreamReader) -> None:
        while True:
            raw = await stdout.readline()
            if not raw:
                break
            decoded = raw.decode("utf-8").rstrip()
            self._log(f"[cli] {decoded}")
            try:
                envelope = json.loads(decoded)
            except json.JSONDecodeError:
                continue
            await self._handle_envelope(envelope)

    async def _handle_envelope(self, envelope: dict[str, Any]) -> None:
        envelope_type = envelope.get("type")
        if envelope_type == "control_request":
            request_id = envelope.get("request_id", "")
            payload = envelope.get("request") or {}
            subtype = payload.get("subtype")
            handler = self._handlers.get(
                subtype, StreamJsonClient._handle_unknown_control
            )
            # Every handler follows the ControlHandler signature and receives
            # the client instance explicitly, matching the extra_handlers contract.
            await handler(self, request_id, payload)
        elif envelope_type == "result" and self.options.auto_interrupt_on_result:
            if self.options.sleep_before_interrupt_ms:
                await asyncio.sleep(self.options.sleep_before_interrupt_ms / 1000)
            await self._send(
                {
                    "type": "control_request",
                    "request_id": "demo-interrupt",
                    "request": {"subtype": "interrupt"},
                }
            )

    def _default_handlers(self) -> dict[str, ControlHandler]:
        # Reference the functions through the class (unbound) so that default
        # and user-supplied handlers are both invoked with the same
        # (client, request_id, payload) signature.
        return {
            "initialize": StreamJsonClient._handle_initialize,
            "can_use_tool": StreamJsonClient._handle_can_use_tool,
            "hook_callback": StreamJsonClient._handle_hook_callback,
            "mcp_message": StreamJsonClient._handle_mcp_message,
        }

    # ------------------------------------------------------------------
    # Default control handlers
    # ------------------------------------------------------------------

    async def _handle_initialize(
        self, request_id: str, payload: dict[str, Any]
    ) -> None:
        capabilities = payload.get("capabilities") or {}
        await self.send_control_response(
            request_id,
            success=True,
            response={
                "subtype": "initialize",
                "capabilities": {
                    "can_handle_can_use_tool": True,
                    "can_handle_hook_callback": True,
                    **capabilities,
                },
            },
        )

    async def _handle_can_use_tool(
        self, request_id: str, payload: dict[str, Any]
    ) -> None:
        tool_name = payload.get("tool_name", "unknown_tool")
        await self.send_control_response(
            request_id,
            success=True,
            response={"behavior": "allow", "tool_name": tool_name},
        )

    async def _handle_hook_callback(
        self, request_id: str, payload: dict[str, Any]
    ) -> None:
        await self.send_control_response(
            request_id,
            success=True,
            response={"async": False, "decision": "continue"},
        )

    async def _handle_mcp_message(
        self, request_id: str, payload: dict[str, Any]
    ) -> None:
        await self.send_control_response(
            request_id,
            success=False,
            error="Demo client does not implement MCP forwarding.",
        )

    async def _handle_unknown_control(
        self, request_id: str, payload: dict[str, Any]
    ) -> None:
        subtype = payload.get("subtype")
        await self.send_control_response(
            request_id,
            success=False,
            error=f"Unsupported control_request subtype: {subtype}",
        )

    # 
------------------------------------------------------------------ + # Misc helpers + # ------------------------------------------------------------------ + + async def send_control_response( + self, + request_id: str, + *, + success: bool, + response: dict[str, Any] | None = None, + error: str | dict[str, Any] | None = None, + ) -> None: + envelope: dict[str, Any] = { + "type": "control_response", + "request_id": request_id, + "success": success, + } + if success: + envelope["response"] = response or {} + else: + envelope["error"] = error or "Unknown error" + await self._send(envelope) + + def _log(self, message: str) -> None: + self.options.stdout_printer(message) + + +__all__ = ["StreamJsonClient", "StreamJsonClientOptions"] diff --git a/docs/examples/stream-json/simple_stream_json_client.py b/docs/examples/stream-json/simple_stream_json_client.py new file mode 100644 index 000000000..ed7682400 --- /dev/null +++ b/docs/examples/stream-json/simple_stream_json_client.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +"""Demonstration script using the lightweight stream-json pseudo SDK.""" + +import asyncio +import sys + +sys.path.append("docs/examples/stream-json") + +from sdk import StreamJsonClient, StreamJsonClientOptions + + +async def main() -> None: + client = StreamJsonClient(StreamJsonClientOptions()) + await client.run() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/examples/stream-json/validate_stream_json_cli.py b/docs/examples/stream-json/validate_stream_json_cli.py new file mode 100644 index 000000000..2ad005765 --- /dev/null +++ b/docs/examples/stream-json/validate_stream_json_cli.py @@ -0,0 +1,415 @@ +#!/usr/bin/env python3 +""" +Automated stream-json validation harness. + +This script boots the Qwen Code CLI in stream-json mode and walks through a set +of control-channel scenarios to verify: + * initialize/system metadata emission (slash commands, permission mode, etc.) + * advanced control requests: can_use_tool, hook_callback, mcp_message + * structured error surfaces for unsupported MCP servers + +All checks are performed locally without requiring model inference. Results are +rendered as a compact table to provide a quick visual status report. 
+""" + +from __future__ import annotations + +import asyncio +import contextlib +import json +import os +import shlex +import sys +import time +from dataclasses import dataclass +from typing import Any, Awaitable, Callable, Dict, List + +DEFAULT_COMMAND = ( + "npm run qwen -- --input-format stream-json --output-format stream-json " + "--include-partial-messages --model glm-4.6" +) + +Envelope = Dict[str, Any] +Predicate = Callable[[Envelope], bool] + + +def build_command() -> List[str]: + raw = os.environ.get("QWEN_STREAM_JSON_COMMAND", DEFAULT_COMMAND) + return shlex.split(raw) + + +@dataclass +class ValidationResult: + name: str + success: bool + detail: str + + +class StreamJsonCli: + """Thin async wrapper for interacting with the CLI subprocess.""" + + def __init__(self, command: List[str], verbose: bool = True) -> None: + self._command = command + self._verbose = verbose + self._process: asyncio.subprocess.Process | None = None + self._queue: asyncio.Queue[Envelope] = asyncio.Queue() + self._buffer: List[Envelope] = [] + self._reader_task: asyncio.Task[None] | None = None + + def _log(self, prefix: str, payload: Envelope) -> None: + formatted = json.dumps(payload, ensure_ascii=False) + print(f"[{prefix}] {formatted}") + + async def __aenter__(self) -> "StreamJsonCli": + self._process = await asyncio.create_subprocess_exec( + *self._command, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + ) + assert self._process.stdout is not None + self._reader_task = asyncio.create_task(self._read_stdout(self._process.stdout)) + return self + + async def __aexit__(self, exc_type, exc, tb) -> None: + await self.shutdown() + + async def shutdown(self) -> None: + if self._process is None: + return + try: + await self.send( + { + "type": "control_request", + "request_id": "validator-interrupt", + "request": {"subtype": "interrupt"}, + } + ) + except Exception: + pass + try: + await asyncio.wait_for(self._process.wait(), timeout=3) + except asyncio.TimeoutError: + self._process.kill() + if self._reader_task: + self._reader_task.cancel() + with contextlib.suppress(Exception): + await self._reader_task + self._process = None + + async def _read_stdout(self, stdout: asyncio.StreamReader) -> None: + while True: + line = await stdout.readline() + if not line: + break + decoded = line.decode("utf-8", errors="replace").rstrip() + try: + envelope = json.loads(decoded) + except json.JSONDecodeError: + if self._verbose: + print(f"[cli] {decoded}") + continue + if self._verbose: + self._log("cli", envelope) + await self._queue.put(envelope) + + async def send(self, payload: Envelope) -> None: + if self._process is None or self._process.stdin is None: + raise RuntimeError("CLI process not started") + line = json.dumps(payload, ensure_ascii=False) + if self._verbose: + self._log("client", payload) + self._process.stdin.write((line + "\n").encode("utf-8")) + await self._process.stdin.drain() + + async def wait_for(self, predicate: Predicate, timeout: float, *, description: str) -> Envelope: + deadline = time.time() + timeout + + def check_buffer() -> Envelope | None: + for idx, envelope in enumerate(self._buffer): + if predicate(envelope): + return self._buffer.pop(idx) + return None + + match = check_buffer() + if match: + return match + + while True: + remaining = deadline - time.time() + if remaining <= 0: + raise TimeoutError(f"Timed out waiting for {description}") + try: + envelope = await asyncio.wait_for(self._queue.get(), timeout=remaining) + except 
asyncio.TimeoutError as exc: + raise TimeoutError(f"Timed out waiting for {description}") from exc + + if predicate(envelope): + return envelope + self._buffer.append(envelope) + + +async def scenario_initialize(cli: StreamJsonCli) -> ValidationResult: + print("-- client -> control_request.initialize (with hooks)") + hooks = { + "pre_tool": [ + { + "matcher": None, + "hookCallbackIds": ["validator-hook-1"], + } + ], + } + await cli.send( + { + "type": "control_request", + "request_id": "validator-init", + "request": {"subtype": "initialize", "hooks": hooks}, + } + ) + + response = await cli.wait_for( + lambda env: env.get("type") == "control_response" and env.get("request_id") == "validator-init", + timeout=5, + description="control_response.initialize", + ) + if not response.get("success"): + return ValidationResult("Initialize / Capabilities", False, f"Unexpected failure: {response.get('error')}") + + system_event = await cli.wait_for( + lambda env: env.get("type") == "system" and env.get("subtype") == "init", + timeout=5, + description="system.init", + ) + data = system_event.get("data") or {} + slash_commands = data.get("slash_commands") or [] + if not slash_commands: + return ValidationResult("Initialize / Capabilities", False, "slash_commands list is empty") + detail = f"Capabilities OK, {len(slash_commands)} slash commands discovered" + return ValidationResult("Initialize / Capabilities", True, detail) + + +async def scenario_hook_callback(cli: StreamJsonCli) -> ValidationResult: + print("-- client -> control_request.hook_callback") + await cli.send( + { + "type": "control_request", + "request_id": "validator-hook", + "request": { + "subtype": "hook_callback", + "callback_id": "validator-hook-1", + }, + } + ) + response = await cli.wait_for( + lambda env: env.get("type") == "control_response" and env.get("request_id") == "validator-hook", + timeout=3, + description="control_response.hook_callback", + ) + if not response.get("success"): + detail = response.get("error") or "hook callback rejected" + return ValidationResult("hook_callback handling", False, str(detail)) + payload = response.get("response") or {} + decision = payload.get("decision") + return ValidationResult( + "hook_callback handling", + decision == "continue", + f"decision={decision!r}", + ) + + +async def scenario_can_use_tool(cli: StreamJsonCli) -> ValidationResult: + print("-- client -> control_request.can_use_tool") + await cli.send( + { + "type": "control_request", + "request_id": "validator-tool", + "request": { + "subtype": "can_use_tool", + "tool_name": "edit", + "tool_use_id": "validator-tool-1", + "input": {"path": "README.md"}, + }, + } + ) + response = await cli.wait_for( + lambda env: env.get("type") == "control_response" and env.get("request_id") == "validator-tool", + timeout=3, + description="control_response.can_use_tool", + ) + if not response.get("success"): + detail = response.get("error") or "no detail" + return ValidationResult("can_use_tool handling", False, str(detail)) + behavior = (response.get("response") or {}).get("behavior") + return ValidationResult("can_use_tool handling", True, f"behavior={behavior}") + + +async def scenario_mcp_message(cli: StreamJsonCli) -> ValidationResult: + print("-- client -> control_request.mcp_message") + await cli.send( + { + "type": "control_request", + "request_id": "validator-mcp", + "request": { + "subtype": "mcp_message", + "server_name": "nonexistent-server", + "message": {"jsonrpc": "2.0", "id": 1, "method": "tools/list"}, + }, + } + ) + response = await 
cli.wait_for( + lambda env: env.get("type") == "control_response" and env.get("request_id") == "validator-mcp", + timeout=3, + description="control_response.mcp_message", + ) + success = response.get("success", False) + error = response.get("error") + expected_message = "is not configured" + if success or not isinstance(error, (str, dict)): + return ValidationResult("mcp_message error surface", False, f"Unexpected response: {response}") + message_text = error if isinstance(error, str) else str(error.get("message")) + ok = expected_message in message_text + detail = message_text if ok else f"Missing expected phrase in {message_text!r}" + return ValidationResult("mcp_message error surface", ok, detail) + + +async def scenario_prompt_tool_flow(cli: StreamJsonCli) -> ValidationResult: + print("-- client -> user message (trigger tool use)") + await cli.send( + { + "type": "user", + "message": { + "role": "user", + "content": [ + {"type": "text", "text": "请执行 ls 并展示结果"}, + ], + }, + } + ) + try: + envelope = await cli.wait_for( + lambda env: env.get("type") == "control_request" + and (env.get("request") or {}).get("subtype") == "can_use_tool", + timeout=15, + description="control_request.can_use_tool (prompt driven)", + ) + request_payload = envelope.get("request") or {} + tool_name = request_payload.get("tool_name") + return ValidationResult( + "Prompt -> can_use_tool", + True, + f"received tool request for {tool_name!r}", + ) + except TimeoutError as exc: + return ValidationResult( + "Prompt -> can_use_tool", + False, + f"no tool request observed (credentials required?): {exc}", + ) + + +async def scenario_tool_result_chain(cli: StreamJsonCli) -> ValidationResult: + print("-- client -> user tool_result + parent_tool_use_id") + await cli.send( + { + "type": "user", + "message": { + "content": [ + { + "type": "tool_result", + "tool_use_id": "validator-tool-2", + "content": [{"type": "text", "text": "模拟工具输出"}], + "is_error": False, + } + ], + }, + "parent_tool_use_id": "validator-tool-2", + } + ) + # send a follow-up hook callback to ensure the CLI remains responsive + await cli.send( + { + "type": "control_request", + "request_id": "validator-hook-followup", + "request": { + "subtype": "hook_callback", + "callback_id": "validator-hook-1", + }, + } + ) + response = await cli.wait_for( + lambda env: env.get("type") == "control_response" + and env.get("request_id") == "validator-hook-followup", + timeout=3, + description="post-tool hook callback", + ) + if not response.get("success"): + return ValidationResult( + "Tool result linkage", + False, + str(response.get("error") or "follow-up hook failed"), + ) + return ValidationResult( + "Tool result linkage", + True, + "follow-up hook callback succeeded", + ) + + +async def run_validation(verbose: bool) -> List[ValidationResult]: + command = build_command() + results: List[ValidationResult] = [] + async with StreamJsonCli(command, verbose=verbose) as cli: + scenarios: list[tuple[str, Callable[[StreamJsonCli], Awaitable[ValidationResult]]]] = [ + ("Initialize / Capabilities", scenario_initialize), + ("Hook Callback", scenario_hook_callback), + ("Can Use Tool", scenario_can_use_tool), + ("MCP Bridge", scenario_mcp_message), + ] + + if os.environ.get("STREAM_JSON_VALIDATE_PROMPT") == "1": + scenarios.append(("Prompt -> can_use_tool", scenario_prompt_tool_flow)) + if os.environ.get("STREAM_JSON_VALIDATE_TOOL_RESULT") == "1": + scenarios.append(("Tool result linkage", scenario_tool_result_chain)) + + for index, (label, scenario) in enumerate(scenarios, 
start=1): + separator = f"\n===== [{index}/{len(scenarios)}] {label} =====" + print(separator) + try: + result = await scenario(cli) + except TimeoutError as exc: + result = ValidationResult(label, False, str(exc)) + except Exception as exc: # pragma: no cover - unexpected path + result = ValidationResult(label, False, repr(exc)) + results.append(result) + return results + + +def render_results(results: List[ValidationResult]) -> None: + name_width = max(len(r.name) for r in results) + 2 + status_col = "Status" + detail_col = "Detail" + print("-" * (name_width + 40)) + print(f"{'Scenario'.ljust(name_width)}{status_col:<8}{detail_col}") + print("-" * (name_width + 40)) + for res in results: + status = "✅ OK" if res.success else "❌ FAIL" + print(f"{res.name.ljust(name_width)}{status:<8}{res.detail}") + print("-" * (name_width + 40)) + + +async def async_main(verbose: bool) -> int: + results = await run_validation(verbose=verbose) + render_results(results) + return 0 if all(r.success for r in results) else 1 + + +def main() -> int: + verbose = os.environ.get("STREAM_JSON_VALIDATE_VERBOSE", "1") != "0" + try: + return asyncio.run(async_main(verbose)) + except KeyboardInterrupt: + return 130 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/docs/rfc/qwen-code-agent-framework-rfc_agentic_cn.md b/docs/rfc/qwen-code-agent-framework-rfc_agentic_cn.md new file mode 100644 index 000000000..4c4fe29a6 --- /dev/null +++ b/docs/rfc/qwen-code-agent-framework-rfc_agentic_cn.md @@ -0,0 +1,825 @@ +# Qwen-Code Agent 框架补充清单(Agentic 加强版) + +本文聚焦对现有《Qwen-Code Agent 框架架构设计(整理版)》的补充点,目标是在 SDK 侧补齐 agentic 能力,而非简单的 CLI 包装。所有建议均对齐 `@third-party/anthropics/claude-agent-sdk-python/` 已有实现。 + +## Agent 会话循环能力 + +- **结论**:需补充状态化的 `QwenAgentClient` 设计,支持流式连接、Hook 注入与 SDK MCP 注册,让 IDE / 应用能够在同一会话内执行多轮对话、动态插入消息并与控制协议交互。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py` +```python + async def connect( + self, prompt: str | AsyncIterable[dict[str, Any]] | None = None + ) -> None: + """Connect to Claude with a prompt or message stream.""" + + from ._internal.query import Query + from ._internal.transport.subprocess_cli import SubprocessCLITransport + + async def _empty_stream() -> AsyncIterator[dict[str, Any]]: + return + yield {} # type: ignore[unreachable] + + actual_prompt = _empty_stream() if prompt is None else prompt + + if self.options.can_use_tool: + if isinstance(prompt, str): + raise ValueError( + "can_use_tool callback requires streaming mode. " + "Please provide prompt as an AsyncIterable instead of a string." + ) + + if self.options.permission_prompt_tool_name: + raise ValueError( + "can_use_tool callback cannot be used with permission_prompt_tool_name. " + "Please use one or the other." 
+ ) + + options = replace(self.options, permission_prompt_tool_name="stdio") + else: + options = self.options + + if self._custom_transport: + self._transport = self._custom_transport + else: + self._transport = SubprocessCLITransport( + prompt=actual_prompt, + options=options, + ) + await self._transport.connect() + + sdk_mcp_servers = {} + if self.options.mcp_servers and isinstance(self.options.mcp_servers, dict): + for name, config in self.options.mcp_servers.items(): + if isinstance(config, dict) and config.get("type") == "sdk": + sdk_mcp_servers[name] = config["instance"] + + self._query = Query( + transport=self._transport, + is_streaming_mode=True, + can_use_tool=self.options.can_use_tool, + hooks=self._convert_hooks_to_internal_format(self.options.hooks) + if self.options.hooks + else None, + sdk_mcp_servers=sdk_mcp_servers, + ) +``` + +## 会话上下文管理 + +- **结论**:需展示 `QwenAgentClient` 应支持异步上下文管理与显式断连,确保资源在 Agent 会话结束后正确释放。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py` +```python + async def disconnect(self) -> None: + if self._query: + await self._query.close() + self._query = None + self._transport = None + + async def __aenter__(self) -> "ClaudeSDKClient": + await self.connect() + return self + + async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> bool: + await self.disconnect() + return False +``` + +## 自定义 Transport 注入 + +- **结论**:需允许 SDK 注入自定义 Transport,以便接入远程 CLI 或代理服务,实现更灵活的部署拓扑。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py` 与 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/client.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py` +```python + def __init__( + self, + options: ClaudeAgentOptions | None = None, + transport: Transport | None = None, + ): + if options is None: + options = ClaudeAgentOptions() + self.options = options + self._custom_transport = transport + self._transport: Transport | None = None +``` + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/client.py` +```python + if transport is not None: + chosen_transport = transport + else: + chosen_transport = SubprocessCLITransport( + prompt=prompt, options=configured_options + ) + + await chosen_transport.connect() +``` + +## 会话消息流管理 + +- **结论**:需描述会话在连接后如何持续发送与接收消息,包括异步推送追加消息、等待模型响应以及在同一上下文中进行多轮交互。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py` 与 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/query.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py` +```python + async def query( + self, prompt: str | AsyncIterable[dict[str, Any]], session_id: str = "default" + ) -> None: + if not self._query or not self._transport: + raise CLIConnectionError("Not connected. Call connect() first.") + + if isinstance(prompt, str): + message = { + "type": "user", + "message": {"role": "user", "content": prompt}, + "parent_tool_use_id": None, + "session_id": session_id, + } + await self._transport.write(json.dumps(message) + "\n") + else: + async for msg in prompt: + if "session_id" not in msg: + msg["session_id"] = session_id + await self._transport.write(json.dumps(msg) + "\n") + + async def receive_messages(self) -> AsyncIterator[Message]: + if not self._query: + raise CLIConnectionError("Not connected. 
Call connect() first.") + + from ._internal.message_parser import parse_message + + async for data in self._query.receive_messages(): + yield parse_message(data) +``` + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/query.py` +```python + async def stream_input(self, stream: AsyncIterable[dict[str, Any]]) -> None: + try: + async for message in stream: + if self._closed: + break + await self.transport.write(json.dumps(message) + "\n") + await self.transport.end_input() + except Exception as e: + logger.debug(f"Error streaming input: {e}") + + async def receive_messages(self) -> AsyncIterator[dict[str, Any]]: + async for message in self._message_receive: + if message.get("type") == "end": + break + elif message.get("type") == "error": + raise Exception(message.get("error", "Unknown error")) + + yield message +``` + +## 单次查询模式 + +- **结论**:需提供便捷的单次查询入口,便于脚本化或批处理场景通过 `query()` 直接获取异步消息流。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/query.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/query.py` +```python +async def query( + *, + prompt: str | AsyncIterable[dict[str, Any]], + options: ClaudeAgentOptions | None = None, + transport: Transport | None = None, +) -> AsyncIterator[Message]: + if options is None: + options = ClaudeAgentOptions() + + os.environ["CLAUDE_CODE_ENTRYPOINT"] = "sdk-py" + + client = InternalClient() + + async for message in client.process_query( + prompt=prompt, options=options, transport=transport + ): + yield message +``` + +## 控制协议治理 + +- **结论**:需要在 RFC 中明确 `can_use_tool`、Hook 回调与 SDK MCP 控制请求的全链路处理,包括权限结果结构与错误返回,确保 Qwen SDK 能覆盖真实的 Agent 审批/反馈流程。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/query.py` 与 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/types.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/query.py` +```python + if subtype == "can_use_tool": + permission_request: SDKControlPermissionRequest = request_data + if not self.can_use_tool: + raise Exception("canUseTool callback is not provided") + + context = ToolPermissionContext( + signal=None, + suggestions=permission_request.get("permission_suggestions", []) + or [], + ) + + response = await self.can_use_tool( + permission_request["tool_name"], + permission_request["input"], + context, + ) + + if isinstance(response, PermissionResultAllow): + response_data = {"behavior": "allow"} + if response.updated_input is not None: + response_data["updatedInput"] = response.updated_input + if response.updated_permissions is not None: + response_data["updatedPermissions"] = [ + permission.to_dict() + for permission in response.updated_permissions + ] + elif isinstance(response, PermissionResultDeny): + response_data = {"behavior": "deny", "message": response.message} + if response.interrupt: + response_data["interrupt"] = response.interrupt + else: + raise TypeError( + "Tool permission callback must return PermissionResult (PermissionResultAllow or PermissionResultDeny), got {type(response)}" + ) + + elif subtype == "hook_callback": + hook_callback_request: SDKHookCallbackRequest = request_data + callback_id = hook_callback_request["callback_id"] + callback = self.hook_callbacks.get(callback_id) + if not callback: + raise Exception(f"No hook callback found for ID: {callback_id}") + + response_data = await callback( + request_data.get("input"), + request_data.get("tool_use_id"), + {"signal": None}, + ) + + elif subtype == 
"mcp_message": + server_name = request_data.get("server_name") + mcp_message = request_data.get("message") + if not server_name or not mcp_message: + raise Exception("Missing server_name or message for MCP request") + + mcp_response = await self._handle_sdk_mcp_request( + server_name, mcp_message + ) + response_data = {"mcp_response": mcp_response} +``` + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/types.py` +```python +@dataclass +class PermissionResultAllow: + behavior: Literal["allow"] = "allow" + updated_input: dict[str, Any] | None = None + updated_permissions: list[PermissionUpdate] | None = None + + +@dataclass +class PermissionResultDeny: + behavior: Literal["deny"] = "deny" + message: str = "" + interrupt: bool = False +``` + +## SDK MCP 工具桥接 + +- **结论**:需增加内嵌 MCP 工具服务器与 JSON-RPC 桥接的设计说明,使 Qwen SDK 能像 Claude SDK 一样直接在宿主进程中托管工具,避免额外子进程。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/__init__.py` 与 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/query.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/__init__.py` +```python +def create_sdk_mcp_server( + name: str, version: str = "1.0.0", tools: list[SdkMcpTool[Any]] | None = None +) -> McpSdkServerConfig: + from mcp.server import Server + from mcp.types import TextContent, Tool + + server = Server(name, version=version) + + if tools: + tool_map = {tool_def.name: tool_def for tool_def in tools} + + @server.list_tools() + async def list_tools() -> list[Tool]: + tool_list = [] + for tool_def in tools: + if isinstance(tool_def.input_schema, dict): + if ( + "type" in tool_def.input_schema + and "properties" in tool_def.input_schema + ): + schema = tool_def.input_schema + else: + properties = {} + for param_name, param_type in tool_def.input_schema.items(): + if param_type is str: + properties[param_name] = {"type": "string"} + elif param_type is int: + properties[param_name] = {"type": "integer"} + elif param_type is float: + properties[param_name] = {"type": "number"} + elif param_type is bool: + properties[param_name] = {"type": "boolean"} + else: + properties[param_name] = {"type": "string"} + schema = { + "type": "object", + "properties": properties, + "required": list(properties.keys()), + } + else: + schema = {"type": "object", "properties": {}} + + tool_list.append( + Tool( + name=tool_def.name, + description=tool_def.description, + inputSchema=schema, + ) + ) + return tool_list + + @server.call_tool() + async def call_tool(name: str, arguments: dict[str, Any]) -> Any: + if name not in tool_map: + raise ValueError(f"Tool '{name}' not found") + + tool_def = tool_map[name] + return await tool_def.handler(arguments) + + return { + "type": "sdk", + "name": name, + "instance": server, + } +``` + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/query.py` +```python + async def _handle_sdk_mcp_request( + self, server_name: str, message: dict[str, Any] + ) -> dict[str, Any]: + if server_name not in self.sdk_mcp_servers: + return { + "jsonrpc": "2.0", + "id": message.get("id"), + "error": { + "code": -32601, + "message": f"Server '{server_name}' not found", + }, + } + + server = self.sdk_mcp_servers[server_name] + method = message.get("method") + params = message.get("params", {}) + + if method == "initialize": + return { + "jsonrpc": "2.0", + "id": message.get("id"), + "result": { + "protocolVersion": "2024-11-05", + "capabilities": {"tools": {}}, + "serverInfo": { + "name": 
server.name, + "version": server.version or "1.0.0", + }, + }, + } + + elif method == "tools/list": + request = ListToolsRequest(method=method) + handler = server.request_handlers.get(ListToolsRequest) + if handler: + result = await handler(request) + tools_data = [ + { + "name": tool.name, + "description": tool.description, + "inputSchema": ( + tool.inputSchema.model_dump() + if hasattr(tool.inputSchema, "model_dump") + else tool.inputSchema + ) + if tool.inputSchema + else {}, + } + for tool in result.root.tools + ] + return { + "jsonrpc": "2.0", + "id": message.get("id"), + "result": {"tools": tools_data}, + } + + elif method == "tools/call": + call_request = CallToolRequest( + method=method, + params=CallToolRequestParams( + name=params.get("name"), arguments=params.get("arguments", {}) + ), + ) + handler = server.request_handlers.get(CallToolRequest) + if handler: + result = await handler(call_request) + content = [] + for item in result.root.content: + if hasattr(item, "text"): + content.append({"type": "text", "text": item.text}) + elif hasattr(item, "data") and hasattr(item, "mimeType"): + content.append( + { + "type": "image", + "data": item.data, + "mimeType": item.mimeType, + } + ) + + response_data = {"content": content} + if hasattr(result.root, "is_error") and result.root.is_error: + response_data["is_error"] = True + + return { + "jsonrpc": "2.0", + "id": message.get("id"), + "result": response_data, + } +``` + +## 会话动态控制 API + +- **结论**:需要在文档中列出会话级控制接口(中断、动态切换权限模式、热切模型等),指导上层如何通过控制协议操作运行中的 Agent。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/query.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/query.py` +```python + async def interrupt(self) -> None: + await self._send_control_request({"subtype": "interrupt"}) + + async def set_permission_mode(self, mode: str) -> None: + await self._send_control_request( + { + "subtype": "set_permission_mode", + "mode": mode, + } + ) + + async def set_model(self, model: str | None) -> None: + await self._send_control_request( + { + "subtype": "set_model", + "model": model, + } + ) +``` + +## 会话初始化信息 + +- **结论**:需支持在初始化后回读 CLI 返回的能力信息(命令、输出风格等),方便上层根据 Agent 能力动态调整界面或策略。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py` +```python + async def get_server_info(self) -> dict[str, Any] | None: + if not self._query: + raise CLIConnectionError("Not connected. 
Call connect() first.") + return getattr(self._query, "_initialization_result", None) +``` + +## 结果响应迭代器 + +- **结论**:需提供自动截断到 `ResultMessage` 的便捷迭代器,帮助上层准确获取完整回应并停止读取。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/client.py` +```python + async def receive_response(self) -> AsyncIterator[Message]: + async for message in self.receive_messages(): + yield message + if isinstance(message, ResultMessage): + return +``` + +## CLI 编排与多 Agent 配置 + +- **结论**:需补充 CLI 参数编排与多 Agent 配置的映射关系,让 SDK 端 `QwenAgentOptions` 能生成正确的子进程命令与配置传递,包括 MCP、Agent 定义及设置源。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/subprocess_cli.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/subprocess_cli.py` +```python + def _build_command(self) -> list[str]: + cmd = [self._cli_path, "--output-format", "stream-json", "--verbose"] + + if self._options.allowed_tools: + cmd.extend(["--allowedTools", ",".join(self._options.allowed_tools)]) + + if self._options.mcp_servers: + if isinstance(self._options.mcp_servers, dict): + servers_for_cli: dict[str, Any] = {} + for name, config in self._options.mcp_servers.items(): + if isinstance(config, dict) and config.get("type") == "sdk": + sdk_config: dict[str, object] = { + k: v for k, v in config.items() if k != "instance" + } + servers_for_cli[name] = sdk_config + else: + servers_for_cli[name] = config + + if servers_for_cli: + cmd.extend( + [ + "--mcp-config", + json.dumps({"mcpServers": servers_for_cli}), + ] + ) + else: + cmd.extend(["--mcp-config", str(self._options.mcp_servers)]) + + if self._options.agents: + agents_dict = { + name: {k: v for k, v in asdict(agent_def).items() if v is not None} + for name, agent_def in self._options.agents.items() + } + cmd.extend(["--agents", json.dumps(agents_dict)]) + + sources_value = ( + ",".join(self._options.setting_sources) + if self._options.setting_sources is not None + else "" + ) + cmd.extend(["--setting-sources", sources_value]) + + if self._is_streaming: + cmd.extend(["--input-format", "stream-json"]) + else: + cmd.extend(["--print", "--", str(self._prompt)]) + + return cmd +``` + +## 工作目录与环境注入 + +- **结论**:需明确 CLI 子进程可继承自定义工作目录、环境变量与运行用户,确保 Agent 在不同项目根与权限模型下稳定运作。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/subprocess_cli.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/subprocess_cli.py` +```python + process_env = { + **os.environ, + **self._options.env, + "CLAUDE_CODE_ENTRYPOINT": "sdk-py", + "CLAUDE_AGENT_SDK_VERSION": __version__, + } + + if self._cwd: + process_env["PWD"] = self._cwd + + self._process = await anyio.open_process( + cmd, + stdin=PIPE, + stdout=PIPE, + stderr=stderr_dest, + cwd=self._cwd, + env=process_env, + user=self._options.user, + ) +``` + +## 部分消息与调试输出 + +- **结论**:需说明 SDK 对部分消息流与调试 stderr 的处理方式,使 Agent 能在复杂界面中增量输出并捕获底层诊断信息。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/subprocess_cli.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/subprocess_cli.py` +```python + if self._options.include_partial_messages: + cmd.append("--include-partial-messages") + + should_pipe_stderr = ( + self._options.stderr is not None + or "debug-to-stderr" in self._options.extra_args + ) + + if should_pipe_stderr and 
self._process.stderr: + self._stderr_stream = TextReceiveStream(self._process.stderr) + self._stderr_task_group = anyio.create_task_group() + await self._stderr_task_group.__aenter__() + self._stderr_task_group.start_soon(self._handle_stderr) +``` + +## 缓冲与版本检查 + +- **结论**:需记录 SDK 内建的输出缓冲上限与 CLI 版本检查逻辑,保障 Agent 在异常输出或低版本环境下具备自诊断能力。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/subprocess_cli.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/subprocess_cli.py` +```python + self._max_buffer_size = ( + options.max_buffer_size + if options.max_buffer_size is not None + else _DEFAULT_MAX_BUFFER_SIZE + ) + + if len(json_buffer) > self._max_buffer_size: + buffer_length = len(json_buffer) + json_buffer = "" + raise SDKJSONDecodeError( + f"JSON message exceeded maximum buffer size of {self._max_buffer_size} bytes", + ValueError( + f"Buffer size {buffer_length} exceeds limit {self._max_buffer_size}" + ), + ) + + async def _check_claude_version(self) -> None: + # ... + if version_parts < min_parts: + warning = ( + f"Warning: Claude Code version {version} is unsupported in the Agent SDK. " + f"Minimum required version is {MINIMUM_CLAUDE_CODE_VERSION}. " + "Some features may not work correctly." + ) + logger.warning(warning) + print(warning, file=sys.stderr) +``` + +## 配置选项映射 + +- **结论**:需在文档中罗列 `QwenAgentOptions` 对应的核心配置项(工具权限、Hook、MCP、Agent 定义、工作目录等),确保宿主应用能够一站式传入会话所需的上下文。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/types.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/types.py` +```python +@dataclass +class ClaudeAgentOptions: + allowed_tools: list[str] = field(default_factory=list) + system_prompt: str | SystemPromptPreset | None = None + mcp_servers: dict[str, McpServerConfig] | str | Path = field(default_factory=dict) + permission_mode: PermissionMode | None = None + continue_conversation: bool = False + resume: str | None = None + max_turns: int | None = None + disallowed_tools: list[str] = field(default_factory=list) + model: str | None = None + permission_prompt_tool_name: str | None = None + cwd: str | Path | None = None + settings: str | None = None + add_dirs: list[str | Path] = field(default_factory=list) + env: dict[str, str] = field(default_factory=dict) + extra_args: dict[str, str | None] = field(default_factory=dict) + max_buffer_size: int | None = None + debug_stderr: Any = sys.stderr + stderr: Callable[[str], None] | None = None + can_use_tool: CanUseTool | None = None + hooks: dict[HookEvent, list[HookMatcher]] | None = None + user: str | None = None + include_partial_messages: bool = False + fork_session: bool = False + agents: dict[str, AgentDefinition] | None = None + setting_sources: list[SettingSource] | None = None +``` + +## 内嵌工具定义 + +- **结论**:需强调通过装饰器快速注册 in-process 工具,并以 `SdkMcpTool` 结构封装元数据,以便 Qwen SDK 直接托管宿主进程内的函数型工具。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/__init__.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/__init__.py` +```python +@dataclass +class SdkMcpTool(Generic[T]): + name: str + description: str + input_schema: type[T] | dict[str, Any] + handler: Callable[[T], Awaitable[dict[str, Any]]] + + +def tool( + name: str, description: str, input_schema: type | dict[str, Any] +) -> Callable[[Callable[[Any], Awaitable[dict[str, Any]]]], SdkMcpTool[Any]]: + + def decorator( + handler: Callable[[Any], Awaitable[dict[str, Any]]], + ) -> 
SdkMcpTool[Any]: + return SdkMcpTool( + name=name, + description=description, + input_schema=input_schema, + handler=handler, + ) + + return decorator +``` + +## Hook 匹配与回调 + +- **结论**:需在 RFC 中补充 Hook 匹配配置与回调注册流程,说明 Agent 如何在特定事件点注入业务逻辑。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/query.py` 与 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/types.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/query.py` +```python + hooks_config: dict[str, Any] = {} + if self.hooks: + for event, matchers in self.hooks.items(): + if matchers: + hooks_config[event] = [] + for matcher in matchers: + callback_ids = [] + for callback in matcher.get("hooks", []): + callback_id = f"hook_{self.next_callback_id}" + self.next_callback_id += 1 + self.hook_callbacks[callback_id] = callback + callback_ids.append(callback_id) + hooks_config[event].append( + { + "matcher": matcher.get("matcher"), + "hookCallbackIds": callback_ids, + } + ) + + request = { + "subtype": "initialize", + "hooks": hooks_config if hooks_config else None, + } +``` + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/types.py` +```python +@dataclass +class HookMatcher: + matcher: str | None = None + hooks: list[HookCallback] = field(default_factory=list) +``` + +## 权限策略更新 + +- **结论**:需揭示权限更新的数据结构,使 SDK 支持返回细粒度的规则、目录及模式调整,保障企业级管控能力。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/types.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/types.py` +```python +@dataclass +class PermissionUpdate: + type: Literal[ + "addRules", + "replaceRules", + "removeRules", + "setMode", + "addDirectories", + "removeDirectories", + ] + rules: list[PermissionRuleValue] | None = None + behavior: PermissionBehavior | None = None + mode: PermissionMode | None = None + directories: list[str] | None = None + destination: PermissionUpdateDestination | None = None + + def to_dict(self) -> dict[str, Any]: + result: dict[str, Any] = { + "type": self.type, + } + + if self.destination is not None: + result["destination"] = self.destination + + if self.type in ["addRules", "replaceRules", "removeRules"]: + if self.rules is not None: + result["rules"] = [ + { + "toolName": rule.tool_name, + "ruleContent": rule.rule_content, + } + for rule in self.rules + ] + if self.behavior is not None: + result["behavior"] = self.behavior + + elif self.type == "setMode": + if self.mode is not None: + result["mode"] = self.mode + + elif self.type in ["addDirectories", "removeDirectories"]: + if self.directories is not None: + result["directories"] = self.directories + + return result +``` + +## Transport 抽象 + +- **结论**:需指出 SDK 提供 `Transport` 抽象,便于在子进程外扩展远程或自定义通信层,形成更灵活的 Agent 部署方案。(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/__init__.py`) + +> 代码引用:`third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/__init__.py` +```python +class Transport(ABC): + @abstractmethod + async def connect(self) -> None: + pass + + @abstractmethod + async def write(self, data: str) -> None: + pass + + @abstractmethod + def read_messages(self) -> AsyncIterator[dict[str, Any]]: + pass + + @abstractmethod + async def close(self) -> None: + pass + + @abstractmethod + def is_ready(self) -> bool: + pass + + @abstractmethod + async def end_input(self) -> None: + pass +``` diff --git a/docs/rfc/qwen-code-agent-framework-rfc_clear_cn.md 
b/docs/rfc/qwen-code-agent-framework-rfc_clear_cn.md new file mode 100644 index 000000000..85c62b3fc --- /dev/null +++ b/docs/rfc/qwen-code-agent-framework-rfc_clear_cn.md @@ -0,0 +1,693 @@ +# Qwen-Code Agent 框架架构设计(整理版) + +## 概览 + +| 字段 | 详情 | +| --- | --- | +| 设计版本 | v1.1 | +| 最后更新 | 2025-10-10 | +| 架构范围 | Qwen-Code Agent SDK 与 qwen-code CLI 的子进程编排、控制协议、可观测与配置体系 | +| 关键目标 | 为第三方应用提供统一 IPC 通信、Worker 池治理、权限控制与工具桥接能力 | + +- **核心组件**: 文档聚焦 Qwen-Code Agent SDK, 在宿主进程内封装会话路由、控制协议与 Worker 池治理, 面向多语言场景提供统一接入。 +- **核心职能**: 会话调度与路由;CLI 子进程生命周期与资源治理;控制协议 Hook 与权限判定;轻量日志输出与可观测接入;观察性数据采集(日志、指标、追踪)。 +- **核心功能**: 支持同步/异步任务执行、流式输出、会话管理、错误处理与重试、In-Process MCP 工具桥接以及独立配置注入。 +- 面向多语言 SDK,统一封装 CLI 子进程生命周期与 JSONL 协议。 +- 提供会话调度、权限治理、Hook/MCP 回调、日志与指标采集的一致接口。 +- 以与 Claude Agent SDK 对齐的协议规范,降低多端协作与生态集成成本。 + +## 架构总览 + +| 层级 | 主要组件 | 职责要点 | +| --- | --- | --- | +| 上游客户端 | TypeScript / Python / Go / Java SDK | 暴露标准 API,适配各语言生态,负责会话接入与自定义工具注册 | +| Agent SDK 内核 | Router、ControlPlane、WorkerPool、ProcessMgr、IPC | 管理会话路由、权限回调、进程池生命周期与 STDIO JSONL 传输 | +| CLI Worker | qwen-code CLI 子进程 | 执行模型推理与工具调用,按照协议输出 `chat.completion*` / `result/*` / `control_request` | +| 周边服务 | LLM/MCP、监控、日志、追踪 | 提供模型服务、可观测数据与外部系统集成能力 | + +```mermaid +flowchart LR + subgraph Clients["第三方应用 / 服务"] + direction LR + TypeScriptSDK["qwen-agent-sdk
TypeScript"] + PythonSDK["qwen-agent-sdk
Python"] + GoSDK["qwen-agent-sdk
Go (TODO)"] + JavaSDK["qwen-agent-sdk
Java (TODO)"] + end + + subgraph AgentSDK["Qwen-Code Agent SDK"] + direction TB + Router["会话调度
路由 / 负载均衡"] + ControlPlane["控制协议
Hook / 权限判定"] + WorkerPool["Worker 池管理
分配 / 回收 / 健康检查"] + ProcessMgr["子进程管理
启动 / 监控 / 重启"] + IPC["IPC 适配层
STDIN/STDOUT JSONL"] + end + + subgraph Workers["qwen-code CLI Workers"] + direction LR + Worker1["Worker #1
qwen-code CLI"] + Worker2["Worker #2
qwen-code CLI"] + WorkerN["Worker #N"] + end + + subgraph Services["外围服务"] + LLM_MCP["大模型服务/MCP 服务"] + Monitor["监控告警"] + Logger["日志中心"] + Trace["链路追踪"] + end + + Clients --> Router + Router --> ControlPlane + Router --> WorkerPool + WorkerPool --> ProcessMgr + ProcessMgr --> IPC + ControlPlane -->|control_response| IPC + IPC -->|control_request| ControlPlane + IPC --> Worker1 + IPC --> Worker2 + IPC --> WorkerN + + Worker1 --> LLM_MCP + Worker2 --> LLM_MCP + WorkerN --> LLM_MCP + + Router --> Monitor + Router --> Logger + Router --> Trace + IPC -->|result/*| ControlPlane + ControlPlane -->|request| IPC + + classDef clientStyle fill:#e67e22,stroke:#ba6c1e,color:#fff + classDef sdkStyle fill:#f39c12,stroke:#ca7e08,color:#fff + classDef workerStyle fill:#16a085,stroke:#138d75,color:#fff + classDef serviceStyle fill:#95a5a6,stroke:#707b7c,color:#fff + class Clients,TypeScriptSDK,PythonSDK,GoSDK,JavaSDK clientStyle + class AgentSDK,Router,ControlPlane,ProcessMgr,IPC,WorkerPool sdkStyle + class Workers,Worker1,Worker2,WorkerN workerStyle + class Services,MCP,Monitor,Logger,Trace serviceStyle +``` + +- Agent SDK 与 CLI 共享 STDIN/STDOUT 双向 JSONL 通道,统一传输 `chat.completion*`、`result/*`、`control_request` 等事件。 +- 双向通信链路:CLI 逐行输出 `chat.completion`/`result/*`/`control_request` 至 stdout,SDK 解析后按需通过 stdin 回写 `request`/`control_response`,当出现 `control_request{subtype:"mcp_message"}` 时,ControlPlane 会将 JSON-RPC 转发至本地 MCP Server 并回传 `mcp_response`。 +- qwen-code CLI 已接入 OpenTelemetry,上报模型调用、工具执行、CLI 内部事件;Agent SDK 需独立接入,并通过 Trace/Span ID 串联端到端链路,构建统一排障视角。 +- 控制协议的事件语义与 CLI 输出格式规范保持一致,详见配套的 `stream-json` RFC。 +- 事件分类提示:关于 `result/*`、`request`、`control_request` 等事件的详细语义,请参阅《qwen-code-cli-output-format-stream-json-rfc_cn.md》的“事件机制分类”章节。 + +## 核心能力映射 + +| 能力域 | 关键内容 | 当前能力 | 后续演进 | +| --- | --- | --- | --- | +| 会话调度 | 会话路由、Worker 绑定、复用策略 | SDK 侧提供 Router、Worker 池调度 | 增强会话分支/子 Agent 调度 | +| 进程治理 | 子进程启动、监控、重启 | ProcessMgr 负责生命周期与资源限制 | 引入资源配额、故障自动隔离 | +| 控制协议 | 权限回调、Hook、MCP | ControlPlane 统一处理 `control_request` | 扩展更多 Hook 点与审批策略 | +| IPC 协议 | JSON Lines、输入输出格式 | IPC 层实现 `stream-json`/`stream-chunk-json` | 丰富事件类型、协议版本协商 | +| 可观测性 | 日志、指标、Trace | SDK 与 CLI 各自接入 OTel,并输出结构化日志 | 统一指标命名与跨组件追踪分析 | + +- 会话调度与控制层需要保证 Worker 独占机制、会话隔离与资源回收。 +- 控制协议以 request/response 模型运行,SDK 必须在超时前回写 `control_response`,确保 CLI 不被阻塞。 + +## Agentic 会话能力 + +| 能力块 | 设计重点 | 接口/结构 | +| --- | --- | --- | +| 会话循环 | 状态化 `QwenAgentClient`,区分连接与消息流阶段 | `connect()`、`disconnect()`、`Query.initialize()` | +| 会话上下文 | 支持异步上下文管理与资源释放 | `__aenter__` / `__aexit__`、`async with QwenAgentClient` | +| 消息流处理 | 同一会话内追加输入并消费流式输出 | `query()`、`stream_input()`、`receive_messages()`、`receive_response()` | +| 动态控制 | 运行时切换模型/权限、触发中断并回读初始化能力 | `interrupt()`、`set_permission_mode()`、`set_model()`、`get_server_info()` | +| 传输抽象 | 可插拔 Transport 支持本地/远程 CLI 与缓冲守护 | `Transport`、`SubprocessCLITransport`、`max_buffer_size` 校验 | + +### 会话循环与上下文 + +- `QwenAgentClient.connect()` 支持字符串与异步流模式,可在首次 `initialize` 时注入 Hook 与 SDK MCP Server,保障 IDE/服务多轮会话。 +- `disconnect()` 清理 `_query` 及 Transport,结合 `async with` 自动化连接/释放流程,避免遗留子进程。 +- 参数校验需提前阻止互斥选项(如 `can_use_tool` 与 `permission_prompt_tool_name` 同时启用),提升配置体验。 + +### 消息流处理 + +- `query()` 自动补全 `session_id` 并写入 JSONL,支持脚本/界面按需追加消息。 +- `stream_input()` 负责增量写入并在异常时清空缓冲、记录调试日志,维持长流程稳定性。 +- `receive_messages()` 遍历所有事件,`receive_response()` 检测到 `ResultMessage` 后提前收束,方便界面感知一次回答完成。 +- `include_partial_messages` 选项允许透传增量 chunk,配合 `stderr`/`debug_stderr` 捕获底层诊断输出,便于复杂界面实时渲染。 + +### 动态控制接口 + +- `_send_control_request` 支持 
`interrupt`、`set_permission_mode`、`set_model` 等子类型,实现手动打断、权限模式切换与热切模型。 +- `get_server_info()` 回读 CLI 初始化能力(协议版本、指令集等),用于动态配置前端或策略。 +- `async query(...)->AsyncIterator` 暴露在脚本化场景,实现一次性消费完整响应,同时保留自定义 Transport 的可能。 + +### 单次查询与多 Agent 编排 + +- `query()` 快速入口封装了会话生命周期,适合集成脚本或批量任务;当传入自定义 `Transport` 时可复用远程 CLI 或 Agent 服务。 +- `agents` 配置将多 Agent 拓扑映射到 CLI `--agents` 参数,结合 `fork_session` 支持会话分叉与子 Agent 路由。 +- CLI 命令生成时需根据 `setting_sources`、`allowed_tools`、`mcp_servers` 等选项拼装 JSON,保证 SDK 与 CLI 能力一致。 + +### Hook 与 MCP 桥接 + +- Hook 初始化阶段将 `HookMatcher` 列表转换为 `hookCallbackIds`,`hook_callback` 事件据此回调宿主逻辑,支持 `PreToolUse`、`PostToolUse`、`UserPromptSubmit` 等关键节点。 +- SDK 侧 `create_sdk_mcp_server`/`defineTools` 在宿主进程构建 MCP Server,`mcp_message` 请求触发 `tools/list`、`tools/call`、`initialize` 等 JSON-RPC,避免额外子进程。 +- 权限审批通过 `PermissionResultAllow`/`PermissionResultDeny` 及 `PermissionUpdate` 结构返回细粒度策略,可在 Hook 或工具回调中按需调整。 +- **示例:PreToolUse 与 UserPromptSubmit 组合**(参考 `third-party/anthropics/claude-agent-sdk-python/examples/hooks.py`): + ```python + import asyncio + from qwen_agent_sdk import HookContext, HookMatcher, QwenAgentOptions, QwenSDKClient + + + async def block_risky_bash( + input_data: dict[str, object], + tool_use_id: str | None, + context: HookContext, + ) -> dict[str, object]: + command = input_data.get("command", "") if isinstance(input_data, dict) else "" + if isinstance(command, str) and "rm -rf" in command: + return { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": "阻止危险删除命令", + } + } + return {} + + + async def inject_user_memory( + input_data: dict[str, object], + tool_use_id: str | None, + context: HookContext, + ) -> dict[str, object]: + return { + "hookSpecificOutput": { + "hookEventName": "UserPromptSubmit", + "additionalContext": "请牢记用户最喜欢的颜色是蓝色。", + } + } + + + options = QwenAgentOptions( + allowed_tools=["Bash"], + hooks={ + "PreToolUse": [HookMatcher(matcher="Bash", hooks=[block_risky_bash])], + "UserPromptSubmit": [HookMatcher(matcher=None, hooks=[inject_user_memory])], + }, + ) + + + async def main() -> None: + async with QwenSDKClient(options=options) as client: + await client.query("尝试运行命令: rm -rf /tmp") + async for message in client.receive_response(): + ... 
+ + + asyncio.run(main()) + ``` + +### 传输层扩展 + +- `Transport` 抽象允许替换默认的 `SubprocessCLITransport`,以适配远程 CLI、Agent 服务或容器托管场景。 +- 传输层在握手前执行版本检测与 `max_buffer_size` 限制,输出超限时抛出结构化错误并清理缓冲,防止内存膨胀。 +- 可通过选项控制工作目录、环境变量及 stderr 捕获,为 IDE 与服务端部署提供灵活拓扑。 +- `SubprocessCLITransport` 构建 CLI 命令时需注入 `--mcp-config`、`--agents`、`--setting-sources` 等参数,并在环境变量中写入 `CLAUDE_CODE_ENTRYPOINT`、自定义 `env`、`user` 配置以确保子进程能力对齐。 +- **示例:自定义远程 Transport**(参考 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/__init__.py`): + ```python + import json + from collections.abc import AsyncIterator + import httpx + from qwen_agent_sdk import QwenAgentOptions, QwenSDKClient + from qwen_agent_sdk.transport import Transport + + + class RemoteHTTPTransport(Transport): + def __init__(self, endpoint: str) -> None: + self._client = httpx.AsyncClient(base_url=endpoint, timeout=30.0) + self._ready = False + + async def connect(self) -> None: + self._ready = True + + async def write(self, data: str) -> None: + await self._client.post("/ingress", content=data.encode("utf-8")) + + def read_messages(self) -> AsyncIterator[dict[str, object]]: + async def iterator() -> AsyncIterator[dict[str, object]]: + async with self._client.stream("GET", "/egress") as response: + async for line in response.aiter_lines(): + if line: + yield json.loads(line) + return iterator() + + async def close(self) -> None: + await self._client.aclose() + self._ready = False + + def is_ready(self) -> bool: + return self._ready + + async def end_input(self) -> None: + await self._client.post("/ingress/close") + + + transport = RemoteHTTPTransport("https://cli-gateway.internal") + options = QwenAgentOptions(system_prompt="所有命令都通过远程 CLI 网关执行。") + + + async with QwenSDKClient(options=options, transport=transport) as client: + await client.query("读取 README.md 并总结三条要点。") + async for message in client.receive_response(): + ... + ``` + +### 调试与环境注入 + +- CLI 编排需根据 `QwenAgentOptions` 生成完整命令列,区分流式与一次性执行(`--input-format stream-json` 与 `--print`)。 +- `options.stderr` 与 `debug_stderr` 支持在 SDK 侧接管 CLI 调试输出,结合 anyio TaskGroup 实时读取 stderr 流。 +- `cwd`、`env`、`user` 等参数决定子进程工作目录与权限边界,SDK 应在启动时显式传入并在断连时回收资源。 +- **示例:错误回调与重试**(参考 `third-party/claude-agent-sdk-python-demo/add_my_permission.py`): + ```python + import anyio + from qwen_agent_sdk import ( + PermissionDecision, + PermissionRequest, + PermissionUpdate, + QwenAgentOptions, + QwenSDKClient, + ) + + + async def decide_permission(request: PermissionRequest) -> PermissionDecision: + if request.tool_name == "Write" and request.input and "rm -rf" in str(request.input): + return PermissionDecision(allow=False, reason="拒绝可能破坏工作的命令") + return PermissionDecision(allow=True, updates=[PermissionUpdate(mode="allow")]) + + + async def main() -> None: + options = QwenAgentOptions( + allowed_tools=["Read", "Write"], + permission_callback=decide_permission, + permission_mode="ask", + ) + + async with QwenSDKClient(options=options) as client: + try: + await client.query("删除所有临时文件并写入新的日志") + async for message in client.receive_response(): + ... + except Exception: + await client.query("改为仅清理 ./tmp 目录") + async for message in client.receive_response(): + ... 
+ + + anyio.run(main) + ``` + +## SDK 实现概览 + +| 语言 | 运行时要求 | 分发形态 | 关键依赖 | 主要能力 | 状态 | +| --- | --- | --- | --- | --- | --- | +| Python | Python 3.10+ | `pyproject + hatchling`,命名空间 `qwen_agent_sdk`,发布 `py.typed` | `anyio>=4`、`typing_extensions`、`mcp>=0.1`、`pydantic>=2` | `query()` 快速入口、`QwenSDKClient`、工具注册、权限/Hook 回调、日志与 OTel | 首发实现 | +| TypeScript | Node.js 18+ | 包 `@qwen-agent/sdk`,ESM 默认导出,`tsup` 产物 | `@qwen-code/cli`、`zx/execa`、`eventemitter3` | `createAgentManager`、流式迭代、权限回调、MCP 工具、settingSources 控制 | 首发实现 | +| Go | 待定 | 待定 | 待定 | 复用控制协议,暴露通用 API | TODO | +| Java | 待定 | 待定 | 待定 | 面向企业场景的 SDK | TODO | + +### Python SDK 细节 + +- **运行时与分发**: 需 Python 3.10+;采用 `pyproject.toml + hatchling` 发布;提供 `qwen_agent_sdk` 命名空间与 `py.typed`。 +- **环境依赖**: 需预装 Node.js 及 `qwen-code` CLI,SDK 启动前通过 `which qwen` 或 `QWEN_BIN` 环境变量定位二进制。 +- **核心依赖**: `anyio>=4`、`typing_extensions`、`mcp>=0.1`、`pydantic>=2`。 +- **适用场景**:第三方后端服务、希望自定义交互体验或进行服务端调用的场景。 +- **API 设计**: + - `async def query(...) -> AsyncIterator[Message]`:对齐 Anthropic `query()` 的流式接口。 + - `class QwenSDKClient`:支持 `async with`、会话续写与中断管理。 + - `QwenAgentOptions`:包含 `system_prompt`、`setting_sources`、`permission_mode`、`cwd`、`fork_session` 等参数。 + - `@tool` 装饰器 + `create_sdk_mcp_server`:将 Python 函数注册为 MCP 工具。 +- **实现要点**: + - `StdIOSubprocessTransport` 启动 `qwen` CLI,写入 JSONL,读取流式 chunk。 + - `_handle_control_request()` 对 `can_use_tool`、`hook_callback`、`mcp_message` 等 `subtype` 执行回调,并写入 `control_response`。 + - **Hook 体系**:支持 `PreToolUse`、`PostToolUse`、`UserPromptSubmit` 等事件,可返回 JSON 指令以调整会话流程,与 Anthropic Hook JSON 保持一致。 + - `Query.initialize()` 首次发送 `control_request{subtype:"initialize"}`,同步 Hook 配置与能力声明。 + - 支持 `PermissionResult`、Hook JSON 与 MCP JSON-RPC 的统一封装。 +- **日志与可观测**: + - 默认输出结构化 JSON 日志,支持 `structlog` 注入。 + - **轻量日志约定**:遵循 `logging` 风格输出结构化 JSON,可通过 `options.stderr`/`debug_stderr` 捕获 CLI 原始错误流,加速排障。 + - 规划内置 OpenTelemetry Tracer/Meter,记录会话耗时、传输错误、Worker 利用率。 +- **健壮性**: + - CLI 崩溃时自动重试与会话 fork,保存最近成功结果实现断点续传。 + - 回调异常时返回 `control_response{subtype:"error"}`,触发 CLI 安全回退。 +- **测试体系**: + - `pytest + pytest-asyncio` 覆盖核心流程。 + - `ruff + mypy` 保证代码质量。 +- **示例:快速流式对话**(参考 `third-party/anthropics/claude-agent-sdk-python/examples/quick_start.py`): + ```python + import anyio + from qwen_agent_sdk import ( + AssistantMessage, + QwenAgentOptions, + ResultMessage, + TextBlock, + query, + ) + + + async def main() -> None: + options = QwenAgentOptions( + system_prompt="你是一名示例助手,回答保持简洁。", + allowed_tools=["Read"], + max_turns=1, + ) + + async for message in query(prompt="2 + 2 等于多少?", options=options): + if isinstance(message, AssistantMessage): + for block in message.content: + if isinstance(block, TextBlock): + print(block.text) + elif isinstance(message, ResultMessage) and message.total_cost_usd: + print(f"本轮花费: ${message.total_cost_usd:.4f}") + + + anyio.run(main) + ``` +- **示例:多 Agent 与设置源**(参考 `third-party/anthropics/claude-agent-sdk-python/examples/agents.py`、`third-party/anthropics/claude-agent-sdk-python/examples/setting_sources.py`): + ```python + from qwen_agent_sdk import AgentDefinition, QwenAgentOptions, query + + + options = QwenAgentOptions( + agents={ + "doc-writer": AgentDefinition( + description="输出结构化说明", + prompt="你是资深文档工程师,请附带步骤解释。", + tools=["Read", "Write"], + ), + "tester": AgentDefinition( + description="生成并运行测试", + prompt="你负责编写测试并验证结果。", + tools=["Read", "Write", "Bash"], + ), + }, + setting_sources=["user", "project"], + ) + + + async for message in query( + prompt="请调用 doc-writer 说明 AgentDefinition 的用途。", + options=options, + ): + ... 
+ ``` +- **示例:内嵌 MCP 工具**(参考 `third-party/anthropics/claude-agent-sdk-python/examples/mcp_calculator.py`): + ```python + from qwen_agent_sdk import ( + AssistantMessage, + QwenAgentOptions, + QwenSDKClient, + TextBlock, + create_sdk_mcp_server, + tool, + ) + + + @tool("add", "计算两个数之和", {"a": float, "b": float}) + async def add_numbers(args: dict[str, float]) -> dict[str, object]: + total = args["a"] + args["b"] + return {"content": [{"type": "text", "text": f"{args['a']} + {args['b']} = {total}"}]} + + + calculator = create_sdk_mcp_server( + name="calculator", + version="1.0.0", + tools=[add_numbers], + ) + + + options = QwenAgentOptions( + mcp_servers={"calc": calculator}, + allowed_tools=["mcp__calc__add"], + ) + + + async with QwenSDKClient(options=options) as client: + await client.query("请调用 mcp__calc__add 计算 6 + 7。") + async for message in client.receive_response(): + if isinstance(message, AssistantMessage): + for block in message.content: + if isinstance(block, TextBlock): + print(block.text) + else: + ... + ``` + +### TypeScript SDK 细节 + +- **运行时与分发**: 需 Node.js 18+;包名 `@qwen-agent/sdk`,默认 ESM 导出并通过 `exports` 暴露 CJS;使用 `tsup` 生成 `dist/esm`、`dist/cjs`、`dist/types`。 +- **核心依赖**: `@qwen-code/cli`(peerDependency)、`zx/execa`、`eventemitter3`。 +- **API 能力**: + - `createAgentManager(options)`:提供 `createSession`、`run`、`forkSession`。 + - `session.stream(task)`:返回 `AsyncIterable`,可 `for await` 消费。 + - `onPermissionRequest`:以 `allow/deny/ask` + 规则返回权限决策。 + - `settingSources`:默认关闭,需要显式声明 `["user","project","local"]` 等条目才会加载对应设置文件。 + - `defineTools`:注册 MCP 工具,与 CLI 会话共享上下文。 + - `agents` 选项:支持内联多 Agent 拓扑,结合 `forkSession` 构建子 Agent。 +- **实现要点**: + - 使用 `execa` 启动 CLI,统一解析 stdout 为 `AgentStreamChunk`。 + - `ProcessTransport` 逐行解码 stdout (`JSON.parse`),通过 `EventEmitter` 推送 `control_request`、`result/*`、`chat.completion*` 事件,所有反向 `control_response` 共用子进程 stdin。 + - 维持 `result/heartbeat` 定时器,超时自动重启 Worker。 + - `pendingControl` 映射配合 `request_id` 路由 `control_request`。 + - 回调 Promise 生成标准化 `control_response` payload;未注册时走默认策略。 + - `onPermissionRequest`、`onHookEvent` 等回调 Promise 化处理,统一生成 `control_response`,未注册时沿用默认策略避免 CLI 阻塞。 + - `defineTools()` 将 TS 函数组装为 in-process MCP server,透传 JSON-RPC。 + - 初始化阶段等待 CLI 首条 `chat.completion` 握手信息,并通过 `control_request{subtype:"initialize"}` 发送 Hook/工具能力。 + - 异常场景记录 verbose 日志并返回 `control_response{subtype:"error"}`。 +- **工程体系**: + - `tsup` 产出 `dist/esm`、`dist/cjs` 与 `dist/types`。 + - 测试矩阵使用 `vitest` + `tsx`,结合 CLI mock 校验流式输出、权限回调。 + - 提供 `enableVerboseLogging()` 开关、`logger`/`stderr` 回调以及 OpenTelemetry Trace 规划。 + +### 其它语言路线(TODO) + +- **Go/Java**:计划复用统一控制协议与 Worker 调度策略,面向企业场景与后端服务。 +- 将在 Python/TypeScript 稳定后补充 SDK 设计与实现细节。 + +## 控制协议与 MCP 集成 + +| 通道 | 发起方 | 典型事件 | 回执要求 | 目的 | +| --- | --- | --- | --- | --- | +| `chat.completion*` | CLI → SDK | 模型回复、工具调用、收尾摘要 | 无需回执 | 承载主流程消息 | +| `result/*` | CLI → SDK | `result/command`、`result/heartbeat`、`result/cancel`、`x-qwen-session-event` | 无需回执 | 发布状态与辅助信息 | +| `*request` | SDK → CLI | `command_hint_request`、`heartbeat_request`、`control/cancel` | CLI 以 `result/*` 响应 | 触发即时操作 | +| `control_request` | CLI → SDK | `can_use_tool`、`hook_callback`、`mcp_message` | 需回写 `control_response` | 执行权限判定、Hook、MCP 调用 | + +- 所有控制事件通过统一 STDIN/STDOUT 管道传输,SDK 必须确保在约定超时内响应。 +- MCP 集成在 SDK 进程内创建 server,无需额外子进程或网络服务。 +- 授权回调与 MCP 调用解耦:CLI 仍负责触发 `can_use_tool`,SDK 收到后执行权限逻辑,随后才能处理 `mcp_message`。 + +```mermaid +sequenceDiagram + participant CLI as qwen-code CLI (stdout/stdin) + participant Control as Agent SDK ControlPlane + participant MCP as 
In-Process MCP Server + participant Tool as 用户自定义工具 + + CLI->>Control: control_request (subtype="mcp_message",stdout) + Control->>MCP: JSON-RPC (tools/list | tools/call) + MCP->>Tool: 调用异步处理函数 + Tool-->>MCP: 处理结果 + MCP-->>Control: jsonrpc result (mcp_response) + Control-->>CLI: control_response (stdin) +``` + +- 初始化阶段通过 `control_request{subtype:"initialize"}` 同步 Hook 配置与能力声明。 +- 回调异常时,SDK 需记录日志并返回 `control_response{subtype:"error"}`,CLI 遵循安全回退策略。 + +| `control_request.subtype` | 输入要点 | SDK 响应约束 | 说明 | +| --- | --- | --- | --- | +| `initialize` | 携带 `hooks` 配置、可选 MCP 能力声明 | 返回 `control_response` 确认或报错;存档 `_initialization_result` | 需将 `HookMatcher` 中的回调映射为 `hookCallbackIds`,便于后续触发 | +| `can_use_tool` | 提供 `tool_name`、`input`、`permission_suggestions` | 返回 `behavior=allow/deny`,可附带 `updatedInput`、`updatedPermissions` | `updatedPermissions` 由 `PermissionUpdate` 数组组成,支持规则/目录/模式调整 | +| `hook_callback` | 提供 `callback_id`、Hook 输入上下文 | 查找对应回调并返回 JSON 结果 | 需在连接阶段缓存回调映射,未命中应返回结构化错误 | +| `mcp_message` | 提供 `server_name`、`message` JSON-RPC | 调用 SDK 内置 MCP server,封装 `mcp_response` | 支持 `initialize`、`tools/list`、`tools/call` 等标准方法,出错时返回 JSON-RPC 错误对象 | + +- `PermissionUpdate` 结构支持 `addRules`、`replaceRules`、`removeRules`、`setMode`、`addDirectories`、`removeDirectories` 等类型,需精确传递规则内容与目录集合以满足企业级权限治理。 +- Hook 配置允许多事件、多匹配器组合:`HookMatcher.matcher` 指定匹配条件,`hooks` 填写回调函数列表,SDK 在初始化时生成回调 ID,并在 `hook_callback` 阶段路由执行。 + +## 通信模式与 MCP 能力 + +| 模块 | 形态 | 关键说明 | +| --- | --- | --- | +| IPC 模式 | STDIN/STDOUT JSON Lines | SDK 启动本地 `qwen` 子进程,以 JSON Lines 进行通信;协议细节对齐《qwen-code-cli-output-format-stream-json-rfc_cn.md》,保持 `/`、`@`、`?` 指令的即时回执。 | +| In-Process MCP Server | SDK 内嵌 MCP Server | 依赖 `mcp>=0.1` 在宿主进程创建 MCP Server,透传 `@tool`/`defineTools` 定义的函数,无需额外子进程或网络服务。 | + +- **IPC 实现要点**: + - `SubprocessCLITransport`(或等价实现)通过同一 STDIN/STDOUT 管道处理正向/反向消息,无需额外套接字。 + - CLI 输出 `chat.completion`/`chat.completion.chunk` 时需在首条消息的 `metadata` 携带 `protocol_version`、`input_format`、`output_format`、`capabilities`。 + - 事件语义覆盖 `result/heartbeat`、`result/cancel`、`x-qwen-session-event`、`control_request/control_response`,并提供 OpenAI 风格错误对象。 +- **MCP 事件链路**: + - CLI 通过 `control_request{subtype:"mcp_message"}` 将 JSON-RPC 请求写入 stdout,SDK 转发给本地 MCP Server 执行 `tools/list`、`tools/call`。 + - 执行结果封装为 `control_response` 写回 stdin,形成闭环。 +- **授权分工与优势**: + - CLI 触发 `control_request{subtype:"can_use_tool"}` 由 SDK 决策授权,MCP 调用链路与权限判定解耦。 + - 工具执行下沉到 SDK 进程内,降低延迟;Hook 能力可沿同一通路后续接入,与 Claude Agent SDK 实践保持一致。 + +## Worker 池与复用 + +| 维度 | 设计要点 | 实施状态 | +| --- | --- | --- | +| 状态机 | 空闲 → 占用 → 空闲,单 Worker 独占会话 | 已设计 | +| 复用策略 | 会话结束不销毁进程,清理上下文后复用 | 规划落地 | +| 安全保障 | 会话隔离、资源清理、健康检查 | 随 Worker 池实现同步 | +| 配置项 | `min_workers`、`max_workers`、`idle_timeout`、`max_sessions_per_worker`、`health_check_interval` | 需在 SDK/CLI 配置中暴露 | +| 可观测 | 结构化日志、指标导出、Trace 链接 | SDK/CLI 各自接入 | + +- **环境说明**: Worker 本质是 qwen-code CLI 子进程,其容器/沙箱与工具桥接由 CLI 管理,SDK 仅通过 STDIN/STDOUT 进行调度与控制。 +- Worker 仅在单会话期间占用,保证隔离;会话结束后回到空闲池。 +- 复用依赖清理会话变量、关闭文件句柄、重置环境变量。 +- 健康检查覆盖内存泄漏、僵尸进程、卡死检测,异常时自动重启。 +- 典型配置示例: + +```yaml +worker_pool: + min_workers: 5 + max_workers: 50 + idle_timeout: 1800 # 30 分钟 + max_sessions_per_worker: 100 + health_check_interval: 60 +``` + +- 会话日志需记录 `session_id`、`prompt_id`、耗时、命令摘要,支持回传至集中式日志系统。 +- 指标采集聚焦活跃/空闲数量、排队时长、重启次数、失败率等,Trace 在 SDK → CLI → 工具调用间传播。 + +## 配置注入与设置管理 + +| 项目 | 能力描述 | 说明 | +| --- | --- | --- | +| `settings_profile` | 为单个 `QwenClient` 指定独立配置 | 影响该客户端维护的 Worker 池及子进程 | +| `system/system_defaults` | 传入绝对路径或 JSON overrides | 映射到 CLI `QWEN_CODE_SYSTEM_SETTINGS_PATH` 等环境变量 | +| 
`user/workspace` | JSON 对象或文件/目录路径 | SDK 在临时目录生成 `settings.json` 并挂载至 CLI | +| `overrides` | 键值覆盖,如 `model.name`、`tools.allowed` | 直接写入临时配置文件 | + +- **生态复用**:沿用 CLI 多层设置体系 (`SettingScope.System/SystemDefaults/User/Workspace`),不同 `QwenClient` 相互隔离,未提供 profile 时走 CLI 默认加载顺序。 +- **实现步骤**: + 1. 在 `QwenClientOptions`/`QwenAgentOptions` 新增 `settings_profile` 字段,Python/TypeScript SDK 均需支持。 + 2. Worker 池启动 CLI 前,将 profile 写入隔离目录,并设置 `--setting-sources`/`--settings` 或相关参数。 + 3. 写入 `QWEN_CODE_USER_SETTINGS_PATH`、`QWEN_CODE_WORKSPACE_SETTINGS_PATH` 等环境变量,以指向生成的临时配置文件。 + 4. Worker 池销毁时清理临时目录,避免配置泄漏。 +- **日志与排障**:日志中打印 profile 摘要(脱敏),便于排查配置错配。 +- **安全考量**:配置仅由宿主应用注入,不做共享路径回退,防止跨租户污染,需提醒妥善管理敏感 Token/路径。 +- **兼容性**:CLI 需补齐对新环境变量的解析,未识别的变量应回退默认行为(忽略)。 +- **现状提醒**:当前 CLI 尚未支持单子进程独立配置,需要后续 RFC/PR 推进 `--settings-profile` 及相关环境变量能力。 + +## Agent SDK 调度层能力 + +| 模块 | 核心职责 | 现状 | 后续工作 | +| --- | --- | --- | --- | +| IPC 封装 | JSON Lines 解析与写入,事件路由 | CLI 仍为文本 STDIN,需扩展 | 引入 `StdinReaderService`、`StdoutWriterService`、增强 `main()` | +| 进程管理 | 启动、监控、资源限制、日志 | 初步设计 | 建立资源配额与异常重启策略 | +| 控制协议 | 权限回调、Hook 注入 | CLI 仅有 ApprovalMode | 新增权限接口、Hook 体系与插桩 | +| 输入输出格式 | `--input-format/--output-format` | 需 CLI 支持 `stream-json` | 完成参数解析与 TUI 自动禁用 | +| 事件语义 | `result/heartbeat`、`control_request` 等 | 定义中 | 与 CLI 输出格式 RFC 对齐实现 | + +- `docs/ipc/qwen-chat-request-schema.json` 扩展自 OpenAI `/chat/completions`,增加 `session_id`、`prompt_id`、`origin`、`tool_call_id` 字段。 +- 错误语义需要对齐 CLI 输出格式:致命错误输出 OpenAI 风格错误对象;可恢复错误通过 `chat.completion` 表示。 +- 需要确保 `/`、`@`、`?` 指令请求的即时响应与事件派发。 + +## 可观测性与调试 + +| 领域 | 要点 | +| --- | --- | +| 日志 | SDK 默认结构化 JSON,CLI 可透传 stderr,支持 `logger` 注入 | +| 指标 | SDK/CLI 各自导出活跃 Worker、排队时长、错误数等,规划统一指标命名 | +| Trace | 生成会话级 Span,传播至 CLI 与工具调用链路,实现端到端排障 | +| 调试工具 | TypeScript 提供 `enableVerboseLogging()`,Python 支持捕获 `debug_stderr`,两侧均计划引入 OTel | + +- 第三方服务需记录消息序列,支持审计与问题重放。 +- CLI 命令示例可用于本地调试(详见输出格式 RFC)。 + +## 集成模式 + +| 模式 | 适用场景 | 关键特性 | +| --- | --- | --- | +| 宿主进程引入 SDK | IDE 插件、企业内部工具、CLI 扩展 | 直接在宿主进程启动 Worker 池,通过 IPC JSONL 与 CLI 通信,支持同步与流式输出 | + +**快速上手示例**: + +```python +from qwen_agent_sdk import QwenClient + +with QwenClient(binary_path="qwen", model="qwen3-coder-plus") as client: + result = client.chat( + task="扫描并修复 main.py 中的潜在 bug", + workspace="/repos/demo" + ) + print(result.summary) +``` + +- 第三方程序可依赖 `qwen-agent-sdk` 统一管理会话、工具与权限策略。 +- SDK 需支持会话重放与取消、心跳维持与超时控制。 +- **示例:一体化会话脚手架**(参考 `third-party/claude-agent-sdk-python-demo/quick_start_example.py`): + ```python + import anyio + from qwen_agent_sdk import ( + AssistantMessage, + QwenAgentOptions, + QwenSDKClient, + TextBlock, + create_sdk_mcp_server, + query, + tool, + ) + + + @tool("get_system_info", "获取系统信息", {}) + async def get_system_info(_: dict[str, object]) -> dict[str, object]: + import os + import platform + + summary = "\n".join( + [ + f"- 操作系统: {platform.system()} {platform.release()}", + f"- Python 版本: {platform.python_version()}", + f"- 当前目录: {os.getcwd()}", + ] + ) + return {"content": [{"type": "text", "text": summary}]} + + + async def run_all_examples() -> None: + # 1) 基础问答 + async for msg in query(prompt="你好,做个自我介绍。"): + if isinstance(msg, AssistantMessage): + for block in msg.content: + if isinstance(block, TextBlock): + print(block.text) + + # 2) 定制参数 + 流式消费 + options = QwenAgentOptions(system_prompt="你是一名资深助手,善于步骤化回答。") + async for msg in query("解释一下多进程和多线程的差异", options=options): + ... 
+ + # 3) 注册自定义工具 + server = create_sdk_mcp_server(name="my-tools", version="1.0.0", tools=[get_system_info]) + async with QwenSDKClient( + options=QwenAgentOptions( + mcp_servers={"tools": server}, + allowed_tools=["mcp__tools__get_system_info"], + ) + ) as client: + await client.query("请获取当前运行环境") + async for msg in client.receive_response(): + ... + + + anyio.run(run_all_examples) + ``` + +## 开放事项与后续计划 + +| 方向 | 待完成事项 | +| --- | --- | +| CLI 协议支持 | 补齐 `stream-json` 输入输出、事件语义与握手元数据 | +| 控制协议 | 实现权限回调 API、Hook 插桩、MCP `mcp_message` 通路 | +| Worker 池 | 完善健康检查、资源配额、异常自动隔离策略 | +| 配置体系 | CLI 合入 `--settings-profile` 与相关环境变量支持 | +| 多语言 SDK | 丰富 Go/Java 版本,实现统一测试矩阵与文档 | +| 可观测 | 联合定义指标命名 & Trace 采样策略,完善日志规范 | diff --git a/docs/rfc/qwen-code-agent-framework-rfc_clear_en.md b/docs/rfc/qwen-code-agent-framework-rfc_clear_en.md new file mode 100644 index 000000000..5efe6fb62 --- /dev/null +++ b/docs/rfc/qwen-code-agent-framework-rfc_clear_en.md @@ -0,0 +1,687 @@ +# Qwen-Code Agent Framework Architecture Design (Clean Version) + +## Overview + +| Field | Details | +| --- | --- | +| Design Version | v1.1 | +| Last Updated | 2025-10-10 | +| Architecture Scope | Qwen-Code Agent SDK and qwen-code CLI subprocess orchestration, control protocol, observability, and configuration system | +| Key Objectives | Provide third-party applications with unified IPC communication, worker pool governance, permission control, and tool bridging capabilities | + +- **Core Component**: This document focuses on the Qwen-Code Agent SDK, which encapsulates session routing, control protocol handling, and worker pool governance inside the host process to deliver unified access for multi-language scenarios. +- **Core Responsibilities**: Session scheduling and routing; CLI subprocess lifecycle and resource governance; control protocol hooks and permission adjudication; lightweight logging and observability integration; telemetry collection (logs, metrics, traces). +- **Core Functions**: Supports synchronous/asynchronous task execution, streaming output, session management, error handling and retries, in-process MCP tool bridging, and isolated configuration injection. +- Wraps the CLI subprocess lifecycle and JSONL protocol for multi-language SDKs through a unified abstraction. +- Provides consistent interfaces for session scheduling, permission governance, Hook/MCP callbacks, logging, and metrics collection. +- Aligns protocol specifications with the Claude Agent SDK to reduce collaboration costs and ecosystem integration friction across platforms. + +## Architecture Overview + +| Layer | Key Components | Responsibility Highlights | +| --- | --- | --- | +| Upstream Clients | TypeScript / Python / Go / Java SDK | Expose standard APIs, adapt to each language ecosystem, handle session onboarding, and register custom tools | +| Agent SDK Core | Router, ControlPlane, WorkerPool, ProcessMgr, IPC | Manage session routing, permission callbacks, worker pool lifecycle, and STDIO JSONL transport | +| CLI Worker | qwen-code CLI subprocess | Execute model inference and tool invocations, emit `chat.completion*` / `result/*` / `control_request` according to the protocol | +| Peripheral Services | LLM/MCP, monitoring, logging, tracing | Provide model services, observability data, and integration with external systems | + +```mermaid +flowchart LR + subgraph Clients["Third-Party Applications / Services"] + direction LR + TypeScriptSDK["qwen-agent-sdk
TypeScript"] + PythonSDK["qwen-agent-sdk
Python"] + GoSDK["qwen-agent-sdk
Go (TODO)"] + JavaSDK["qwen-agent-sdk
Java (TODO)"] + end + + subgraph AgentSDK["Qwen-Code Agent SDK"] + direction TB + Router["Session Scheduling
Routing / Load Balancing"] + ControlPlane["Control Protocol
Hook / Permission Decisions"] + WorkerPool["Worker Pool Management
Allocation / Reclamation / Health Checks"] + ProcessMgr["Subprocess Management
Launch / Monitor / Restart"] + IPC["IPC Adaptation Layer
STDIN/STDOUT JSONL"] + end + + subgraph Workers["qwen-code CLI Workers"] + direction LR + Worker1["Worker #1
qwen-code CLI"] + Worker2["Worker #2
qwen-code CLI"] + WorkerN["Worker #N"] + end + + subgraph Services["Peripheral Services"] + LLM_MCP["LLM Services / MCP Services"] + Monitor["Monitoring & Alerts"] + Logger["Logging Center"] + Trace["Tracing"] + end + + Clients --> Router + Router --> ControlPlane + Router --> WorkerPool + WorkerPool --> ProcessMgr + ProcessMgr --> IPC + ControlPlane -->|control_response| IPC + IPC -->|control_request| ControlPlane + IPC --> Worker1 + IPC --> Worker2 + IPC --> WorkerN + + Worker1 --> LLM_MCP + Worker2 --> LLM_MCP + WorkerN --> LLM_MCP + + Router --> Monitor + Router --> Logger + Router --> Trace + IPC -->|result/*| ControlPlane + ControlPlane -->|request| IPC + + classDef clientStyle fill:#e67e22,stroke:#ba6c1e,color:#fff + classDef sdkStyle fill:#f39c12,stroke:#ca7e08,color:#fff + classDef workerStyle fill:#16a085,stroke:#138d75,color:#fff + classDef serviceStyle fill:#95a5a6,stroke:#707b7c,color:#fff + class Clients,TypeScriptSDK,PythonSDK,GoSDK,JavaSDK clientStyle + class AgentSDK,Router,ControlPlane,ProcessMgr,IPC,WorkerPool sdkStyle + class Workers,Worker1,Worker2,WorkerN workerStyle + class Services,LLM_MCP,Monitor,Logger,Trace serviceStyle +``` + +- The Agent SDK and CLI share a bidirectional STDIN/STDOUT JSONL channel that transports events such as `chat.completion*`, `result/*`, and `control_request`. +- Event flow: the CLI writes `chat.completion`/`result/*`/`control_request` line-by-line to stdout; the SDK parses them and, when needed, writes `request`/`control_response` back to stdin. When a `control_request{subtype:"mcp_message"}` appears, the ControlPlane forwards the JSON-RPC payload to the local MCP server and relays the resulting `mcp_response`. +- The qwen-code CLI already integrates with OpenTelemetry to report model calls, tool executions, and internal events; the Agent SDK must instrument independently and propagate trace/span IDs across the entire path to build a unified troubleshooting view. +- Control protocol semantics align with the CLI output format specification, as documented in the accompanying `stream-json` RFC. +- For details on event categories such as `result/*`, `request`, and `control_request`, refer to the “Event Classification” section of `qwen-code-cli-output-format-stream-json-rfc_cn.md`. + +## Core Capability Mapping + +| Capability Domain | Key Focus | Current Capability | Future Evolution | +| --- | --- | --- | --- | +| Session Scheduling | Session routing, worker binding, reuse strategies | Router and worker pool scheduling provided by the SDK | Enhance session branching and sub-agent scheduling | +| Process Governance | Subprocess launch, monitoring, restart | ProcessMgr oversees lifecycle and resource limits | Introduce resource quotas and automatic fault isolation | +| Control Protocol | Permission callbacks, hooks, MCP | ControlPlane handles `control_request` uniformly | Expand hook points and approval strategies | +| IPC Protocol | JSON Lines, input/output formats | IPC layer implements `stream-json` / `stream-chunk-json` | Enrich event types and support protocol negotiation | +| Observability | Logging, metrics, tracing | SDK and CLI both integrate OTel and emit structured logs | Standardize metric names and cross-component trace analysis | + +- The scheduling and control layers must enforce worker exclusivity, session isolation, and resource reclamation. +- The control protocol follows a request/response model; the SDK must return a `control_response` before timeout to prevent the CLI from blocking. 
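+
+To make the channel contract above concrete, the sketch below is a minimal, framework-free event pump: it launches the CLI in stream-json mode, parses every stdout line as one JSON envelope, and branches on the `type` field. The function name and the exact `control_response` payload are illustrative assumptions; only the flags and the event families come from this RFC.
+
+```python
+# Illustrative only: an event pump for the STDIO JSONL channel described above.
+import asyncio
+import json
+
+CMD = ["qwen", "--input-format", "stream-json", "--output-format", "stream-json"]
+
+
+async def pump_events() -> None:
+    proc = await asyncio.create_subprocess_exec(
+        *CMD, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE
+    )
+    assert proc.stdout is not None and proc.stdin is not None
+    async for raw in proc.stdout:
+        line = raw.decode().strip()
+        if not line:
+            continue
+        event = json.loads(line)  # each stdout line is a standalone JSON object
+        etype = event.get("type", "")
+        if etype.startswith("chat.completion"):
+            pass  # main conversation stream; no acknowledgement required
+        elif etype.startswith("result"):
+            pass  # status / heartbeat / cancel events; no acknowledgement required
+        elif etype == "control_request":
+            # Must be answered before the timeout so the CLI does not block;
+            # the payload here is schematic (see the control protocol section).
+            reply = {"type": "control_response", "request_id": event.get("request_id")}
+            proc.stdin.write((json.dumps(reply) + "\n").encode())
+            await proc.stdin.drain()
+
+
+if __name__ == "__main__":
+    asyncio.run(pump_events())
+```
+
+A production transport would route the `control_request` branch to the ControlPlane callbacks (permission, hook, MCP) instead of replying inline.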
+ +## Agentic Session Capabilities + +| Capability Block | Design Focus | Interface / Structure | +| --- | --- | --- | +| Session Loop | Stateful `QwenAgentClient`, separating connection and message flow phases | `connect()`, `disconnect()`, `Query.initialize()` | +| Session Context | Support async context management and resource teardown | `__aenter__` / `__aexit__`, `async with QwenAgentClient` | +| Message Flow Handling | Append inputs and consume streaming outputs within the same session | `query()`, `stream_input()`, `receive_messages()`, `receive_response()` | +| Dynamic Control | Switch models/permissions at runtime, trigger interrupts, retrieve initialization data | `interrupt()`, `set_permission_mode()`, `set_model()`, `get_server_info()` | +| Transport Abstraction | Pluggable transports for local/remote CLI and buffered daemons | `Transport`, `SubprocessCLITransport`, `max_buffer_size` validation | + +### Session Loop and Context + +- `QwenAgentClient.connect()` supports both string and async stream modes. Hooks and the SDK MCP server can be injected during the first `initialize` call to ensure multi-turn sessions across IDEs and services. +- `disconnect()` clears `_query` and the transport. Combined with `async with`, it automates connection/disposal to avoid orphaned subprocesses. +- Parameter validation should proactively reject mutually exclusive options (for example, enabling both `can_use_tool` and `permission_prompt_tool_name`) to improve configuration ergonomics. + +### Message Flow Handling + +- `query()` automatically fills `session_id` and writes JSONL records, allowing scripts and UIs to append messages as needed. +- `stream_input()` handles incremental writes, clears buffers on exceptions, and records debug logs to stabilize long-running flows. +- `receive_messages()` iterates over every event, while `receive_response()` stops as soon as it encounters a `ResultMessage`, helping UIs detect when a single response completes. +- The `include_partial_messages` option forwards incremental chunks. Combined with `stderr`/`debug_stderr`, it captures low-level diagnostics for real-time rendering in complex interfaces. + +### Dynamic Control Interfaces + +- `_send_control_request` supports subtypes such as `interrupt`, `set_permission_mode`, and `set_model`, enabling manual interruption, permission mode switching, and hot model changes. +- `get_server_info()` fetches CLI initialization capabilities (protocol versions, command set, etc.) for dynamic front-end or policy configuration. +- `async query(...)->AsyncIterator` exposes a script-friendly interface that consumes the entire response in one go while preserving the ability to swap in custom transports. + +### Single-Query and Multi-Agent Orchestration + +- `query()` wraps the session lifecycle for rapid integrations such as scripts or batch tasks. When a custom `Transport` is provided, remote CLIs or agent services can be reused. +- The `agents` configuration maps multi-agent topologies onto the CLI `--agents` flag. In conjunction with `fork_session`, it supports session branching and sub-agent routing. +- When generating CLI commands, the SDK must compose JSON payloads for options such as `setting_sources`, `allowed_tools`, and `mcp_servers` to ensure feature parity with the CLI. + +### Hooks and MCP Bridging + +- During initialization, hook definitions convert the `HookMatcher` list into `hookCallbackIds`. 
Subsequent `hook_callback` events use those IDs to invoke host logic at critical stages such as `PreToolUse`, `PostToolUse`, and `UserPromptSubmit`. +- On the SDK side, `create_sdk_mcp_server` / `defineTools` build an MCP server inside the host process. `mcp_message` requests trigger JSON-RPC procedures (`tools/list`, `tools/call`, `initialize`, etc.) without spawning extra subprocesses. +- Permission approvals are returned via `PermissionResultAllow` / `PermissionResultDeny` and `PermissionUpdate`, enabling fine-grained policies that can be adjusted in hooks or tool callbacks. +- **Example: Combining PreToolUse and UserPromptSubmit** (based on `third-party/anthropics/claude-agent-sdk-python/examples/hooks.py`): + ```python + import asyncio + from qwen_agent_sdk import HookContext, HookMatcher, QwenAgentOptions, QwenSDKClient + + + async def block_risky_bash( + input_data: dict[str, object], + tool_use_id: str | None, + context: HookContext, + ) -> dict[str, object]: + command = input_data.get("command", "") if isinstance(input_data, dict) else "" + if isinstance(command, str) and "rm -rf" in command: + return { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": "Block dangerous delete command.", + } + } + return {} + + + async def inject_user_memory( + input_data: dict[str, object], + tool_use_id: str | None, + context: HookContext, + ) -> dict[str, object]: + return { + "hookSpecificOutput": { + "hookEventName": "UserPromptSubmit", + "additionalContext": "Remember that the user's favorite color is blue.", + } + } + + + options = QwenAgentOptions( + allowed_tools=["Bash"], + hooks={ + "PreToolUse": [HookMatcher(matcher="Bash", hooks=[block_risky_bash])], + "UserPromptSubmit": [HookMatcher(matcher=None, hooks=[inject_user_memory])], + }, + ) + + + async def main() -> None: + async with QwenSDKClient(options=options) as client: + await client.query("Try to run: rm -rf /tmp") + async for message in client.receive_response(): + ... + + + asyncio.run(main()) + ``` + +### Transport Layer Extensions + +- The `Transport` abstraction makes it possible to replace the default `SubprocessCLITransport` for remote CLI, agent service, or container-hosted scenarios. +- Before the handshake, the transport performs version checks and enforces `max_buffer_size`; when output exceeds the limit it raises a structured error and cleans buffers to prevent memory bloat. +- Options can specify working directory, environment variables, and stderr capture to support IDE integration and server deployments flexibly. +- `SubprocessCLITransport` must inject CLI arguments such as `--mcp-config`, `--agents`, and `--setting-sources`, and set environment variables like `CLAUDE_CODE_ENTRYPOINT` plus custom `env` and `user` values to keep subprocess capabilities aligned. 
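+- **Example: building the CLI invocation** (minimal sketch of the flag mapping described in the previous bullet; `build_command` is a hypothetical helper, not part of the SDK surface):
+  ```python
+  import json
+
+
+  def build_command(
+      cli_path: str,
+      mcp_servers: dict[str, object] | None = None,
+      agents: dict[str, object] | None = None,
+      setting_sources: list[str] | None = None,
+      include_partial_messages: bool = False,
+  ) -> list[str]:
+      # Map high-level options onto the CLI flags listed above.
+      cmd = [cli_path, "--input-format", "stream-json", "--output-format", "stream-json"]
+      if mcp_servers:
+          cmd += ["--mcp-config", json.dumps({"mcpServers": mcp_servers})]
+      if agents:
+          cmd += ["--agents", json.dumps(agents)]
+      if setting_sources is not None:
+          cmd += ["--setting-sources", ",".join(setting_sources)]
+      if include_partial_messages:
+          cmd.append("--include-partial-messages")
+      return cmd
+
+
+  print(build_command("qwen", setting_sources=["user", "project"]))
+  ```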
+- **Example: Custom Remote Transport** (see `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/_internal/transport/__init__.py`): + ```python + import json + from collections.abc import AsyncIterator + import httpx + from qwen_agent_sdk import QwenAgentOptions, QwenSDKClient + from qwen_agent_sdk.transport import Transport + + + class RemoteHTTPTransport(Transport): + def __init__(self, endpoint: str) -> None: + self._client = httpx.AsyncClient(base_url=endpoint, timeout=30.0) + self._ready = False + + async def connect(self) -> None: + self._ready = True + + async def write(self, data: str) -> None: + await self._client.post("/ingress", content=data.encode("utf-8")) + + def read_messages(self) -> AsyncIterator[dict[str, object]]: + async def iterator() -> AsyncIterator[dict[str, object]]: + async with self._client.stream("GET", "/egress") as response: + async for line in response.aiter_lines(): + if line: + yield json.loads(line) + return iterator() + + async def close(self) -> None: + await self._client.aclose() + self._ready = False + + def is_ready(self) -> bool: + return self._ready + + async def end_input(self) -> None: + await self._client.post("/ingress/close") + + + transport = RemoteHTTPTransport("https://cli-gateway.internal") + options = QwenAgentOptions(system_prompt="Execute every command through the remote CLI gateway.") + + + async with QwenSDKClient(options=options, transport=transport) as client: + await client.query("Read README.md and summarize three highlights.") + async for message in client.receive_response(): + ... + ``` + +### Debugging and Environment Injection + +- CLI orchestration must derive the full command list from `QwenAgentOptions`, distinguishing between streaming (`--input-format stream-json`) and single-shot (`--print`) execution. +- `options.stderr` and `debug_stderr` let the SDK capture CLI diagnostic output, while anyio `TaskGroup` consumes stderr streams in real time. +- Parameters such as `cwd`, `env`, and `user` determine the subprocess working directory and permission boundaries; the SDK should pass them explicitly at startup and reclaim resources on disconnect. +- **Example: Error Callback and Retry** (see `third-party/claude-agent-sdk-python-demo/add_my_permission.py`): + ```python + import anyio + from qwen_agent_sdk import ( + PermissionDecision, + PermissionRequest, + PermissionUpdate, + QwenAgentOptions, + QwenSDKClient, + ) + + + async def decide_permission(request: PermissionRequest) -> PermissionDecision: + if request.tool_name == "Write" and request.input and "rm -rf" in str(request.input): + return PermissionDecision(allow=False, reason="Reject commands that may break work.") + return PermissionDecision(allow=True, updates=[PermissionUpdate(mode="allow")]) + + + async def main() -> None: + options = QwenAgentOptions( + allowed_tools=["Read", "Write"], + permission_callback=decide_permission, + permission_mode="ask", + ) + + async with QwenSDKClient(options=options) as client: + try: + await client.query("Delete all temporary files and write a new log.") + async for message in client.receive_response(): + ... + except Exception: + await client.query("Instead, only clean the ./tmp directory.") + async for message in client.receive_response(): + ... 
+ + + anyio.run(main) + ``` + +## SDK Implementation Overview + +| Language | Runtime Requirements | Distribution Form | Key Dependencies | Main Capabilities | Status | +| --- | --- | --- | --- | --- | --- | +| Python | Python 3.10+ | `pyproject + hatchling`, namespace `qwen_agent_sdk`, ships `py.typed` | `anyio>=4`, `typing_extensions`, `mcp>=0.1`, `pydantic>=2` | `query()` fast path, `QwenSDKClient`, tool registration, permission/hook callbacks, logging and OTel | Initial release | +| TypeScript | Node.js 18+ | Package `@qwen-agent/sdk`, default ESM exports, `tsup` build outputs | `@qwen-code/cli`, `zx/execa`, `eventemitter3` | `createAgentManager`, streaming iteration, permission callbacks, MCP tools, `settingSources` control | Initial release | +| Go | TBA | TBA | TBA | Reuse control protocol, expose common APIs | TODO | +| Java | TBA | TBA | TBA | SDK for enterprise scenarios | TODO | + +### Python SDK Details + +- **Runtime and Distribution**: Requires Python 3.10+; distributed via `pyproject.toml + hatchling`; provides the `qwen_agent_sdk` namespace plus `py.typed`. +- **Environment Dependencies**: Node.js and the `qwen-code` CLI must be installed. Before startup the SDK locates the binary via `which qwen` or the `QWEN_BIN` environment variable. +- **Core Dependencies**: `anyio>=4`, `typing_extensions`, `mcp>=0.1`, `pydantic>=2`. +- **Typical Scenarios**: Third-party backend services that need customized experiences or server-side execution. +- **API Design**: + - `async def query(...) -> AsyncIterator[Message]`: Aligns with Anthropic’s streaming `query()` interface. + - `class QwenSDKClient`: Supports `async with`, session continuation, and interruption management. + - `QwenAgentOptions`: Includes parameters such as `system_prompt`, `setting_sources`, `permission_mode`, `cwd`, and `fork_session`. + - `@tool` decorator + `create_sdk_mcp_server`: Register Python functions as MCP tools. +- **Implementation Highlights**: + - `StdIOSubprocessTransport` launches the `qwen` CLI, writes JSONL, and reads streaming chunks. + - `_handle_control_request()` processes subtypes such as `can_use_tool`, `hook_callback`, and `mcp_message`, writing back corresponding `control_response`. + - **Hook System**: Supports events such as `PreToolUse`, `PostToolUse`, and `UserPromptSubmit`. Returns JSON instructions to adjust the session flow, mirroring Anthropic’s hook JSON structure. + - `Query.initialize()` sends an initial `control_request{subtype:"initialize"}` to synchronize hook configuration and capability declarations. + - Unifies `PermissionResult`, hook JSON, and MCP JSON-RPC payloads under a consistent abstraction. +- **Logging and Observability**: + - Emits structured JSON logs by default and supports injecting a `structlog` logger. + - **Lightweight Logging Convention**: Follow standard `logging`-style structured JSON output. `options.stderr`/`debug_stderr` can capture raw CLI error streams for faster troubleshooting. + - Plans to bundle OpenTelemetry tracer/meter instrumentation for session latency, transport errors, and worker utilization. +- **Resilience**: + - Implements retries and backoff for transient transport failures. + - Guards against buffer overflows by leveraging `max_buffer_size`. + - Gracefully degrades when CLI capabilities are missing, surfacing actionable errors. 
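+- **Example: wiring CLI stderr into the host logger** (illustrative sketch that complements the logging bullets above; `QwenAgentOptions.stderr` is the callback named there, and its assumed signature is one string argument per stderr line):
+  ```python
+  import logging
+
+  import anyio
+  from qwen_agent_sdk import QwenAgentOptions, QwenSDKClient
+
+  logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")
+  cli_logger = logging.getLogger("qwen.cli")
+
+  # Forward every raw CLI stderr line into the host logging pipeline.
+  options = QwenAgentOptions(
+      stderr=lambda line: cli_logger.warning("cli stderr: %s", line),
+  )
+
+
+  async def main() -> None:
+      async with QwenSDKClient(options=options) as client:
+          await client.query("Summarize the project layout in two sentences.")
+          async for message in client.receive_response():
+              ...
+
+
+  anyio.run(main)
+  ```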
+- **Examples**: + - **Streaming Query with Cost Tracking** (adapted from `third-party/anthropics/claude-agent-sdk-python/examples/basic_stream.py`): + ```python + import anyio + from qwen_agent_sdk import ( + AssistantMessage, + QwenAgentOptions, + QwenSDKClient, + ResultMessage, + query, + ) + + + async def main() -> None: + options = QwenAgentOptions(model="qwen3-coder-plus") + async for message in query(prompt="What is 2 + 2?", options=options): + if isinstance(message, AssistantMessage): + for block in message.content: + if hasattr(block, "text"): + print(block.text) + elif isinstance(message, ResultMessage) and message.total_cost_usd: + print(f"Spent this round: ${message.total_cost_usd:.4f}") + + + anyio.run(main) + ``` + - **Multi-Agent and Setting Sources** (see `third-party/anthropics/claude-agent-sdk-python/examples/agents.py` and `.../setting_sources.py`): + ```python + from qwen_agent_sdk import AgentDefinition, QwenAgentOptions, query + + + options = QwenAgentOptions( + agents={ + "doc-writer": AgentDefinition( + description="Produce structured documentation.", + prompt="You are a senior documentation engineer. Explain with clear steps.", + tools=["Read", "Write"], + ), + "tester": AgentDefinition( + description="Generate and run tests.", + prompt="You are responsible for authoring tests and verifying outcomes.", + tools=["Read", "Write", "Bash"], + ), + }, + setting_sources=["user", "project"], + ) + + + async for message in query( + prompt="Invoke doc-writer to explain what AgentDefinition is for.", + options=options, + ): + ... + ``` + - **Embedded MCP Tool** (see `third-party/anthropics/claude-agent-sdk-python/examples/mcp_calculator.py`): + ```python + from qwen_agent_sdk import ( + AssistantMessage, + QwenAgentOptions, + QwenSDKClient, + TextBlock, + create_sdk_mcp_server, + tool, + ) + + + @tool("add", "Calculate the sum of two numbers.", {"a": float, "b": float}) + async def add_numbers(args: dict[str, float]) -> dict[str, object]: + total = args["a"] + args["b"] + return {"content": [{"type": "text", "text": f"{args['a']} + {args['b']} = {total}"}]} + + + calculator = create_sdk_mcp_server( + name="calculator", + version="1.0.0", + tools=[add_numbers], + ) + + + options = QwenAgentOptions( + mcp_servers={"calc": calculator}, + allowed_tools=["mcp__calc__add"], + ) + + + async with QwenSDKClient(options=options) as client: + await client.query("Call mcp__calc__add to compute 6 + 7.") + async for message in client.receive_response(): + if isinstance(message, AssistantMessage): + for block in message.content: + if isinstance(block, TextBlock): + print(block.text) + else: + ... + ``` + +### TypeScript SDK Details + +- **Runtime and Distribution**: Requires Node.js 18+. Package name `@qwen-agent/sdk` exports ESM by default and exposes CJS via `exports`. Uses `tsup` to generate `dist/esm`, `dist/cjs`, and `dist/types`. +- **Core Dependencies**: `@qwen-code/cli` (peer dependency), `zx/execa`, `eventemitter3`. +- **API Capabilities**: + - `createAgentManager(options)`: Provides `createSession`, `run`, and `forkSession`. + - `session.stream(task)`: Returns `AsyncIterable` for consumption with `for await`. + - `onPermissionRequest`: Returns permission decisions using `allow` / `deny` / `ask` plus associated rules. + - `settingSources`: Disabled by default; explicitly declare entries like `["user", "project", "local"]` to load corresponding settings files. + - `defineTools`: Registers MCP tools that share context with CLI sessions. 
+ - `agents` option: Supports inline multi-agent topologies; combine with `forkSession` to orchestrate sub-agents. +- **Implementation Notes**: + - Launches the CLI via `execa` and parses stdout uniformly as `AgentStreamChunk`. + - `ProcessTransport` decodes stdout line by line (`JSON.parse`) and relays `control_request`, `result/*`, and `chat.completion*` events through `EventEmitter`, while sending all `control_response` payloads through stdin. + - Maintains a `result/heartbeat` timer that automatically restarts workers on timeout. + - The `pendingControl` map uses `request_id` to route `control_request` callbacks. + - Callback promises create normalized `control_response` payloads; unregistered callbacks fall back to defaults. + - `onPermissionRequest` and `onHookEvent` resolve to promises that generate `control_response`. If no callback is registered, the system keeps using default policies to avoid blocking the CLI. + - `defineTools()` packages TypeScript functions into an in-process MCP server and forwards JSON-RPC calls. + - During initialization, the SDK waits for the CLI’s first `chat.completion` handshake, then sends hook/tool capabilities via `control_request{subtype:"initialize"}`. + - Exceptional cases log verbose diagnostics and return `control_response{subtype:"error"}`. +- **Engineering System**: + - `tsup` produces `dist/esm`, `dist/cjs`, and `dist/types`. + - Test matrix uses `vitest` + `tsx` combined with CLI mocks to verify streaming output and permission callbacks. + - Exposes `enableVerboseLogging()`, `logger` / `stderr` callbacks, and plans to integrate OpenTelemetry tracing. + +### Other Language Plans (TODO) + +- **Go / Java**: Intend to reuse the unified control protocol and worker scheduling strategy, targeting enterprise and backend scenarios. +- Detailed SDK designs and implementations will follow once the Python/TypeScript versions stabilize. + +## Control Protocol and MCP Integration + +| Channel | Initiator | Typical Events | Response Requirement | Purpose | +| --- | --- | --- | --- | --- | +| `chat.completion*` | CLI → SDK | Model replies, tool invocations, closing summaries | No response required | Carries the primary conversation | +| `result/*` | CLI → SDK | `result/command`, `result/heartbeat`, `result/cancel`, `x-qwen-session-event` | No response required | Publishes status and auxiliary info | +| `*request` | SDK → CLI | `command_hint_request`, `heartbeat_request`, `control/cancel` | CLI responds with `result/*` | Triggers immediate operations | +| `control_request` | CLI → SDK | `can_use_tool`, `hook_callback`, `mcp_message` | Must return `control_response` | Executes permission checks, hooks, MCP calls | + +- All control events travel through the shared STDIN/STDOUT pipes, and the SDK must respond within the agreed timeout. +- MCP integration builds the server inside the SDK process, eliminating extra subprocesses or network services. +- Authorization callbacks and MCP calls remain decoupled: the CLI still emits `can_use_tool`, the SDK handles permission logic, and only then processes `mcp_message`. 
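
The pairing rule in the table above can be exercised with a few lines of glue code. The sketch below is illustrative only: the helper name `route_control_requests` and the handler-map shape are assumptions rather than part of any SDK API. It follows the envelope fields used in the RFC examples (`request_id`, `subtype`, `response`) and answers every `control_request` so the CLI never blocks waiting for a receipt.

```python
import json
from typing import IO, Callable


def route_control_requests(
    cli_stdout: IO[str],
    cli_stdin: IO[str],
    handlers: dict[str, Callable[[dict], dict]],
) -> None:
    """Answer control_request events read from the CLI's stdout.

    `handlers` maps a subtype such as "can_use_tool" to a function that
    returns the response payload; unknown subtypes produce an error
    response so the CLI can fall back to its default policy.
    """
    for line in cli_stdout:
        line = line.strip()
        if not line:
            continue
        event = json.loads(line)
        if event.get("type") != "control_request":
            continue  # chat.completion*, result/* etc. are routed elsewhere
        subtype = event.get("subtype")
        try:
            handler = handlers[subtype]
            payload = {"subtype": "success", "result": handler(event)}
        except Exception as exc:  # report back instead of crashing the loop
            payload = {
                "subtype": "error",
                "error": {"message": str(exc), "retryable": False},
            }
        cli_stdin.write(json.dumps({
            "type": "control_response",
            "request_id": event["request_id"],  # must echo the request's id
            "response": payload,
        }) + "\n")
        cli_stdin.flush()
```

A `can_use_tool` handler would return the `behavior=allow/deny` payload described in the subtype table below, while an `mcp_message` handler would forward the JSON-RPC body to the in-process MCP server and wrap the result as `mcp_response`.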
+ +```mermaid +sequenceDiagram + participant CLI as qwen-code CLI (stdout/stdin) + participant Control as Agent SDK ControlPlane + participant MCP as In-Process MCP Server + participant Tool as User-Defined Tool + + CLI->>Control: control_request (subtype="mcp_message", stdout) + Control->>MCP: JSON-RPC (tools/list | tools/call) + MCP->>Tool: Invoke async handler + Tool-->>MCP: Return result + MCP-->>Control: jsonrpc result (mcp_response) + Control-->>CLI: control_response (stdin) +``` + +- During initialization, `control_request{subtype:"initialize"}` synchronizes hook configuration and capability declarations. +- When callbacks throw, the SDK must log the failure and return `control_response{subtype:"error"}` so the CLI can safely fall back. + +| `control_request.subtype` | Key Input | SDK Response Constraints | Notes | +| --- | --- | --- | --- | +| `initialize` | Includes `hooks` config and optional MCP capability declarations | Return `control_response` confirming success or failure; persist `_initialization_result` | Map `HookMatcher` callbacks to `hookCallbackIds` for later invocation | +| `can_use_tool` | Provides `tool_name`, `input`, `permission_suggestions` | Return `behavior=allow/deny`, optionally `updatedInput`, `updatedPermissions` | `updatedPermissions` is an array of `PermissionUpdate`, supporting rule, directory, and mode adjustments | +| `hook_callback` | Supplies `callback_id` and hook context | Look up and execute the matching callback, then return JSON | Cache callback mappings during connection; missing entries must yield structured errors | +| `mcp_message` | Provides `server_name` and JSON-RPC `message` | Call the SDK’s MCP server and wrap the `mcp_response` | Supports standard methods (`initialize`, `tools/list`, `tools/call`); failures must return JSON-RPC error objects | + +- `PermissionUpdate` supports `addRules`, `replaceRules`, `removeRules`, `setMode`, `addDirectories`, `removeDirectories`, etc. The SDK must forward rule content and directory sets precisely to satisfy enterprise-grade governance. +- Hook configuration allows multiple events and matchers. `HookMatcher.matcher` specifies matching conditions, `hooks` lists callbacks, and the SDK generates callback IDs during initialization to route later `hook_callback` executions. + +## Communication Model and MCP Capabilities + +| Module | Form | Key Notes | +| --- | --- | --- | +| IPC Mode | STDIN/STDOUT JSON Lines | The SDK launches a local `qwen` subprocess and exchanges JSON Lines. Protocol details follow `qwen-code-cli-output-format-stream-json-rfc_cn.md`, preserving instant replies for `/`, `@`, and `?` commands. | +| In-Process MCP Server | SDK-embedded MCP server | Relies on `mcp>=0.1` to create an MCP server inside the host process, forwarding functions defined via `@tool` / `defineTools` without additional subprocesses or network hops. | + +- **IPC Implementation Highlights**: + - `SubprocessCLITransport` (or equivalent) handles both directions over the same STDIN/STDOUT pipes—no extra sockets required. + - When the CLI emits `chat.completion` / `chat.completion.chunk`, the first message’s `metadata` must carry `protocol_version`, `input_format`, `output_format`, and `capabilities`. + - Event semantics cover `result/heartbeat`, `result/cancel`, `x-qwen-session-event`, and `control_request/control_response`, and expose OpenAI-style error objects. +- **MCP Event Flow**: + - The CLI writes JSON-RPC payloads via `control_request{subtype:"mcp_message"}`. 
The SDK forwards them to the local MCP server, which handles `tools/list` / `tools/call`. + - Results are wrapped in `control_response` and written back to stdin to complete the loop. +- **Authorization Responsibilities**: + - The CLI emits `control_request{subtype:"can_use_tool"}` while the SDK decides authorization, keeping MCP invocation separate from permission checks. + - Executing tools inside the SDK process reduces latency. Hook capabilities can share the same channel, consistent with Claude Agent SDK practices. + +## Worker Pool and Reuse + +| Dimension | Design Highlights | Implementation Status | +| --- | --- | --- | +| State Machine | Idle → Busy → Idle, each worker exclusively serves a session | Designed | +| Reuse Strategy | Retain processes after sessions, clean context, and reuse | Planned rollout | +| Safety | Session isolation, resource cleanup, health checks | Delivered alongside the worker pool | +| Configuration | `min_workers`, `max_workers`, `idle_timeout`, `max_sessions_per_worker`, `health_check_interval` | Must be exposed in SDK/CLI configs | +| Observability | Structured logs, metric export, trace linkage | SDK and CLI instrument independently | + +- **Environment**: Workers are qwen-code CLI subprocesses. Containers/sandboxes and tool bridging are handled by the CLI, while the SDK only schedules via STDIN/STDOUT. +- Each worker is exclusive to a session. When the session ends, the worker returns to the idle pool. +- Reuse depends on clearing session variables, closing file handles, and resetting environment variables. +- Health checks watch for memory leaks, zombie processes, and deadlocks. Failures trigger automatic restarts. +- Example configuration: + +```yaml +worker_pool: + min_workers: 5 + max_workers: 50 + idle_timeout: 1800 # 30 minutes + max_sessions_per_worker: 100 + health_check_interval: 60 +``` + +- Session logs should record `session_id`, `prompt_id`, elapsed time, and command summaries, then feed centralized logging. +- Metrics focus on active/idle counts, queue latency, restart counts, and failure rates. Traces must propagate across SDK → CLI → tool calls. + +## Configuration Injection and Settings Management + +| Item | Capability | Notes | +| --- | --- | --- | +| `settings_profile` | Assign an isolated configuration per `QwenClient` | Affects the worker pool and subprocesses owned by that client | +| `system/system_defaults` | Provide an absolute path or JSON overrides | Mapped to CLI environment variables such as `QWEN_CODE_SYSTEM_SETTINGS_PATH` | +| `user/workspace` | JSON objects or file/directory paths | SDK generates `settings.json` in a temporary directory and mounts it for the CLI | +| `overrides` | Key-value overrides such as `model.name`, `tools.allowed` | Written directly into the temporary config file | + +- **Ecosystem Reuse**: Inherits the CLI’s multi-layer settings hierarchy (`SettingScope.System/SystemDefaults/User/Workspace`). Each `QwenClient` is isolated, and when no profile is provided the CLI follows its default loading order. +- **Implementation Steps**: + 1. Add a `settings_profile` field to `QwenClientOptions` / `QwenAgentOptions` across Python and TypeScript SDKs. + 2. Before launching CLI workers, write the profile into an isolated directory and configure `--setting-sources` / `--settings` (or equivalent flags). + 3. Set environment variables such as `QWEN_CODE_USER_SETTINGS_PATH` and `QWEN_CODE_WORKSPACE_SETTINGS_PATH` to point at the generated temporary files. + 4. 
Remove the temporary directory when the worker pool shuts down to prevent configuration leakage. +- **Logging and Troubleshooting**: Emit redacted profile summaries in logs to diagnose configuration mismatches. +- **Security Considerations**: Only the host application injects configuration—no fallback to shared paths—to prevent cross-tenant contamination. Sensitive tokens and paths must be safeguarded. +- **Compatibility**: The CLI must parse the new environment variables and safely ignore unrecognized keys. +- **Current Status Reminder**: The CLI has not yet implemented per-subprocess configuration. Future RFCs/PRs must deliver `--settings-profile` and related environment variable support. + +## Agent SDK Orchestration Capabilities + +| Module | Core Responsibility | Current Status | Next Steps | +| --- | --- | --- | --- | +| IPC Wrapper | Parse/write JSON Lines, route events | CLI still uses plain-text STDIN and needs enhancement | Introduce `StdinReaderService`, `StdoutWriterService`, and strengthen `main()` | +| Process Management | Launch, monitor, enforce resource limits, log | Initial design | Establish resource quotas and automatic restart policies | +| Control Protocol | Permission callbacks, hook injection | CLI only exposes ApprovalMode | Add permission APIs, hook system, and instrumentation | +| Input/Output Formats | `--input-format` / `--output-format` | CLI must support `stream-json` | Complete argument parsing and auto-disable TUI | +| Event Semantics | `result/heartbeat`, `control_request`, etc. | Under definition | Implement aligned with the CLI output format RFC | + +- `docs/ipc/qwen-chat-request-schema.json` extends OpenAI `/chat/completions` by adding `session_id`, `prompt_id`, `origin`, and `tool_call_id`. +- Error semantics must match the CLI output format: fatal errors emit OpenAI-style error objects, while recoverable errors use `chat.completion`. +- Immediate responses and event dispatch are required for `/`, `@`, and `?` command requests. + +## Observability and Debugging + +| Area | Highlights | +| --- | --- | +| Logging | SDK writes structured JSON by default; the CLI can forward stderr and supports injecting a `logger` | +| Metrics | SDK/CLI expose active workers, queue latency, error counts, etc., with a plan to standardize metric names | +| Trace | Create session-level spans, propagate through CLI and tool invocation chains, and enable end-to-end troubleshooting | +| Debugging Tools | TypeScript offers `enableVerboseLogging()`, Python captures `debug_stderr`, and both plan to integrate OTel | + +- Third-party services should record message sequences to support auditing and replay. +- CLI command samples can be reused for local debugging (see the output format RFC). + +## Integration Model + +| Pattern | Applicable Scenarios | Key Traits | +| --- | --- | --- | +| Host Process Integrates SDK | IDE plugins, enterprise tools, CLI extensions | Launch worker pools inside the host process, communicate with the CLI via IPC JSONL, support synchronous and streaming output | + +**Quick Start Example**: + +```python +from qwen_agent_sdk import QwenClient + +with QwenClient(binary_path="qwen", model="qwen3-coder-plus") as client: + result = client.chat( + task="Scan main.py for potential bugs and fix them.", + workspace="/repos/demo" + ) + print(result.summary) +``` + +- Third-party applications can rely on `qwen-agent-sdk` to manage sessions, tools, and permission policies uniformly. 
+- The SDK must support session replay and cancellation, heartbeat maintenance, and timeout control. +- **Example: All-in-One Session Scaffold** (see `third-party/claude-agent-sdk-python-demo/quick_start_example.py`): + ```python + import anyio + from qwen_agent_sdk import ( + AssistantMessage, + QwenAgentOptions, + QwenSDKClient, + TextBlock, + create_sdk_mcp_server, + query, + tool, + ) + + + @tool("get_system_info", "Retrieve system information.", {}) + async def get_system_info(_: dict[str, object]) -> dict[str, object]: + import os + import platform + + summary = "\n".join( + [ + f"- OS: {platform.system()} {platform.release()}", + f"- Python version: {platform.python_version()}", + f"- Current directory: {os.getcwd()}", + ] + ) + return {"content": [{"type": "text", "text": summary}]} + + + async def run_all_examples() -> None: + # 1) Basic Q&A + async for msg in query(prompt="Hello, introduce yourself."): + if isinstance(msg, AssistantMessage): + for block in msg.content: + if isinstance(block, TextBlock): + print(block.text) + + # 2) Custom parameters + streaming + options = QwenAgentOptions(system_prompt="You are an experienced assistant. Respond step by step.") + async for msg in query("Explain the difference between multiprocessing and multithreading.", options=options): + ... + + # 3) Register custom tools + server = create_sdk_mcp_server(name="my-tools", version="1.0.0", tools=[get_system_info]) + async with QwenSDKClient( + options=QwenAgentOptions( + mcp_servers={"tools": server}, + allowed_tools=["mcp__tools__get_system_info"], + ) + ) as client: + await client.query("Fetch the current runtime environment.") + async for msg in client.receive_response(): + ... + + + anyio.run(run_all_examples) + ``` + +## Open Items and Future Work + +| Direction | Pending Tasks | +| --- | --- | +| CLI Protocol Support | Complete `stream-json` input/output, event semantics, and handshake metadata | +| Control Protocol | Implement permission callback APIs, hook instrumentation, MCP `mcp_message` pathway | +| Worker Pool | Improve health checks, resource quotas, and automatic fault isolation | +| Configuration System | Add CLI support for `--settings-profile` and related environment variables | +| Multi-Language SDKs | Expand Go/Java versions, build a unified test matrix and documentation | +| Observability | Jointly define metric names and trace sampling strategies, refine logging guidelines | diff --git a/docs/rfc/qwen-code-agent-framework-rfc_cn.md b/docs/rfc/qwen-code-agent-framework-rfc_cn.md new file mode 100644 index 000000000..8400b1bb8 --- /dev/null +++ b/docs/rfc/qwen-code-agent-framework-rfc_cn.md @@ -0,0 +1,445 @@ +# Qwen-Code Agent 框架架构设计 + +> **设计版本**: v1.1 +> **最后更新**: 2025-10-10 + +## 设计说明 + +本文档描述了 Qwen-Code Agent 框架的完整架构设计。 + +### 重要概念澄清 + +**核心组件: Qwen-Code Agent SDK** + +文档聚焦于 **Qwen-Code Agent SDK** 的设计,基于统一的IPC协议为各语言提供对应的agent sdk,给第三方应用提供qwen code集成开发,提供对qwen code统一的会话调度、进程管理与控制协议能力。 + +- **作用**: + - 在宿主应用进程内封装会话路由与控制协议 + - 负责 Worker 池的生命周期、健康检查与调度 + - 提供一致的 集成API,符合qwen code定义的IPC/JSONL 抽象,屏蔽 CLI 细节 +- **形态**: SDK + +核心功能: + +- 会话调度与路由 +- CLI 子进程生命周期与资源治理 +- 控制协议 Hook / 权限判定 +- 轻量日志输出与可观测接入 +- 观察性数据采集 (日志、指标、追踪) + +## 完整系统架构 + +```mermaid +flowchart LR + subgraph Clients["第三方应用 / 服务"] + direction LR + TypeScriptSDK["qwen-agent-sdk
TypeScript"] + PythonSDK["qwen-agent-sdk
Python"] + GoSDK["qwen-agent-sdk
Go (TODO)"] + JavaSDK["qwen-agent-sdk
Java (TODO)"] + end + + subgraph AgentSDK["Qwen-Code Agent SDK"] + direction TB + Router["会话调度
路由 / 负载均衡"] + ControlPlane["控制协议
Hook / 权限判定"] + WorkerPool["Worker 池管理
分配 / 回收 / 健康检查"] + ProcessMgr["子进程管理
启动 / 监控 / 重启"] + IPC["IPC 适配层
STDIN/STDOUT JSONL"] + end + + subgraph Workers["qwen-code CLI Workers"] + direction LR + Worker1["Worker #1
qwen-code CLI"] + Worker2["Worker #2
qwen-code CLI"] + WorkerN["Worker #N"] + end + + subgraph Services["外围服务"] + LLM_MCP["大模型服务/MCP 服务"] + Monitor["监控告警"] + Logger["日志中心"] + Trace["链路追踪"] + end + + Clients --> Router + Router --> ControlPlane + Router --> WorkerPool + WorkerPool --> ProcessMgr + ProcessMgr --> IPC + ControlPlane -->|control_response| IPC + IPC -->|control_request| ControlPlane + IPC --> Worker1 + IPC --> Worker2 + IPC --> WorkerN + + Worker1 --> LLM_MCP + Worker2 --> LLM_MCP + WorkerN --> LLM_MCP + + Router --> Monitor + Router --> Logger + Router --> Trace + IPC -->|result/*| ControlPlane + ControlPlane -->|request| IPC + + classDef clientStyle fill:#e67e22,stroke:#ba6c1e,color:#fff + classDef sdkStyle fill:#f39c12,stroke:#ca7e08,color:#fff + classDef workerStyle fill:#16a085,stroke:#138d75,color:#fff + classDef serviceStyle fill:#95a5a6,stroke:#707b7c,color:#fff + class Clients,TypeScriptSDK,PythonSDK,GoSDK,JavaSDK clientStyle + class AgentSDK,Router,ControlPlane,ProcessMgr,IPC,WorkerPool sdkStyle + class Workers,Worker1,Worker2,WorkerN workerStyle + class Services,MCP,Monitor,Logger,Trace serviceStyle +``` + +> **双向通信说明**: +> - Agent SDK 与 qwen-code CLI 共用 STDIN/STDOUT 建立双向 JSONL 通道。 +> - CLI 将 `chat.completion`/`result/*` 及 `control_request` 等事件 (比如工具权限请求、MCP 调用) 逐行写入 stdout。 +> - SDK 读取事件后, 视需求通过 stdin 写回 `request`/`control_response` 等控制消息, 保持状态一致。 +> - 当事件拓展为 `control_request` 内的 `subtype:"mcp_message"`(规划中, 参考 Claude 控制协议)时, ControlPlane 会把 JSON-RPC 转发给 In-Process MCP Server 执行对应工具, 再把 `mcp_response` 封装进 `control_response` 回传 CLI, 形成闭环。 + +> **可观测协同说明**: +> - qwen-code CLI 已接入 OpenTelemetry, 上报模型调用、工具执行、CLI 内部事件等指标与 Trace。 +> - Agent SDK 需独立接入 OpenTelemetry, 聚焦会话排队、Worker 生命周期、STDIO 错误等 SDK 层指标/日志/Trace。 +> - 两侧通过统一 Trace/Span ID 串联, 形成端到端链路:SDK 提供上游调度与排队视角, CLI 提供下游执行细节, 便于统一排障。 + +> **事件分类提示**: 图中 `result/*`、`request`、`control_request` 等箭头对应的详细事件语义, 请参见《qwen-code-cli-output-format-stream-json-rfc_cn.md》的“事件机制分类”章节。 + +## 关键组件说明 + +### 1. 
Qwen-Code Agent Client SDK + +- **多语言支持**: + - `qwen-agent-sdk` (Python): 首发绑定, 复用 TypeScript 控制协议并提供 Pythonic API + - `qwen-agent-sdk` (TypeScript): 核心实现, 提供子进程编排与控制协议能力 + - `qwen-agent-sdk` (Go): TODO + - `qwen-agent-sdk` (Java): TODO + +- **适用场景**: + - 第三方后端服务集成 + - 希望自定义交互层体验的场景 + - 服务端到服务端调用 + +- **核心功能**: +- 同步/异步任务执行 +- 流式输出 +- 会话管理 +- 错误处理与重试 +- In-Process MCP Server 工具桥接 +- 独立配置注入 + +#### 通信模式 + +- **IPC 模式**: SDK 启动本地 `qwen` 子进程,以 JSON Lines 协议进行进程间通信。 + +> 📘 IPC 模式的协议与最新 CLI IPC 草案详见《qwen-code-cli-output-format-stream-json-rfc_cn.md》。 + +#### In-Process MCP Server + +- **形态**: SDK 依赖 `mcp>=0.1` 在宿主进程内创建 MCP Server, 将通过 `@tool`/`defineTools` 定义的函数注册为工具, 无需额外子进程或网络服务。 +- **事件链路**: 当 CLI 输出(或后续扩展为) `control_request` 内的 `subtype:"mcp_message"` 事件到 stdout 时, SDK 把 JSON-RPC 内容转发给本地 MCP Server 执行 `tools/list`、`tools/call` 等操作, 并把结果封装为 `control_response` 回写 stdin。 +- **授权分工**: 工具执行前的授权仍由 CLI 触发 `control_request{subtype:"can_use_tool"}` 等事件并交由 SDK 回调处理, 与 MCP 调用链路解耦, 保持权限判定的独立性。 +- **优势**: 在保持 CLI 原有授权/审批流程不变的前提下, 将工具执行搬到 SDK 进程内以降低延迟; Hook 能力仍待 CLI 侧落地后再沿同一通路接入。整体方案与 Claude Agent SDK 的 In-Process 实践对齐, 便于多语言同步拓展。 + +#### 独立配置注入 (每个 QwenClient) + +- **能力概述**: 允许第三方在创建 `QwenClient` 时可选提供一份针对该客户端的 qwen-code 设置配置 (`settings_profile`), 仅影响该客户端维护的 Worker 池及其衍生子进程。 +- **作用范围**: 生态复用 CLI 的多层设置体系 (`SettingScope.System/SystemDefaults/User/Workspace`), 但不同 `QwenClient` 间相互隔离; 未提供 `settings_profile` 时, CLI 按既有规则从系统/用户/项目路径加载。 +- **形态**: + - `settings_profile.system` / `system_defaults`: 绝对路径或 JSON overrides, 映射到 CLI 中 `QWEN_CODE_SYSTEM_SETTINGS_PATH` / `QWEN_CODE_SYSTEM_DEFAULTS_PATH` (见 `packages/cli/src/config/settings.ts`)。 + - `settings_profile.user` / `workspace`: 可传入 JSON 对象或指向文件/目录; SDK 会在临时目录生成 `settings.json`, 并通过新引入的环境变量 (如 `QWEN_CODE_USER_SETTINGS_PATH`, `QWEN_CODE_WORKSPACE_SETTINGS_PATH`) 或 CLI `--settings-profile` 选项挂载到子进程。 + - `settings_profile.overrides`: 额外键值对, 直接写入生成的临时配置文件, 用于快速覆盖单个字段 (如 `model.name`, `tools.allowed`)。 +- **实现要点**: + 1. `QwenClientOptions`/`QwenAgentOptions` 同步新增 `settings_profile` 字段, Python/TypeScript SDK 均支持。 + 2. 在 Worker 池启动 CLI 前, 根据 profile 将配置写入隔离目录, 设置所需环境变量并更新 `--setting-sources` / `--settings` 参数。 + 3. Worker 池销毁时负责清理临时目录, 避免配置泄漏。 + 4. 
日志中打印 profile 摘要 (不含敏感字段), 便于排查配置错配。 +- **安全考量**: 配置仅由宿主应用传入, SDK 不自动回退到共享路径; 避免跨租户污染。需在文档中提示敏感字段 (Token/路径) 应妥善管理。 +- **兼容性**: CLI 未定义的环境变量需在 CLI 端补齐解析逻辑, 并回退到默认行为 (忽略未知变量)。 +- **实施备注**: 目前 qwen-code CLI 尚未支持针对单个子进程注入独立配置。该能力需另行提交 CLI 端 RFC 与 PR(例如新增 `--settings-profile` 参数及相关环境变量), 在 CLI 实现完成后 SDK 方可落地。 + +```mermaid +sequenceDiagram + participant CLI as qwen-code CLI (stdout/stdin) + participant Control as Agent SDK ControlPlane + participant MCP as In-Process MCP Server + participant Tool as 用户自定义工具 + + CLI->>Control: control_request (subtype="mcp_message",stdout) + Control->>MCP: JSON-RPC (tools/list | tools/call) + MCP->>Tool: 调用异步处理函数 + Tool-->>MCP: 处理结果 + MCP-->>Control: jsonrpc result (mcp_response) + Control-->>CLI: control_response (stdin) +``` + +**集成方式**: + +```python +# 第三方通过 Agent SDK 集成 +import anyio +from qwen_agent_sdk import query, QwenAgentOptions + +async def main(): + async for message in query( + prompt="扫描并修复 @main.py 中的潜在 bug", + options=QwenAgentOptions(cwd="/repos/demo") + ): + print(message) + +anyio.run(main) +``` + +## 各语言 SDK 技术选型 + +### qwen-agent-sdk-python + +#### 运行时与分发 + +- **语言要求**: Python 3.10+, 与 Anthropic Python SDK 一致, 保障 `typing.Annotated`, `match` 等语法可用。 +- **包结构**: 采用 `pyproject.toml` + `hatchling` 构建, 发布 `py.typed` 以提供类型提示, 命名空间为 `qwen_agent_sdk`。 +- **环境依赖**: 需预装 Node.js 与 `qwen-code` CLI, SDK 启动前通过 `which qwen` 或 `QWEN_BIN` 环境变量定位二进制。 + +#### 核心依赖 + +- `anyio>=4`: 与 Anthropic SDK 一致, 统一 async/await 事件循环并兼容 Trio。 +- `typing_extensions`: 兼容 3.10/3.11 的 `TypedDict`, `NotRequired` 能力。 +- `mcp>=0.1`: 复用 In-Process MCP Server 能力, 支持装饰器式工具定义。 +- `pydantic>=2` (新增): 用于严格校验 JSONL 消息、权限响应与 Hook payload。 + +#### API 设计 + +- **快速函数**: `async def query(...) -> AsyncIterator[Message]`, 语义与 Anthropic `query()` 对齐, 支持字符串与 `AsyncIterable` 输入。 +- **会话客户端**: `class QwenSDKClient`, 支持 `async with` 上下文、会话续写与中断, 暴露 `receive_response()` 流式读取。 +- **选项对象**: `QwenAgentOptions`, 对齐 TypeScript `AgentOptions`, 含 `system_prompt`, `setting_sources`, `permission_mode`, `cwd`, `fork_session`。 +- **工具注册**: `@tool` 装饰器 + `create_sdk_mcp_server`, 允许 Python 原生函数作为 CLI 工具。 + +#### 技术实现要点 + +- **Transport 抽象**: 提供 `StdIOSubprocessTransport`, 负责启动 `qwen` CLI、写入 JSONL、读取流式 chunk, 并将 CLI 事件映射为 Python 数据类。 +- **工具权限回调**: 暴露与 CLI `control_request{subtype:"can_use_tool"}` 对应的处理接口, 接收工具名、输入 payload 与上下文, 返回 `PermissionResult` 以告知允许/拒绝及补充规则。 +- **Hook 体系**: 支持 `PreToolUse`、`PostToolUse`、`UserPromptSubmit` 等事件, 允许返回 JSON 指令修改会话 (参考 Anthropic Hook JSON)。 +- **可插拔日志**: 提供基于 `structlog` 的观察性接口, 默认输出标准 JSON 日志, 支持注入自定义 logger。 +- **轻量日志约定**: 默认按 `logging` 标准输出结构化 JSON, 允许通过 `options.stderr`/`debug_stderr` 捕获 CLI 原始错误流, 满足快速排障需求。 +- **OpenTelemetry 接入**: 规划内置 OTel Tracer/Meter, 上报 SDK 自身的会话耗时、传输错误、Worker 利用率等指标, 与 CLI 侧指标解耦。 +- **错误恢复**: 针对 CLI 崩溃提供自动重试与会话 fork, 保存最后一次成功结果以便断点续传。 + +#### 双向控制协议实现参考 (对齐 Claude Agent SDK) + +- **STDIO 对称信道**: 对齐《qwen-code-cli-output-format-stream-json-rfc_cn.md》定义的 `control_request`/`control_response` 语义, `SubprocessCLITransport` 始终通过同一 STDIN/STDOUT 管道处理正向/反向消息, 无需额外套接字。 +- **事件监听与解复用**: `Query._read_messages()` 按行读取 CLI 输出, 将 `type=control_request` 的 JSON 派发给 `_handle_control_request()`, 并通过 `pending_control_responses` + `request_id` 映射保证多请求并发时的正确回执。 +- **权限 / Hook / MCP 托管**: `_handle_control_request()` 将 `subtype` 映射到对应的回调: `can_use_tool` 触发 SDK 提供的权限协程, `hook_callback` 执行注册 Hook, 规划中的 `mcp_message` 事件则桥接到 in-process MCP Server (`tools/list`、`tools/call`、`initialize` 等)。处理结果统一写入 STDIN, 形成 `control_response` 行。 +- **初始化握手**: 流式模式下 `Query.initialize()` 先发起 
`control_request{subtype:"initialize"}`, 同步 Hook 配置, 使 CLI 在后续事件中具备回调 SDK 的上下文与能力声明。 +- **故障回退**: 若回调抛异常, SDK 会返回 `subtype:error` 的 `control_response`, CLI 可依协议退回默认策略 (例如自动拒绝危险工具)。对齐 Claude SDK 的处理方式可降低双方协议分歧, 也是实现 SDK 端双向通信的参考蓝本。 + +#### 测试与示例 + +- **测试栈**: 采用 `pytest + pytest-asyncio` 与 `ruff + mypy` 形成与 Anthropic 仓库一致的质量门槛。 +- **示例**: 提供 `examples/quickstart.py`, `examples/mcp_calculator.py` 等, 展示工具注册、流式消费、权限回调落地。 + +### qwen-agent-sdk-typescript + +#### 运行时与分发 + +- **Node 要求**: Node.js 18+, 与 Anthropic TypeScript SDK 保持一致, 支持 `AbortController`, `EventTarget` 等 API。 +- **包结构**: 主包 `@qwen-agent/sdk` 使用 ESM 默认导出, 通过 `exports` 字段同时暴露 `import` 与 `require` 入口; 类型声明由 `TypeScript 5.x` 编译生成。 +- **构建流水线**: 采用 `tsup` 打包出 `dist/esm` 与 `dist/cjs`, 并生成 `dist/types`。 + +#### 核心依赖 + +- `@qwen-code/cli` (peerDependency): 由宿主应用负责安装, SDK 仅负责调度。 +- `zx`/`execa`: 子进程管理与跨平台管道封装。 +- `eventemitter3`: 会话事件派发。 + +#### API 设计 + +- **AgentClient**: `createAgentManager(options)` 返回具备 `createSession`, `run`, `forkSession` 能力的管理器, 语义对齐 Anthropic `ClaudeAgent`。 +- **流式 API**: `session.stream(task)` 返回 `AsyncIterable`, 支持 `for await` 迭代。 +- **权限体系**: 暴露 `onPermissionRequest` 回调, 允许应用返回 `allow/deny/ask` 与额外规则。 +- **自定义工具**: 支持依赖 MCP 服务 (`defineTools`), 允许通过 TypeScript 函数注册工具, 与 CLI 会话共享上下文。 +- **设置源控制**: `settingSources` 默认关闭, 需显式声明 `["user","project","local"]` 方可加载对应文件。 +- **子代理**: `agents` 选项允许内联定义多代理拓扑, 结合 `forkSession` 进行会话分支。 + +#### 技术实现要点 + +- **子进程编排**: 使用 `execa` 启动 `qwen` CLI, 统一将 stdout 解析为 `AgentStreamChunk`, 并通过 `AbortSignal` 支持取消。 +- **心跳与超时**: 管理器维护 `result/heartbeat` 定时器, 超时自动触发重启与会话恢复。 +- **权限同步**: 将 `onPermissionRequest` 结果转为 JSONL `control_response`, 保证与 Python 绑定行为一致。 +- **调试工具**: 提供 `enableVerboseLogging()` 开关, 输出 CLI 命令、payload、耗时指标。 +- **日志采集**: 默认记录进程级 stdout/stderr 与事件时间轴, 支持注入 `logger`/`stderr` 回调, 并规划接入 OpenTelemetry Trace 以跟踪会话与 Worker 生命周期。 +- **测试矩阵**: 使用 `vitest` + `tsx` 覆盖, 结合 `@qwen-code/cli` mock 校验流式输出与权限回调。 + +#### 双向控制协议实现参考 (对齐 Claude Agent SDK TS) + +- **统一 STDIO 通道**: 复用 CLI `stream-json` 定义的结构, `ProcessTransport` 将 stdout 逐行解码 (`JSON.parse`) 并通过 `EventEmitter` 推送 `control_request`、`result/*`、`chat.completion*` 等事件;所有反向 `control_response` 均通过同一子进程 stdin 写回, 遵循 RFC 中的 JSON Lines 约定。 +- **请求/响应路由**: `createAgentManager()` 在会话级维护 `pendingControl` 映射, `request_id` 作为 key, 保障 `control_request` 并发时的正确配对;若超时则触发 `AbortController.abort()` 并向 CLI 返回 `subtype:"error"`。 +- **权限与 Hook 回调**: `onPermissionRequest`、`onHookEvent` 等回调被包装为 Promise, 统一生成 `control_response` payload (`{"response":{"behavior":"allow"}}` 等); 若上层未注册回调, SDK 直接返回 RFC 规定的默认策略, 避免 CLI 阻塞。 +- **MCP 工具桥接**: `defineTools()` 将 TypeScript 函数组装为 SDK 依赖的 MCP server, CLI 预计通过 `control_request` 中的 `subtype:"mcp_message"` 事件发起 `tools/list`/`tools/call`(对齐 Claude 控制协议设计), SDK 使用 JSON-RPC 透传至 in-process server 并回写 `mcp_response` 字段, 行为与 Python 版本一致。 +- **初始化握手**: 会话启动时, SDK 主动等待 CLI 首条 `chat.completion` 握手元数据 (`protocol_version`,`capabilities`), 同时根据 RFC 在首个 `control_request{subtype:"initialize"}` 中附带 Hook 配置与工具能力声明, 以便 CLI 构建完整的会话态。 +- **异常降级**: 当反向回调抛出异常或序列化失败时, SDK 会记录 verbose 日志并发送 `control_response{subtype:"error"}`, 提醒 CLI 走安全回退路径 (例如拒绝危险命令), 与 Anthropics TypeScript SDK 的容错策略保持一致。 + +### 其它语言支持 (TODO) + +- **Go/Java**: TODO。 + +### 2. 
Qwen-Code Agent SDK (子进程调度层) + +> - IPC 封装: (StdinReader, StdoutWriter, 消息路由) +> - 控制协议: (ControlProtocolService, Hook Registry) + +Qwen-Code Agent SDK 直接管理 qwen-code CLI 子进程,负责通信、生命周期与权限控制。 + +- **IPC 封装**: + - 基于 STDIN/STDOUT 的 JSON Lines 协议,输入遵循 `docs/ipc/qwen-chat-request-schema.json`(扩展自 OpenAI `/chat/completions`,包含 `session_id`、`prompt_id`、`tool_call_id` 等会话字段)。 + - CLI 需提供 `--input-format {text,stream-json}` 与 `--output-format {text,stream-json,stream-chunk-json}` 参数,结构化模式自动禁用 TUI,仅 `text` 模式保留原有人机交互。 + - 输出逐行写入 OpenAI 风格的 `chat.completion` / `chat.completion.chunk` 对象;首条响应需在 `metadata` 中携带 `protocol_version`、`input_format`、`output_format` 以及 `capabilities`(需显式包含 `chat.completion.chunk` 能力位)等握手信息。 + - 事件语义需覆盖 `result/heartbeat`、`result/cancel`、`x-qwen-session-event` 与 `control_request/control_response`,并定义对应的错误对象与回退策略。 + - **当前状态**: qwen-code 仅支持简单的 STDIN 文本读取 (非 JSON Lines) + - **需要工作**: + - 新增 `StdinReaderService` (~200 行): 解析结构化请求并保持 `/`、`@`、`?` 命令的即时回执。 + - 新增 `StdoutWriterService` (~150 行): 输出 `chat.completion` / `chat.completion.chunk` JSON Lines,封装错误语义。 + - 改造 `main()` 入口支持握手元数据、事件分发与流式模式 (~100 行)。 + - 扩展 CLI 参数解析,落实完整格式选项及 TUI 自动禁用逻辑。 + - 实现 `result/heartbeat`、`result/cancel`、`x-qwen-session-event`、`control_request/control_response` 的调度骨架。 + +- **进程管理**: + - Worker 进程启动、监控、异常重启 + - 进程生命周期管理 + - 资源限制 (CPU/内存/超时) + - 关键日志采集 (启动/重启/超时/退出码), 输出到结构化日志或 OpenTelemetry 事件 + +- **控制协议**: + - 工具权限动态授予/撤销 + - Hook 回调 (pre-commit、post-execute 等) + - 会话级配置传递 + - **当前状态**: qwen-code 有 `ApprovalMode` 权限机制,但不支持程序化回调 + - **需要工作**: + - 新增权限回调接口 + - 新增 Hooks 系统和事件机制 + - 在工具执行流程中插入 Hook 点 + +### 3. Qwen-Code Workers 进程池 + +热启动的 CLI 进程池,每个 Worker 独立运行。 + +**环境说明**: Worker 本质是 qwen-code CLI 子进程,其容器/沙箱与工具桥接逻辑均由 CLI 自主管理,SDK 只负责通过 STDIN/STDOUT 进行调度与控制。 + +**Worker 状态机**: + +``` +空闲 (Idle) + ↓ [新会话分配] +占用 (Busy) - 绑定到 session_xxx + ↓ [会话结束] +空闲 (Idle) - 等待新会话复用 +``` + +**关键特性**: + +- **独占机制**:一个 Worker 一次只能服务一个会话,保证会话隔离。 +- **会话绑定**:Worker 与会话 ID 绑定,期间不接受其他任务。 +- **复用机制**: + - 会话结束后 Worker **不会自动销毁** + - Worker 返回空闲池,等待新会话分配 + - 新会话复用现有进程,创建新的会话上下文 + - 大幅减少冷启动时间,提高响应速度 + +**Worker 复用流程**: + +``` +1. 会话 A 结束 → Worker #1 状态变为 [空闲] +2. 新会话 B 到达 → 协调器分配 Worker #1 +3. Worker #1 状态变为 [占用 - session_B] +4. Worker #1 在同一进程内创建新会话上下文 +5. 会话 B 执行完毕 → Worker #1 再次变为 [空闲] +``` + +**进程池配置**: + +- `min_workers`:最小保活 Worker 数量 +- `max_workers`:最大 Worker 数量上限 +- `idle_timeout`:空闲 Worker 超时回收时间 (默认 30 分钟) +- `max_concurrent_sessions`:单 Worker 生命周期内最大服务会话数 + +## Worker 复用机制详解 + +### 为什么需要 Worker 复用? + +**问题**:每次新会话启动全新进程会导致: + +- 进程冷启动耗时 (3-5 秒) +- 模型加载耗时 (如果涉及本地模型) +- 资源开销大 (频繁创建/销毁进程) + +**方案**:Worker 进程复用 + +- 进程保持运行,会话结束后只清理会话上下文 +- 新会话到达时直接在现有进程中创建新会话 +- 响应速度提升 **10-20 倍** + +### 复用安全性保障 + +1. **会话隔离**: + - 每个会话独立的上下文空间 + - 会话结束时清理所有会话变量和状态 + - 下一个会话无法访问上一个会话的数据 + +2. **资源清理**: + - 临时文件自动清理 + - 环境变量重置 + - 打开的文件句柄关闭 + +3. 
**健康检查**: + - 定期检测 Worker 内存泄漏 + - 检测僵尸进程或卡死状态 + - 异常 Worker 自动重启 + +### 复用策略配置 + +```yaml +worker_pool: + # 最小保活 Worker 数 + min_workers: 5 + + # 最大 Worker 数 + max_workers: 50 + + # 空闲 Worker 超时回收 (秒) + idle_timeout: 1800 # 30 分钟 + + # 单个 Worker 最大服务会话数 (防止内存泄漏) + max_sessions_per_worker: 100 + + # Worker 健康检查间隔 (秒) + health_check_interval: 60 +``` + +### 日志与可观测要求 + +- **会话日志**: Worker 在会话开始、结束及异常重启时输出结构化 JSON 日志 (session_id、prompt_id、耗时、命令摘要), 支持通过 SDK 的 `logger`/`stderr` 回调或集中式日志系统采集。 +- **指标采集**: SDK 侧规划基于 OpenTelemetry 导出 Worker 池指标 (活跃/空闲数量、排队时长、重启次数、失败率), 与 CLI 层指标互补, 便于平台层聚合。 +- **链路追踪**: 为每个会话生成 Trace Span, 在 SDK → CLI → 工具调用之间传播, 方便定位跨组件延迟与失败点。 + +## 集成模式 + +### 宿主进程依赖 SDK + +- **适用场景**: IDE 插件、企业内部工具、CLI 扩展等需要最小化依赖的场合。 +- **关键特性**: + - SDK 直接在宿主进程内启动与管理 Worker 池 + - 通过 IPC JSONL 协议与 qwen-code CLI 通信 + - 可同步或流式获取会话输出 + +**快速上手示例**: + +```python +from qwen_agent_sdk import QwenClient + +with QwenClient(binary_path="qwen", model="qwen3-coder-plus") as client: + result = client.chat( + task="扫描并修复 main.py 中的潜在 bug", + workspace="/repos/demo" + ) + print(result.summary) +``` diff --git a/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_clear_cn.md b/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_clear_cn.md new file mode 100644 index 000000000..d3ec35868 --- /dev/null +++ b/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_clear_cn.md @@ -0,0 +1,608 @@ +# RFC: Qwen-Code CLI 结构化输入输出规范(整理版) + +## 概览 + +| 字段 | 详情 | +| --- | --- | +| 状态 | Draft | +| 更新时间 | 2025-10-13 | +| 作者 | x22x22 | +| 追踪 | | +| 范围 | CLI 层 `--input-format/--output-format` 结构化协议、事件语义、错误规范与落地计划 | + +- 目标是为第三方系统与多语言 Agent SDK 提供稳定、可编程的 IPC Stream JSON 能力。 +- 协议保持与 TUI 相同的行为,补齐 JSON Lines 输出、对称输入以及控制通道,回应社区关于 `--input-format/--output-format json/stream-json` 的诉求。 +- 文档聚焦 CLI 侧能力,不涵盖 SDK 内部设计。 + +## 背景与场景 + +### Issue 795 概述 +- 社区在 issue [#795](https://github.com/QwenLM/qwen-code/issues/795) 中请求为 CLI 增加 `--input-format/--output-format json/stream-json`,希望参考 Claude Code 的实现,提供可被程序稳定消费的结构化 IO。 +- RFC 旨在扩展 CLI,使其在保持 TUI 兼容的同时,为 SDK 与第三方后端提供对称的 JSON 协议与消息语义。 + +### 集成方场景 +- **任务级串行处理**:SDK 逐条发送 prompt,同时在多个阶段接收 CLI 返回的数据并二次处理后再展示给最终用户。 +- **任务级流式直传**:SDK 逐条发送 prompt,CLI 的流式响应被直接转发给用户,保持实时可见性。 +- **指令提示与快捷符号**:第三方输入框中的 `/`、`@`、`?` 等触发行为需要与 TUI 完全一致,确保不同前端体验统一。 +- **前端终端模拟**:利用 xterm.js 等库复刻终端交互,输入区域与终端输出分离,但仍需消费 CLI 的完整终端语义。 + +### 集成方式 +- 第三方程序依赖后续提供的多语言 `qwen-code-agent-sdk`。 +- SDK 通过子进程方式启动 `qwen code`,并以 STDIN/STDOUT 建立双向 IPC。 +- SDK 负责读取 CLI 的结构化输出,并完成错误处理与状态管理。 +- 第三方应用消费 SDK 的结果,在自身 UI 或后端逻辑中呈现。 + +### 现状痛点 +- CLI 仅面向人工交互的纯文本 STDOUT,输出语义不稳定,难以被自动化消费。 +- 缺乏对称的结构化输入协议,无法驱动高级自动化与工具调度流程。 + +## 目标与范围 + +| 类型 | 内容 | +| --- | --- | +| 设计目标 | 可配置输出格式、JSON Lines 流式协议、对称结构化输入、通用 schema、面向 SDK 友好化 | +| 非目标 | 描述 SDK 具体实现(另见 Agent 框架文档) | +| 核心痛点 | 仅有人机交互 STDOUT、缺少结构化输入、无法驱动自动化流程 | +| 场景示例 | SDK 分批发送 prompt 并处理多段响应;流式直传用户;`/`,`@`,`?` 指令与 TUI 对齐;xterm.js 前端分离输入与终端 | + +## 接口总览 + +| 类别 | 关键项 | 说明 | +| --- | --- | --- | +| CLI 参数 | `--input-format`、`--output-format` | 取值 `text` / `stream-json` / `stream-chunk-json`,结构化模式自动禁用 TUI | +| 输出事件 | `chat.completion*`、`result/*`、`control_request` | 全部以 JSON Lines 逐行写入 STDOUT | +| 输入事件 | `*request`、`control_response`、Qwen Chat Request | JSON 行写入 STDIN,对称驱动 CLI 行为 | +| 通道语义 | 结果事件、请求事件、控制通道 | 明确回执要求,防止 CLI 阻塞 | +| 协议扩展 | 握手元数据、版本协商、错误语义 | 与 OpenAI `/chat/completions` 保持兼容扩展 | + +- 通信仍使用标准输入输出,未引入额外套接字。 +- `text` 模式保留原行为,结构化模式提供稳定 schema 与可观测字段。 + +## 输出格式语义 + +| 格式 | 适用场景 | 行为概要 | 兼容性 | +| --- | --- | --- | --- | +| `text` | 人机交互兼容模式 | 输出原有 TUI 文本 | 默认模式,后续标记为手动使用 | +| `stream-json` | 
消息级 JSONL | 每行 `chat.completion`,含初始化回执、助手回复、工具调用、收尾摘要 | 对齐 OpenAI `/chat/completions` | +| `stream-chunk-json` | 增量 chunk JSONL | 每行 `chat.completion.chunk`,`choices[].delta` 承载 token/块增量 | 对齐 OpenAI 流式响应,覆盖完整会话 ID | + +### 消费策略 + +- **消息级 JSONL(`stream-json`)**:适合第三方后端或 CLI 包装器按阶段消费结果,与现有 JSONL 管线兼容,是默认结构化模式。 +- **增量 chunk(`stream-chunk-json`)**:面向 IDE/UI“边生成边展示”的实时场景,SDK 需监听 `chat.completion.chunk` 并在收到最终 `finish_reason` 时收尾。 +- **终端语义一致性**:无论 CLI 在文本模式还是结构化模式,`stream-json` 与 `stream-chunk-json` 都必须完整覆盖 TUI 向标准输出写入的全部语义(文本、ANSI/Vt100 控制、工具提示、退出码等),并通过 `choices[].message.content` / `choices[].delta.content` 与 `choices[].delta.annotations`(如 `{"type":"x-qwen-ansi","value":"\u001b[32m"}`)编码,便于 xterm.js 等终端完全还原效果。 + +### `stream-json` 示例 + +```json +{"object":"chat.completion","id":"chatcmpl-session-123","created":1739430000,"model":"qwen-coder","choices":[{"index":0,"message":{"role":"assistant","content":"正在分析...","tool_calls":null},"finish_reason":"stop"}],"usage":{"prompt_tokens":1200,"completion_tokens":80,"total_tokens":1280}} +{"object":"chat.completion","id":"chatcmpl-session-123","created":1739430002,"model":"qwen-coder","choices":[{"index":0,"message":{"role":"assistant","tool_calls":[{"id":"tool-1","type":"function","function":{"name":"edit_file","arguments":"..."}}]},"finish_reason":"tool_calls"}]} +{"object":"chat.completion","id":"chatcmpl-session-123","created":1739430010,"model":"qwen-coder","choices":[{"index":0,"message":{"role":"assistant","content":"修复完成,已更新文件。"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1600,"completion_tokens":200,"total_tokens":1800}} +``` + +### `stream-chunk-json` 行为要点 + +- 首行发送 `{"object":"chat.completion.chunk","choices":[{"delta":{"role":"assistant"}}]}` 声明角色。 +- 按需输出文本 token、工具调用增量、`tool_calls` 更新。 +- 最后一行包含 `{"choices":[{"delta":{},"finish_reason":"stop"}]}`,并在 `usage` 或 `metadata` 中附带总结。 +- 可选 `annotations`、`spans` 字段详述终端样式(见下节)。 + +## 事件载荷与注解 + +`packages/cli/src/nonInteractiveCli.ts` 与 `packages/cli/src/ui/utils/ConsolePatcher.ts` 共同决定了文本模式输出的来源:模型内容通过 `GeminiEventType.Content` 写入 `stdout`,工具执行状态与日志由 `ConsolePatcher` 打印至 `stderr`,工具结果结构体 (`ToolResultDisplay` 等) 则在 `packages/cli/src/ui/hooks/useReactToolScheduler.ts` 中驱动 UI 渲染。为保证 `stream-json` / `stream-chunk-json` 能完整覆盖这些语义,整理版沿用原 RFC 在 OpenAI `annotations` 字段上的扩展约定: + +| 类型 | 主要字段 | 用途 | +| --- | --- | --- | +| `chat.completion.chunk` 注解 | `annotations`、`spans` | 复刻终端风格、ANSI 控制、来源标记 | +| `x-qwen-terminal` | `channel`、`source`、`console_level`、`ansi` | 输出终端流(stdout/stderr/console/system) | +| `x-qwen-tool-display` | `tool_call_id`、`status`、`result_display` | 呈现工具 diff、字符串、TODO、计划摘要、任务执行等 | +| `x-qwen-thought` | `subject`、`description` | 展示思考中提示(GeminiEventType.Thought) | +| `x-qwen-session-event` | `event`、`message`、`metrics` | 会话级提示,如压缩、取消、token 限制 | + +### 终端注解结构 + +```json +{ + "type": "x-qwen-terminal", + "channel": "stdout", + "source": "assistant", + "spans": [ + {"start": 0, "end": 24, "style": {"theme_token": "AccentGreen"}} + ], + "ansi": [ + {"offset": 0, "code": "\u001b[32m"}, + {"offset": 24, "code": "\u001b[0m"} + ], + "console_level": "info", + "exit_code": null, + "prompt_id": "session-123########7" +} +``` + +- `channel`: `stdout` / `stderr`,控制台日志通过 `ConsolePatcher` 注入 `stderr` 与 `console_level`。 +- `source`: `assistant`、`tool`、`console`、`system`;便于前端分层展示。 +- `spans.style.theme_token`: 复用 CLI 主题 (`AccentGreen`、`DiffAdded` 等)。 +- `ansi`: 原始 ANSI 序列位置,方便前端重放。 +- `console_level`: 当 `source=console` 时取 `log` / `warn` / `error` / `info` / `debug`,与 `ConsolePatcher` 输出保持一致。 
+- `exit_code`: 当 `source=system` 且流程结束时给出退出码。 +- `prompt_id`: 关联到具体回合。 + +### 工具结果展示 + +```json +{ + "type": "x-qwen-tool-display", + "tool_call_id": "call_tool-1", + "session_id": "session-123", + "status": "executing", + "result_display": { + "kind": "file_diff", + "file_name": "src/main.py", + "diff": "--- a/src/main.py\n+++ b/src/main.py\n@@ -1 +1 @@\n-print('Hi')\n+print('Hello')", + "original": "print('Hi')\n", + "modified": "print('Hello')\n", + "stat": { + "ai_added_lines": 1, + "ai_removed_lines": 1, + "user_added_lines": 0, + "user_removed_lines": 0 + } + }, + "confirmation": null, + "pending": false, + "timestamp": 1739430005 +} +``` + +- `status` 取自 `ToolCallStatus`(`Pending`、`Executing`、`Success`、`Error`、`Canceled`、`Confirming`)。 +- `tool_call_id` 复用 OpenAI schema 字段名,结合 `session_id` 可唯一定位调用;在测试或非会话模式下也可单独使用。 +- `result_display` 支持 `string`、`file_diff`、`todo_list`、`plan_summary`、`task_execution` 等 union。 +- `confirmation` 序列化 `ToolCallConfirmationDetails` 中的 diff、命令、计划等信息,便于第三方弹窗确认。 +- `pending=true` 表示调用仍在验证/排队阶段,尚未交给执行器,与 `ToolCallStatus.Pending` 等价。 +- `timestamp` 用于排序,与 `useReactToolScheduler` 记录一致。 + +### 思考与会话事件 + +```json +{ + "type": "x-qwen-thought", + "subject": "Analyzing repo", + "description": "Listing tsconfig patterns..." +} +``` + +```json +{ + "type": "x-qwen-session-event", + "event": "MAX_TOKENS", + "message": "Response truncated due to token limits.", + "metrics": { + "original_tokens": 12000, + "compressed_tokens": 8000 + } +} +``` + +- `event` 取值来自 `GeminiEventType`,包括 `Finished`、`ChatCompressed`、`MaxSessionTurns`、`USER_CANCELLED` 等。 +- `metrics` 可选地提供压缩前后 token 数等统计。 + +## 输入格式(Qwen 会话协议) + +| 模式 | 行为 | 说明 | +| --- | --- | --- | +| `text` | 保留原始 TUI 文本输入 | 解析自然语言或命令行文本 | +| `stream-json` / `stream-chunk-json` | 采用 Qwen Chat Request | 每行 JSON 描述一次增量输入 | + +### Qwen Chat Request 模式 + +```jsonc +{ + "session_id": "session-123", + "prompt_id": "session-123########7", + "model": "qwen-coder", + "input": { + "origin": "user", + "parts": [ + {"type": "text", "text": "请修复 @main.py 的 bug"} + ], + "command": null + }, + "options": { + "temperature": 0.2, + "tool_overrides": ["EditTool"] + } +} +``` + +- `session_id`:会话主键(`config.getSessionId()`),传入 `"_new"` 可创建新会话。 +- `prompt_id`:区分回合;默认格式 `########`,须在工具续写时复用。 +- `input.origin`:`user` / `tool_response` / `system`,决定会话续写逻辑。 +- `input.parts`:兼容 `@google/genai` PartListUnion,允许 `text`、`function_response`、`file_data` 等;当 `origin="user"` 时,CLI 会将所有 `text` part 顺序拼接并复用 `prepareQueryForGemini` 的语义处理。 +- `options`:单次请求参数覆写(模型、采样、工具白名单)。 +- 扩展字段: + - `tool_call_id`:`origin=tool_response` 时必填,用于匹配输出事件。 + - `continuation`: 布尔值,等价 `submitQuery(...,{isContinuation:true})`;缺省时 CLI 会根据 `origin` 与命令上下文判断是否续写。 + - `tool_request`: 镜像 `ToolCallRequestInfo`,支撑并发工具与子代理。 + +### 会话控制 + +- 第三方可调用 CLI 提供的“创建会话”命令或复用已有 ID;当 `session_id` 缺失或指定为 `"_new"` 时,CLI 会在首个 `chat.completion` 中返回实际 ID。 +- `prompt_id` 与 `tool_call_id` 共同确保并发流程隔离,需在工具回传与续写时保持一致;默认格式 `########`,也可自定义但必须唯一。 +- 使用 `input.origin="system"` 并在 `parts` 中发送 `{"type":"instruction","text":"/clear"}` 等指令,可触发与 TUI 相同的 slash 命令逻辑。 +- 当 `origin="tool_response"` 时,必须提供 `tool_call_id` 以关联输出事件;CLI 会将结果写入对应回合并继续调度。 + +### 命令与 `@` 引用 + +| 模式 | 触发方式 | 行为 | +| --- | --- | --- | +| 隐式解析 | `origin="user"` + 文本以 `/`/`?`/`@` 开头 | CLI 自动走 slash/at 流程,调用 `handleAtCommand` 等逻辑 | +| 显式声明 | `input.command` 描述命令 | 推荐给第三方,避免字符串解析歧义 | + +- `command.kind`: `slash` / `at`,与 TUI 命令分类一致。 +- `command.path`: 对于 slash 命令是层级数组(等价 `commandPath`),`at` 模式可省略。 +- `command.args`: 剩余参数字符串,CLI 会按原逻辑解析。 +- 
`input.command.references` 支持在 `kind="at"` 时直接提供已解析的引用列表,例如 `{ "original":"@src/main.ts","resolved":"src/main.ts" }`,CLI 会基于显式路径读取文件。 +- 若未传入 `references`,CLI 将回退到 `handleAtCommand` 与 `useAtCompletion` 的自动解析逻辑,以保持与 TUI 相同的容错能力。 + +#### 显式 slash 命令示例 + +```jsonc +{ + "session_id": "session-123", + "prompt_id": "session-123########8", + "input": { + "origin": "user", + "parts": [{"type": "text", "text": ""}], + "command": { + "kind": "slash", + "path": ["chat", "list"], + "args": "" + } + } +} +``` + +#### 显式 `@` 引用示例 + +```jsonc +{ + "session_id": "session-123", + "input": { + "origin": "user", + "parts": [{"type": "text", "text": "请审阅 @src/main.py"}], + "command": { + "kind": "at", + "references": [ + {"original": "@src/main.py", "resolved": "src/main.py"} + ] + } + } +} +``` + +### SDK 侧命令协作 + +| `command.result.type` | 说明 | SDK 建议动作 | +| --- | --- | --- | +| `handled` | 命令已在 CLI 内部完成 | 无需额外处理 | +| `message` | 返回信息或错误 | 直接在 UI 显示通知 | +| `dialog` (`auth`/`theme`/`editor`/`privacy`/`settings`/`model`/`subagent_create`/`subagent_list`/`help`) | 需要弹窗或页面跳转 | 在第三方界面呈现对应对话框 | +| `tool` | 触发工具调用 | 将 `tool_request` 或命令参数转为工具请求并继续监听结果 | +| `submit_prompt` | 立即发送 PartListUnion 至模型 | 将 `content` 作为下一条输入并设置 `continuation=true` | +| `load_history` | 重置或加载指定会话历史 | 触发历史刷新或重新加载 | +| `quit` / `quit_confirmation` | 退出流程或等待用户确认 | 控制宿主应用生命周期并回传确认结果 | +| `confirm_shell_commands` | 待确认 shell 命令 | 弹窗确认,批准后携带 `approvedCommands`/`confirmationOutcome` 再次调用 | +| `confirm_action` | 需要确认提示 | 提供确认按钮并返回结构化结果 | + +- SDK 应暴露统一命令执行 API,将用户输入映射为上述 `command` 结构,并根据 `result` 类型驱动本地 UI 或后续请求。 +- CLI 会使用显式 `references` 读取文件;若缺失则自动回退到 `handleAtCommand` 的解析流程,保证行为与 TUI 完全一致。 +- 命令触发后续模型调用时,CLI 会继续输出 `assistant`、`tool_call` 与 `result/*` 事件,顺序保持与 TUI 相同,使第三方可以通过纯文本输入与 JSON 输出复现完整交互。 + +### STDIN 命令回执 + +- 当 `--input-format=stream-json` 时,CLI 必须对 `/`、`?`、`@` 等命令保持即时反馈,解析逻辑沿用 `useSlashCommandProcessor` 与 `handleAtCommand`。 +- 命令解析完成后,CLI 将向 STDOUT 写出结构化响应: + +```jsonc +{ + "type": "result/command", + "session_id": "session-123", + "prompt_id": "session-123########8", + "command": { + "kind": "slash", + "path": ["chat", "list"], + "args": "" + }, + "result": { + "type": "message", + "level": "info", + "content": "当前会话共有 3 条历史记录" + } +} +``` + +- `result` 字段遵循上表所列 `command.result.type` 枚举,便于 SDK 在收到 `stream-json`/`stream-chunk-json` 时立即驱动 UI。 +- 若命令触发进一步的模型交互(如 `/submit`、`@file` 展开),CLI 会在同一会话中继续串联对应事件并保持字段一致。 + +## 实时提示、心跳与中断 + +| 能力 | 请求 | 响应 | 说明 | +| --- | --- | --- | --- | +| 命令提示 | `command_hint_request` | `result/command_hint` | 字符触发提示;`trigger` 支持 `slash`、`at`;`status` 可为 `ok` / `loading` / `error` | +| 心跳 | `heartbeat_request` | `result/heartbeat` | 定期保活;CLI 可主动推送同结构事件 | +| 中断/取消 | `control/cancel` | `result/cancel` + `control_response` | 模拟 ESC;`reason` 当前固定 `escape` | + +### 提示请求示例(`/c`) + +```jsonc +{ + "type": "command_hint_request", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "text": "/c", + "cursor": 2, + "context": { + "cwd": "/workspace/demo", + "selected_text": "" + } +} +``` + +### 提示响应示例 + +```jsonc +{ + "type": "result/command_hint", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "status": "ok", + "suggestions": [ + {"label": "chat", "value": "chat", "description": "Manage conversation history."}, + {"label": "clear", "value": "clear", "description": "clear the screen and conversation history"}, + {"label": "compress", "value": "compress", "description": "Compresses the context by replacing it with a 
summary."}, + {"label": "copy", "value": "copy", "description": "Copy the last result or code snippet to clipboard"}, + {"label": "corgi", "value": "corgi", "description": "Toggles corgi mode."} + ], + "metadata": { + "is_perfect_match": false + } +} +``` + +### 提示请求示例(`@src/co`) + +```jsonc +{ + "type": "command_hint_request", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "at", + "text": "@src/co", + "cursor": 7, + "context": { + "cwd": "/workspace/demo", + "selected_text": "" + } +} +``` + +### 提示响应示例(`@src/co`) + +```jsonc +{ + "type": "result/command_hint", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "at", + "status": "ok", + "suggestions": [ + {"label": "src/components/", "value": "src/components/"}, + {"label": "src/components/Button.tsx", "value": "src/components/Button.tsx"}, + {"label": "src/components/Button with spaces.tsx", "value": "src/components/Button\\ with\\ spaces.tsx"} + ], + "metadata": { + "is_perfect_match": false + } +} +``` + +### 提示请求示例(`/?`) + +```jsonc +{ + "type": "command_hint_request", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "text": "/?", + "cursor": 2, + "context": { + "cwd": "/workspace/demo", + "selected_text": "" + } +} +``` + +### 提示响应示例(`/?`) + +```jsonc +{ + "type": "result/command_hint", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "status": "ok", + "suggestions": [ + { + "label": "help", + "value": "help", + "description": "for help on Qwen Code", + "matchedIndex": 0 + } + ], + "metadata": { + "is_perfect_match": true + } +} +``` + +- `suggestions` 结构复用 TUI `Suggestion`,`status="loading"` 表示 CLI 正在准备数据,`error` 时附带 `message`。 +- CLI 内部复用 `useSlashCompletion`、`useAtCompletion` 生成提示;这些请求不会写入会话历史,`prompt_id` 可使用 `_preview` 后缀并在响应中原样返回。 +- 支持连续触发:输入或光标变化时可重复发送 `command_hint_request`,CLI 负责节流并返回最新结果;取消提示时发送 `{"type":"command_hint_cancel",...}`。 + +### 心跳请求与响应 + +```jsonc +{"type":"heartbeat_request","session_id":"session-123"} +{"type":"result/heartbeat","session_id":"session-123","status":"ok","ts":1739430123} +``` + +- 第三方可配置超时(如 10 秒)判断 CLI 是否挂起并执行重启。 +- CLI 会按相同结构回复 `result/heartbeat`,也可在后台主动推送保活事件。 +- `@third-party/anthropics/claude-agent-sdk-python` 目前缺少心跳实现;P1.1 落地时需定义默认间隔、超时策略,并允许 SDK 自定义心跳频率。 + +### 中断示例 + +```jsonc +{ + "type": "control/cancel", + "session_id": "session-123", + "prompt_id": "session-123########8", + "reason": "escape" +} +``` + +- CLI 必须调用 `cancelOngoingRequest`,中止 `AbortController`,补齐历史项并发出 `result/cancel`。 +- 若当前无可取消请求,CLI 应返回 `{"type":"result/cancel","status":"noop"}` 并说明原因。 +- 当底层流返回 `GeminiEventType.UserCancelled` 事件时,CLI 需追加 `{"type":"x-qwen-session-event","event":"USER_CANCELLED","message":"User cancelled the request."}` 提示会话被中断。 +- 双击 ESC 清空输入属于客户端自身逻辑,结构化模式的集成方可在本地复用该交互,无需额外向 CLI 发送消息。 + +## 事件分类与通道 + +| 类别 | 方向 | 代表事件 | 回执要求 | 作用 | +| --- | --- | --- | --- | --- | +| 结果事件 | CLI → STDOUT | `result/command`、`result/command_hint`、`result/heartbeat`、`result/cancel`、`x-qwen-session-event` | 无需回执 | 发布命令输出、状态提示、心跳结果 | +| 请求事件 | SDK/第三方 → STDIN | `command_hint_request`、`heartbeat_request`、`control/cancel` | CLI 返回对应 `result/*` | 触发即时行为或控制 | +| 控制通道 | CLI ↔ STDIN/STDOUT | `control_request` / `control_response` | 必须匹配 `request_id` | 权限审批、Hook 回调、MCP 调用 | + +按语义还可归纳三类事件机制: + +1. **结果事件 (`result/*`)** + - CLI → STDOUT 的单向通告,承载命令输出、提示建议、心跳反馈、会话状态更新等信息。 + - 不要求第三方回执,可直接用于 UI 展示与日志记录。 +2. 
**请求事件 (`*request`)** + - 第三方 → STDIN 的即时指令,例如 `command_hint_request`、`heartbeat_request`、`control/cancel`。 + - CLI 会以对应的 `result/*` 响应,确保提示、保活与取消流程与 TUI 一致。 +3. **控制通道事件 (`control_request` / `control_response`)** + - CLI 输出 `control_request`、第三方写回 `control_response`,用于工具授权、Hook 回调、MCP 调用等需要回执的场景。 + - 每条请求含唯一 `request_id`,必须在合理超时内返回结果,该类事件不写入会话历史而走控制层旁路。 + +- 所有事件通过统一 JSON Lines 协议传输,集成方应按 `type`/`subtype` 路由;对控制通道需实现超时与错误回退策略,以避免 CLI 阻塞并保持与 TUI 行为一致。 + +## 控制请求与响应 + +| 字段 | 说明 | +| --- | --- | +| `type` | 固定 `control_request` 或 `control_response` | +| `request_id` | 唯一标识,请求与响应配对 | +| `subtype` | `can_use_tool`、`hook_callback`、`mcp_message` 等 | +| `payload` / `response` | 携带事件明细或回执内容 | + +- `control_request` 示例: + +```jsonc +{ + "type": "control_request", + "request_id": "req-1", + "subtype": "can_use_tool", + "session_id": "session-123", + "prompt_id": "session-123########8", + "tool": { + "name": "edit_file", + "arguments": {"path": "main.py", "patch": "..."} + }, + "metadata": { + "reason": "apply_diff" + } +} +``` + +- 对应 `control_response`: + +```jsonc +{ + "type": "control_response", + "request_id": "req-1", + "response": { + "subtype": "success", + "result": { + "behavior": "approve", + "message": "允许执行" + } + } +} +``` + +- 代码现状:当前 CLI 仅支持单向输出,TUI 内的确认/对话框逻辑在进程内处理,缺少 Claude Code 样式的 `control_request`/`control_response` hook。 +- 设计需求:当 CLI 需要外部确认或回执时输出 `control_request`,第三方在 STDIN 写入匹配的 `control_response` 完成授权或补充信息,覆盖工具审批、Hook 回调、MCP JSON-RPC 等场景。 +- 场景覆盖:`/confirm_shell_commands`、`confirm_action`、`quit_confirmation`、工具权限审批、子代理调度,以及未来的弹窗、表单、身份验证流程。 +- 回退策略:若控制通道未启用,CLI 应显式拒绝危险操作或提示“结构化模式下不可用”,并通过 `result/command` 返回错误,避免静默失败。 +- 后续工作:1)在 RFC 中追加 `control_request`/`control_response` JSON Schema;2)在 CLI 内抽象统一控制消息分发层,让 TUI 与 CLI 复用逻辑;3)在 SDK 中实现监听与响应,向上层 UI 暴露钩子。 +- 若回调异常,SDK 需返回 `{"subtype":"error","error":{"message":"...", "retryable":false}}`,CLI 按协议走安全回退(自动拒绝或提示失败)。 +- MCP 集成:`subtype:"mcp_message"` 承载 JSON-RPC (`tools/list`、`tools/call` 等),SDK 将结果封装为 `control_response` 内的 `mcp_response`。 + +## 版本协商与错误语义 + +| 项目 | 说明 | +| --- | --- | +| 协议版本 | 首个 `chat.completion` 的 `metadata` / `system_fingerprint` 携带 `protocol_version`、`input_format`、`output_format`、`capabilities` | +| 版本不匹配 | 若 SDK 请求超出能力,CLI 返回 `finish_reason="error"`,在 `metadata.error` 中标记 `unsupported_protocol` 并以非零退出码终止 | +| 致命错误 | 输出 OpenAI 风格错误对象并退出 | +| 可恢复错误 | 通过 `chat.completion` 返回错误信息,`finish_reason` 为 `stop/tool_calls`,进程保持健康 | +| 控制协议错误 | 在 `metadata.control_errors` 中附带详情,供 SDK 决定重试或终止 | + +致命错误示例: + +```json +{ + "error": { + "message": "invalid tool input", + "type": "invalid_request_error", + "param": "tools[0].function.arguments", + "code": "QWEN_INVALID_TOOL_ARGS" + } +} +``` + +## 安全与资源控制 + +| 领域 | 策略 | +| --- | --- | +| 权限 | 结构化模式仍遵循 CLI 现有 Approval / 工具白名单;无回执时默认拒绝高风险操作 | +| 审计 | 控制通道允许 SDK 在敏感动作前进行审计;未启用时需在 `result/command` 明确提示 | +| 保活 | `heartbeat` 事件可触发进程回收、避免资源泄漏 | + +## 日志分层与可观测 + +| 组件 | 要点 | +| --- | --- | +| ConsolePatcher | 拦截 `console.*`,在 `x-qwen-terminal` 中记录 `channel="stderr"`、`console_level` | +| log_scope 扩展 | 建议在结构化模式下为注解附加 `log_scope`(`system`、`tool`、`debug`),与 `ConfigLogger` 级别对齐 | +| 工具日志 | 通过 `ToolResultDisplay` 输出,可在 `result_display` 中附带 `log_scope` 便于过滤 | +| OTel 规划 | SDK/CLI 分别接入 OpenTelemetry,串联 Trace/Span | + +- 建议第三方记录完整消息序列到审计日志,便于重放。 + +## 调试示例 + +```bash +echo '{"model":"qwen-coder","messages":[{"role":"user","content":"你好"}]}' \ + | qwen --input-format stream-json --output-format stream-json + +echo '{"model":"qwen-coder","messages":[{"role":"user","content":"逐字输出问候"}]}' \ + | 
qwen --input-format stream-json --output-format stream-chunk-json +``` + +- 命令可用于快速验证输出格式与事件流。 diff --git a/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_clear_en.md b/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_clear_en.md new file mode 100644 index 000000000..edbca6a31 --- /dev/null +++ b/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_clear_en.md @@ -0,0 +1,656 @@ +# RFC: Qwen-Code CLI Structured Input/Output Specification (Clean Version) + +## Overview + +| Field | Details | +| --- | --- | +| Status | Draft | +| Last Updated | 2025-10-13 | +| Author | x22x22 | +| Tracking | | +| Scope | CLI-level `--input-format/--output-format` structured protocol, event semantics, error specification, and rollout plan | + +- Aims to provide third-party systems and multi-language Agent SDKs with stable, programmable IPC Stream JSON capabilities. +- Keeps protocol behavior aligned with the TUI, adds JSON Lines output, symmetric input, and control channels to address community requests for `--input-format/--output-format json/stream-json`. +- The document focuses on CLI capabilities and does not cover SDK internals. + +## Background and Scenarios + +### Issue 795 Overview +- Community issue [#795](https://github.com/QwenLM/qwen-code/issues/795) requests `--input-format/--output-format json/stream-json`, aiming to mirror the Claude Code implementation so downstream programs can reliably consume structured IO. +- This RFC expands the CLI while staying compatible with existing TUI behavior, giving SDKs and backends symmetric JSON protocols and message semantics. + +### Integration Scenarios +- **Task-level staged handling**: SDK submits prompts one by one, receives multiple partial results, post-processes them, and finally presents them to end users. +- **Task-level streaming**: SDK submits prompts and streams the CLI’s response directly to users for real-time visibility. +- **Instruction shortcuts**: Third-party input boxes must support `/`, `@`, and `?` triggers with behavior identical to the TUI to guarantee a consistent UX. +- **Terminal simulation**: Frontends such as xterm.js mimic terminal output while separating the input field from the display, yet still need full terminal semantics from the CLI. + +### Integration Approach +- Third parties will rely on forthcoming multi-language `qwen-code-agent-sdk` packages. +- The SDK spawns `qwen code` as a subprocess and uses STDIN/STDOUT for bidirectional IPC. +- SDKs read the CLI’s structured output and handle errors and state transitions. +- Upper-layer applications consume the SDK’s results and render them in their own UI or services. + +### Current Pain Points +- The CLI currently emits human-oriented text, making structured consumption brittle. +- There is no symmetric structured input protocol, preventing higher-level automation and tool orchestration. 
+ +## Goals and Scope + +| Type | Content | +| --- | --- | +| Design Goals | Configurable output formats, JSON Lines streaming protocol, symmetric structured input, common schemas, SDK-friendly | +| Non-Goals | Describing concrete SDK implementations (see the Agent Framework document) | +| Core Pain Points | Only human-readable STDOUT, lack of structured input, cannot drive automation | +| Scenario Examples | SDK sending prompts in batches and handling multi-segment responses; streaming directly to users; `/`, `@`, `?` commands matching TUI behavior; xterm.js frontends splitting input from terminal display | + +## Interface Overview + +| Category | Key Items | Description | +| --- | --- | --- | +| CLI Parameters | `--input-format`, `--output-format` | Values: `text` / `stream-json` / `stream-chunk-json`; structured modes automatically disable the TUI | +| Output Events | `chat.completion*`, `result/*`, `control_request` | Written to STDOUT line by line as JSON Lines | +| Input Events | `*request`, `control_response`, Qwen Chat Request | Written to STDIN as JSON Lines to drive CLI behavior symmetrically | +| Channel Semantics | Result events, request events, control channel | Clarify acknowledgment requirements to prevent CLI blocking | +| Protocol Extensions | Handshake metadata, version negotiation, error semantics | Remain compatible extensions to OpenAI `/chat/completions` | + +- Communication still uses standard input and output; no extra sockets are introduced. +- `text` mode preserves the original behavior, while structured modes provide stable schemas and observability fields. + +## Output Format Semantics + +| Format | Applicable Scenarios | Behavior Summary | Compatibility | +| --- | --- | --- | --- | +| `text` | Human-interactive compatibility mode | Outputs the original TUI text | Default mode, to be marked for manual use later | +| `stream-json` | Message-level JSONL | Each line is a `chat.completion`, including initialization receipts, assistant replies, tool invocations, and closing summaries | Aligns with OpenAI `/chat/completions` | +| `stream-chunk-json` | Incremental chunk JSONL | Each line is a `chat.completion.chunk`, with `choices[].delta` carrying token/block increments | Aligns with OpenAI streaming responses, with full session IDs | + +### Consumption Strategy + +- **Message-level JSONL (`stream-json`)**: Suited to third-party backends or CLI wrappers that process results in stages; compatible with existing JSONL pipelines and considered the default structured mode. +- **Incremental chunks (`stream-chunk-json`)**: Targets IDE/UI scenarios that need “display as it streams”; SDKs should watch for `chat.completion.chunk` events and finalize once the terminal `finish_reason` arrives. +- **Terminal semantics parity**: Regardless of whether the CLI runs in text or structured mode, `stream-json` / `stream-chunk-json` must cover every semantic emitted by the TUI (text, ANSI/Vt100 control codes, tool hints, exit codes, etc.) using `choices[].message.content` / `choices[].delta.content` plus `choices[].delta.annotations` (e.g. `{"type":"x-qwen-ansi","value":"\u001b[32m"}`) so frontends like xterm.js can faithfully reproduce the terminal experience. 
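
As a concrete illustration of the incremental strategy, the following sketch (not part of any SDK, and assuming the CLI is available on `PATH` as `qwen`) sends one request and concatenates `choices[].delta.content` until a `finish_reason` arrives. The request body is a minimal OpenAI-style payload used only for illustration; real integrations should send the Qwen Chat Request schema described below.

```python
import json
import subprocess

# Minimal consumer sketch for --output-format stream-chunk-json: collect the
# assistant text from choices[].delta.content until finish_reason arrives.
proc = subprocess.Popen(
    ["qwen", "--input-format", "stream-json", "--output-format", "stream-chunk-json"],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    text=True,
)

# Illustrative request body; production integrations should use the
# Qwen Chat Request schema defined later in this RFC.
proc.stdin.write(json.dumps({
    "model": "qwen-coder",
    "messages": [{"role": "user", "content": "Say hello"}],
}) + "\n")
proc.stdin.flush()
proc.stdin.close()

answer: list[str] = []
for line in proc.stdout:
    event = json.loads(line)
    if event.get("object") != "chat.completion.chunk":
        continue  # result/* and control_request events need their own routing
    choice = event["choices"][0]
    delta = choice.get("delta", {})
    if delta.get("content"):
        answer.append(delta["content"])
    if choice.get("finish_reason"):  # e.g. "stop" on the terminating chunk
        break

print("".join(answer))
```

For `stream-json`, the same loop applies but each line is already a complete `chat.completion`, so no delta accumulation is needed.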
+ +### `stream-json` Example + +```json +{"object":"chat.completion","id":"chatcmpl-session-123","created":1739430000,"model":"qwen-coder","choices":[{"index":0,"message":{"role":"assistant","content":"Analyzing...","tool_calls":null},"finish_reason":"stop"}],"usage":{"prompt_tokens":1200,"completion_tokens":80,"total_tokens":1280}} +{"object":"chat.completion","id":"chatcmpl-session-123","created":1739430002,"model":"qwen-coder","choices":[{"index":0,"message":{"role":"assistant","tool_calls":[{"id":"tool-1","type":"function","function":{"name":"edit_file","arguments":"..."}}]},"finish_reason":"tool_calls"}]} +{"object":"chat.completion","id":"chatcmpl-session-123","created":1739430010,"model":"qwen-coder","choices":[{"index":0,"message":{"role":"assistant","content":"Fix completed; the file has been updated."},"finish_reason":"stop"}],"usage":{"prompt_tokens":1600,"completion_tokens":200,"total_tokens":1800}} +``` + +### `stream-chunk-json` Behavior Highlights + +- First line sends `{"object":"chat.completion.chunk","choices":[{"delta":{"role":"assistant"}}]}` to declare the role. +- Outputs text tokens, tool call increments, and `tool_calls` updates as needed. +- Final line includes `{"choices":[{"delta":{},"finish_reason":"stop"}]}` and attaches a summary in `usage` or `metadata`. +- Optional `annotations` and `spans` fields describe terminal styling (see below). + +## Event Payloads and Annotations + +`packages/cli/src/nonInteractiveCli.ts` and `packages/cli/src/ui/utils/ConsolePatcher.ts` jointly decide how text-mode output is produced: model content is appended to `stdout` via `GeminiEventType.Content`, tool execution state and logs are printed to `stderr` through `ConsolePatcher`, and tool result structures (`ToolResultDisplay`, etc.) are rendered by `packages/cli/src/ui/hooks/useReactToolScheduler.ts`. To ensure `stream-json` / `stream-chunk-json` covers every semantic, we reuse the OpenAI `annotations` field with the following conventions: + +| Type | Key Fields | Purpose | +| --- | --- | --- | +| `chat.completion.chunk` annotation | `annotations`, `spans` | Reproduce terminal styling, ANSI control codes, and origin labels | +| `x-qwen-terminal` | `channel`, `source`, `console_level`, `ansi` | Output terminal streams (stdout/stderr/console/system) | +| `x-qwen-tool-display` | `tool_call_id`, `status`, `result_display` | Present tool diffs, strings, TODOs, plan summaries, task execution, etc. | +| `x-qwen-thought` | `subject`, `description` | Show thinking prompts (GeminiEventType.Thought) | +| `x-qwen-session-event` | `event`, `message`, `metrics` | Session-level notifications such as compression, cancellation, and token limits | + +### Terminal Annotation Structure + +```json +{ + "type": "x-qwen-terminal", + "channel": "stdout", + "source": "assistant", + "spans": [ + {"start": 0, "end": 24, "style": {"theme_token": "AccentGreen"}} + ], + "ansi": [ + {"offset": 0, "code": "\u001b[32m"}, + {"offset": 24, "code": "\u001b[0m"} + ], + "console_level": "info", + "exit_code": null, + "prompt_id": "session-123########7" +} +``` + +- `channel`: `stdout` / `stderr`; console logs use `ConsolePatcher` to inject `stderr` with `console_level`. +- `source`: `assistant`, `tool`, `console`, `system`, enabling layered display on the frontend. +- `spans.style.theme_token`: Reuse CLI themes (`AccentGreen`, `DiffAdded`, etc.). +- `ansi`: Positions of raw ANSI sequences for frontends to replay. 
+- `console_level`: When `source=console`, takes values `log`, `warn`, `error`, `info`, or `debug` so that downstream consumers can mirror `ConsolePatcher` behavior. +- `exit_code`: Provided when `source=system` and the flow finishes. +- `prompt_id`: Associates the record with a specific turn. + +### Tool Result Display + +```json +{ + "type": "x-qwen-tool-display", + "tool_call_id": "call_tool-1", + "session_id": "session-123", + "status": "executing", + "result_display": { + "kind": "file_diff", + "file_name": "src/main.py", + "diff": "--- a/src/main.py\n+++ b/src/main.py\n@@ -1 +1 @@\n-print('Hi')\n+print('Hello')", + "original": "print('Hi')\n", + "modified": "print('Hello')\n", + "stat": { + "ai_added_lines": 1, + "ai_removed_lines": 1, + "user_added_lines": 0, + "user_removed_lines": 0 + } + }, + "confirmation": null, + "pending": false, + "timestamp": 1739430005 +} +``` + +- `status` follows `ToolCallStatus` (`Pending`, `Executing`, `Success`, `Error`, `Canceled`, `Confirming`). +- `tool_call_id` reuses the OpenAI schema field; together with `session_id` it uniquely locates a call, and can be reused even in test/non-session modes. +- `result_display` supports unions such as `string`, `file_diff`, `todo_list`, `plan_summary`, and `task_execution`. +- `confirmation` serializes the relevant `ToolCallConfirmationDetails` (diffs, commands, MCP payloads) so third parties can surface approval dialogs. +- `pending=true` indicates the call is still validating/queued and matches `ToolCallStatus.Pending`. +- `timestamp` is used for ordering and matches records from `useReactToolScheduler`. + +### Thought and Session Events + +```json +{ + "type": "x-qwen-thought", + "subject": "Analyzing repo", + "description": "Listing tsconfig patterns..." +} +``` + +```json +{ + "type": "x-qwen-session-event", + "event": "MAX_TOKENS", + "message": "Response truncated due to token limits.", + "metrics": { + "original_tokens": 12000, + "compressed_tokens": 8000 + } +} +``` + +- `event` values derive from `GeminiEventType`, including `Finished`, `ChatCompressed`, `MaxSessionTurns`, `USER_CANCELLED`, etc. +- `metrics` optionally provide statistics such as tokens before and after compression. + +## Input Format (Qwen Session Protocol) + +| Mode | Behavior | Notes | +| --- | --- | --- | +| `text` | Preserve original TUI text input | Parse natural language or command-line text | +| `stream-json` / `stream-chunk-json` | Use Qwen Chat Request | Each JSON line describes one incremental input | + +### Qwen Chat Request Mode + +```jsonc +{ + "session_id": "session-123", + "prompt_id": "session-123########7", + "model": "qwen-coder", + "input": { + "origin": "user", + "parts": [ + {"type": "text", "text": "Please fix the bug in @main.py"} + ], + "command": null + }, + "options": { + "temperature": 0.2, + "tool_overrides": ["EditTool"] + } +} +``` + +- `session_id`: Session identifier (`config.getSessionId()`); pass `"_new"` to create a new session. +- `prompt_id`: Distinguishes turns; default format `########`, and must be reused when tools continue the conversation. +- `input.origin`: `user` / `tool_response` / `system`, determining how the session continues. +- `input.parts`: Compatible with `@google/genai` PartListUnion, allowing `text`, `function_response`, `file_data`, etc. When `origin="user"`, the CLI concatenates all text parts in order and reuses the same semantics as `prepareQueryForGemini`. +- `options`: Per-request overrides (model, sampling, tool allowlist). 
+- Extended fields: + - `tool_call_id`: Required when `origin=tool_response` to match output events. + - `continuation`: Boolean equivalent to `submitQuery(...,{isContinuation:true})`; if omitted, the CLI infers continuation based on `origin` and command context. + - `tool_request`: Mirrors `ToolCallRequestInfo` to support concurrent tools and sub-agents. + +### Session Control + +- Third parties may invoke the CLI’s “create session” command or reuse an existing ID; when `session_id` is missing or set to `"_new"`, the CLI returns the actual ID in the first `chat.completion`. +- `prompt_id` and `tool_call_id` together isolate concurrent flows; reuse them for tool continuations. The default format is `########`, but any unique identifier is acceptable. +- Sending `input.origin="system"` with parts such as `{"type":"instruction","text":"/clear"}` triggers slash commands exactly as in the TUI. +- When `origin="tool_response"`, `tool_call_id` is mandatory so the CLI can attach results to the correct turn before proceeding. + +### Commands and `@` References + +| Mode | Trigger Method | Behavior | +| --- | --- | --- | +| Implicit Parsing | `origin="user"` with text starting with `/`/`?`/`@` | CLI automatically follows the slash/at flow, invoking logic such as `handleAtCommand` | +| Explicit Declaration | Describe the command in `input.command` | Recommended for third parties to avoid string parsing ambiguities | + +- `command.kind`: `slash` / `at`, matching the TUI command taxonomy. +- `command.path`: For slash commands, represents the hierarchical command array (mirrors `commandPath`); omit for `at`. +- `command.args`: Remaining argument string that the CLI will parse exactly as in TUI mode. +- `command.references`: When `kind="at"`, can directly provide resolved references such as `{ "original":"@src/main.ts", "resolved":"src/main.ts" }`; if omitted, the CLI falls back to `handleAtCommand` and `useAtCompletion` for automatic resolution. + +Explicit command example: + +```jsonc +{ + "session_id": "session-123", + "prompt_id": "session-123########8", + "input": { + "origin": "user", + "parts": [{"type": "text", "text": ""}], + "command": { + "kind": "slash", + "path": ["chat", "list"], + "args": "" + } + } +} +``` + +- CLI outputs the corresponding `result/command`: + +```jsonc +{ + "type": "result/command", + "session_id": "session-123", + "prompt_id": "session-123########8", + "command": { + "kind": "slash", + "path": ["chat", "list"], + "args": "" + }, + "result": { + "type": "message", + "level": "info", + "content": "The current session has 3 history records." + } +} +``` + +- `command.result.type` supports enums such as `message`, `dialog`, `tool`, and `submit_prompt` for easier UI rendering. +- If the command triggers a model call, subsequent output includes `assistant`, `tool_call`, and `result/*` events, following the same order as the TUI. + +Explicit `@` reference example: + +```jsonc +{ + "session_id": "session-123", + "input": { + "origin": "user", + "parts": [{"type": "text", "text": "Please review @src/main.py"}], + "command": { + "kind": "at", + "references": [ + {"original": "@src/main.py", "resolved": "src/main.py"} + ] + } + } +} +``` + +- The CLI uses the explicit `references` to read files; if missing, it falls back to automatic parsing, preserving TUI tolerance for partial paths. 
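+
+The producer side is symmetric. Below is a minimal sketch, under the proposed `--input-format stream-json` flag, of writing Qwen Chat Request lines to the CLI's stdin: first the explicit `/chat list` command from the example above, then a regular user turn that relies on implicit `@` resolution. Session and prompt identifiers are placeholders.
+
+```python
+import json
+import subprocess
+
+# Proposed invocation per this RFC; the flags are not final.
+proc = subprocess.Popen(
+    ["qwen", "--input-format", "stream-json", "--output-format", "stream-json"],
+    stdin=subprocess.PIPE,
+    stdout=subprocess.PIPE,
+    text=True,
+)
+
+def send(request: dict) -> None:
+    """Write one Qwen Chat Request as a single JSON line."""
+    proc.stdin.write(json.dumps(request, ensure_ascii=False) + "\n")
+    proc.stdin.flush()
+
+# Explicit slash command (mirrors the `/chat list` example above).
+send({
+    "session_id": "_new",  # ask the CLI to allocate a session
+    "input": {
+        "origin": "user",
+        "parts": [{"type": "text", "text": ""}],
+        "command": {"kind": "slash", "path": ["chat", "list"], "args": ""},
+    },
+})
+
+# Regular user turn; the CLI resolves the @ reference itself.
+send({
+    "session_id": "session-123",  # placeholder: reuse the ID returned by the first response
+    "input": {
+        "origin": "user",
+        "parts": [{"type": "text", "text": "Please review @src/main.py"}],
+    },
+})
+
+proc.stdin.close()  # EOF ends the structured session
+print(proc.stdout.read())
+proc.wait()
+```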
+
+### SDK-Side Command Coordination
+
+| `command.result.type` | Description | SDK Guidance |
+| --- | --- | --- |
+| `handled` | Command completed fully inside the CLI | No extra action |
+| `message` | Informational or error message | Display a notification in the UI |
+| `dialog` (`auth`/`theme`/`editor`/`privacy`/`settings`/`model`/`subagent_create`/`subagent_list`/`help`) | Requires dialogs or navigation | Present the appropriate UI panel |
+| `tool` | Triggers a tool call | Convert `tool_request` or command args into a tool request and monitor results |
+| `submit_prompt` | Immediately sends a PartListUnion to the model | Submit `content` as the next input with `continuation=true` |
+| `load_history` | Reset or load specified session history | Refresh or reload history views |
+| `quit` / `quit_confirmation` | Exit flow or await user confirmation | Control the host app lifecycle and return confirmation |
+| `confirm_shell_commands` | Shell commands awaiting approval | Show an approval dialog; resend with `approvedCommands` / `confirmationOutcome` once approved |
+| `confirm_action` | General confirmation prompt | Provide confirmation UI and return the outcome |
+
+- SDKs should expose a unified command API that maps user input to the structure above and drives local UI or follow-up requests based on the resulting action.
+- When commands kick off further model interactions (e.g., `/submit`, expanded `@file`), the CLI keeps streaming `assistant`, `tool_call`, and `result/*` events in the same order as the TUI, letting third parties replicate the interaction with pure text input plus JSON output.
+
+### STDIN Command Acknowledgement
+
+- With `--input-format=stream-json`, the CLI must still acknowledge `/`, `?`, and `@` commands immediately, reusing `useSlashCommandProcessor` and `handleAtCommand`.
+- After parsing, the CLI emits structured responses such as:
+
+```jsonc
+{
+  "type": "result/command",
+  "session_id": "session-123",
+  "prompt_id": "session-123########8",
+  "command": {
+    "kind": "slash",
+    "path": ["chat", "list"],
+    "args": ""
+  },
+  "result": {
+    "type": "message",
+    "level": "info",
+    "content": "The current session has 3 history records."
+  }
+}
+```
+
+- The `result` field matches the `command.result.type` table above for consistent UI handling.
+- If commands lead to continued interaction, the CLI streams the rest of the turn with matching session fields.
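+
+As a sketch of how a host application might consume these acknowledgements, the function below routes one `result/command` line to a local action. The envelope fields follow the example above; the handler bodies and the `send` callback (which writes a follow-up Qwen Chat Request, as in the earlier producer sketch) are illustrative placeholders rather than prescribed SDK APIs.
+
+```python
+import json
+from typing import Callable
+
+def handle_command_result(raw_line: str, send: Callable[[dict], None]) -> None:
+    """Dispatch one `result/command` envelope to a host-side action."""
+    event = json.loads(raw_line)
+    if event.get("type") != "result/command":
+        return
+
+    result = event.get("result", {})
+    kind = result.get("type")
+
+    if kind == "message":
+        # Informational or error text: surface it as a notification.
+        print(f"[{result.get('level', 'info')}] {result.get('content', '')}")
+    elif kind == "submit_prompt":
+        # The command expanded into model input: resend it as a continuation
+        # (the placement of `content` inside `result` is assumed from the table above).
+        send({
+            "session_id": event["session_id"],
+            "prompt_id": event.get("prompt_id"),
+            "continuation": True,
+            "input": {"origin": "user", "parts": result.get("content", [])},
+        })
+    elif kind in ("confirm_shell_commands", "confirm_action"):
+        # Approval flows need a human decision before the CLI proceeds.
+        print("approval required:", result)
+    else:
+        # dialog / tool / load_history / quit ... are left to the host UI.
+        print("unhandled command result:", kind)
+```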
+ +## Real-Time Suggestions, Heartbeats, and Interrupts + +| Capability | Request | Response | Notes | +| --- | --- | --- | --- | +| Command Suggestions | `command_hint_request` | `result/command_hint` | Triggered by characters; `trigger` supports `slash`, `at`; `status` can be `ok` / `loading` / `error` | +| Heartbeat | `heartbeat_request` | `result/heartbeat` | Periodic keepalive; the CLI may proactively push the same event | +| Interrupt/Cancel | `control/cancel` | `result/cancel` + `control_response` | Simulates ESC; `reason` is currently fixed at `escape` | + +### Suggestion Request Example (`/c`) + +```jsonc +{ + "type": "command_hint_request", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "text": "/c", + "cursor": 2, + "context": { + "cwd": "/workspace/demo", + "selected_text": "" + } +} +``` + +### Suggestion Response Example + +```jsonc +{ + "type": "result/command_hint", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "status": "ok", + "suggestions": [ + {"label": "chat", "value": "chat", "description": "Manage conversation history."}, + {"label": "clear", "value": "clear", "description": "Clear the screen and conversation history."}, + {"label": "compress", "value": "compress", "description": "Compresses the context by replacing it with a summary."}, + {"label": "copy", "value": "copy", "description": "Copy the last result or code snippet to clipboard."}, + {"label": "corgi", "value": "corgi", "description": "Toggles corgi mode."} + ], + "metadata": { + "is_perfect_match": false + } +} +``` + +### Suggestion Request Example (`@src/co`) + +```jsonc +{ + "type": "command_hint_request", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "at", + "text": "@src/co", + "cursor": 7, + "context": { + "cwd": "/workspace/demo", + "selected_text": "" + } +} +``` + +### Suggestion Response Example (`@src/co`) + +```jsonc +{ + "type": "result/command_hint", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "at", + "status": "ok", + "suggestions": [ + {"label": "src/components/", "value": "src/components/"}, + {"label": "src/components/Button.tsx", "value": "src/components/Button.tsx"}, + {"label": "src/components/Button with spaces.tsx", "value": "src/components/Button\\ with\\ spaces.tsx"} + ], + "metadata": { + "is_perfect_match": false + } +} +``` + +### Suggestion Request Example (`/?`) + +```jsonc +{ + "type": "command_hint_request", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "text": "/?", + "cursor": 2, + "context": { + "cwd": "/workspace/demo", + "selected_text": "" + } +} +``` + +### Suggestion Response Example (`/?`) + +```jsonc +{ + "type": "result/command_hint", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "status": "ok", + "suggestions": [ + { + "label": "help", + "value": "help", + "description": "For help on Qwen Code.", + "matchedIndex": 0 + } + ], + "metadata": { + "is_perfect_match": true + } +} +``` + +- `suggestions` reuse the TUI `Suggestion` structure; `status="loading"` indicates the CLI is preparing data (for example while `useAtCompletion` builds the file index), and `error` responses include a message. 
+- The CLI reuses `useSlashCompletion` and `useAtCompletion` to generate hints; these requests never touch `GeminiChat` nor session history, and `prompt_id` may carry a `_preview` suffix that is echoed back. +- Triggers can fire repeatedly as the text or cursor changes; send `{"type":"command_hint_cancel", ...}` to stop long-running lookups. + +### Heartbeat Request and Response + +```jsonc +{"type":"heartbeat_request","session_id":"session-123"} +{"type":"result/heartbeat","session_id":"session-123","status":"ok","ts":1739430123} +``` + +- Third parties can configure timeouts (e.g., 10 seconds) to detect CLI hangs and restart the process. +- The CLI responds with the same `result/heartbeat` shape and may proactively push keepalive events in the background. +- `@third-party/anthropics/claude-agent-sdk-python` currently lacks a heartbeat implementation; the P1.1 rollout must define default intervals, timeout policy, and whether SDKs can customize the heartbeat cadence. + +### Interrupt Example + +```jsonc +{ + "type": "control/cancel", + "session_id": "session-123", + "prompt_id": "session-123########8", + "reason": "escape" +} +``` + +- The CLI must call `cancelOngoingRequest`, abort the `AbortController`, finalize history items, and emit `result/cancel`. +- If no request can be canceled, the CLI should return `{"type":"result/cancel","status":"noop"}` with an explanation. +- When the underlying stream reports `GeminiEventType.UserCancelled`, the CLI also emits `{"type":"x-qwen-session-event","event":"USER_CANCELLED","message":"User cancelled the request."}` so clients can update the session state. +- Double-tapping ESC to clear the input is handled on the client side; structured integrations can replicate that locally without sending additional CLI messages. + +## Event Categories and Channels + +| Category | Direction | Representative Events | Acknowledgment Requirement | Purpose | +| --- | --- | --- | --- | --- | +| Result Events | CLI → STDOUT | `result/command`, `result/command_hint`, `result/heartbeat`, `result/cancel`, `x-qwen-session-event` | No acknowledgment required | Publish command output, status tips, heartbeat results | +| Request Events | SDK/Third Party → STDIN | `command_hint_request`, `heartbeat_request`, `control/cancel` | CLI returns the corresponding `result/*` | Trigger immediate actions or controls | +| Control Channel | CLI ↔ STDIN/STDOUT | `control_request` / `control_response` | Must match `request_id` | Permission approvals, hook callbacks, MCP calls | + +These categories can also be viewed as three event mechanisms: + +1. **Result events (`result/*`)** + - CLI → STDOUT one-way announcements such as `result/command`, `result/command_hint`, `result/heartbeat`, `result/cancel`, `x-qwen-session-event`. + - Carry command output, hints, heartbeat results, and session state updates. No acknowledgment is required. + +2. **Request events (`*request`)** + - SDK/third party → STDIN directives like `command_hint_request`, `heartbeat_request`, or `control/cancel`. + - Trigger immediate CLI behavior, and each request must be answered with the corresponding `result/*`. + +3. **Control channel events (`control_request` / `control_response`)** + - CLI outputs `control_request`, external integrations respond with matching `control_response`; used for approvals, hook callbacks, MCP JSON-RPC, etc. + - Each carries a unique `request_id`, requires timely responses, and bypasses conversation history to stay in sync with TUI behavior. 
+ +- All events travel over a unified JSON Lines protocol; route them by `type`/`subtype`. +- Control channel participants must implement timeout and error fallback strategies to avoid blocking the CLI. + +## Control Requests and Responses + +| Field | Description | +| --- | --- | +| `type` | Always `control_request` or `control_response` | +| `request_id` | Unique identifier pairing requests with responses | +| `subtype` | `can_use_tool`, `hook_callback`, `mcp_message`, etc. | +| `payload` / `response` | Carries event details or acknowledgment content | + +- `control_request` example: + +```jsonc +{ + "type": "control_request", + "request_id": "req-1", + "subtype": "can_use_tool", + "session_id": "session-123", + "prompt_id": "session-123########8", + "tool": { + "name": "edit_file", + "arguments": {"path": "main.py", "patch": "..."} + }, + "metadata": { + "reason": "apply_diff" + } +} +``` + +- Corresponding `control_response`: + +```jsonc +{ + "type": "control_response", + "request_id": "req-1", + "response": { + "subtype": "success", + "result": { + "behavior": "approve", + "message": "Allowed to execute." + } + } +} +``` + +- If a callback fails, the SDK must return `{"subtype":"error","error":{"message":"...","retryable":false}}`, and the CLI will follow the safety fallback (auto-deny or report failure). +- MCP integration: `subtype:"mcp_message"` carries JSON-RPC (`tools/list`, `tools/call`, etc.), and the SDK wraps results as `mcp_response` inside `control_response`. +- Current state: the CLI primarily emits one-way output while the TUI handles confirmations inline, so dual-channel hooks similar to Claude Code still need to be generalized. +- Design requirement: whenever the CLI needs external confirmation, it should emit a `control_request` and wait for the matching `control_response`, covering tool approvals, hook callbacks, and MCP JSON-RPC exchanges. +- Coverage examples: `/confirm_shell_commands`, `confirm_action`, `quit_confirmation`, tool permission approvals, sub-agent orchestration, and future dialogs/forms/identity flows. +- Fallback policy: if the control channel is disabled, the CLI must explicitly reject risky actions or state “unavailable in structured mode” via `result/command` instead of failing silently. +- Follow-up tasks: (1) add JSON Schemas for `control_request` / `control_response`; (2) build a unified dispatcher so the TUI and CLI share the same logic; (3) extend SDKs with listeners/hooks that surface the requests to upper-layer UIs. 
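+
+A minimal acknowledgement loop for the control channel is sketched below, following the request and response shapes above. The allowlist is a stand-in policy; a real integration would consult its own permission model, enforce timeouts, and log decisions for audit.
+
+```python
+import json
+import sys
+
+ALLOWED_TOOLS = {"read_file", "glob"}  # illustrative allowlist, not a CLI default
+
+def answer_control_request(line: str) -> str | None:
+    """Return the `control_response` JSON line for a `control_request`, if any."""
+    event = json.loads(line)
+    if event.get("type") != "control_request":
+        return None  # not ours to acknowledge
+
+    if event.get("subtype") == "can_use_tool":
+        allowed = event.get("tool", {}).get("name") in ALLOWED_TOOLS
+        response = {
+            "subtype": "success",
+            "result": {
+                "behavior": "approve" if allowed else "deny",
+                "message": "Allowed to execute." if allowed else "Blocked by client policy.",
+            },
+        }
+    else:
+        # Unknown callbacks: report a non-retryable error so the CLI falls back safely.
+        response = {
+            "subtype": "error",
+            "error": {"message": f"unsupported subtype: {event.get('subtype')}", "retryable": False},
+        }
+
+    return json.dumps({
+        "type": "control_response",
+        "request_id": event["request_id"],
+        "response": response,
+    })
+
+if __name__ == "__main__":
+    # Pipe the CLI's stdout into this script to preview the acknowledgements it would send.
+    for raw in sys.stdin:
+        if not raw.strip():
+            continue
+        reply = answer_control_request(raw)
+        if reply:
+            print(reply)
+```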
+ +## Version Negotiation and Error Semantics + +| Item | Description | +| --- | --- | +| Protocol Version | The first `chat.completion` includes `protocol_version`, `input_format`, `output_format`, and `capabilities` in `metadata` / `system_fingerprint` | +| Version Mismatch | If the SDK requests unsupported features, the CLI returns `finish_reason="error"`, marks `unsupported_protocol` in `metadata.error`, and exits with a non-zero code | +| Fatal Errors | Emit OpenAI-style error objects and terminate | +| Recoverable Errors | Return error details via `chat.completion` with `finish_reason` `stop/tool_calls` while keeping the process healthy | +| Control Protocol Errors | Attach details in `metadata.control_errors` so the SDK can decide whether to retry or terminate | + +Fatal error example: + +```json +{ + "error": { + "message": "invalid tool input", + "type": "invalid_request_error", + "param": "tools[0].function.arguments", + "code": "QWEN_INVALID_TOOL_ARGS" + } +} +``` + +## Security and Resource Control + +| Domain | Policy | +| --- | --- | +| Permissions | Structured mode still honors existing CLI approval and tool allowlists; high-risk actions default to denial without acknowledgement | +| Audit | Control channel lets the SDK audit sensitive operations beforehand; if disabled, `result/command` must state the limitation explicitly | +| Keepalive | `heartbeat` events can trigger process recycling to avoid resource leaks | + +## Logging Layers and Observability + +| Component | Highlights | +| --- | --- | +| ConsolePatcher | Intercepts `console.*`, records `channel="stderr"` and `console_level` within `x-qwen-terminal` | +| `log_scope` extension | Recommend attaching `log_scope` (`system`, `tool`, `debug`) to annotations in structured mode to align with `ConfigLogger` levels | +| Tool Logs | Output via `ToolResultDisplay`; `result_display` can carry `log_scope` for filtering | +| OTel Plan | SDK and CLI each integrate OpenTelemetry to chain traces and spans | + +- Third parties are advised to store the full message sequence in audit logs for replay. + +## Debugging Examples + +```bash +echo '{"model":"qwen-coder","messages":[{"role":"user","content":"Hello"}]}' \ + | qwen --input-format stream-json --output-format stream-json + +echo '{"model":"qwen-coder","messages":[{"role":"user","content":"Output the greeting character by character."}]}' \ + | qwen --input-format stream-json --output-format stream-chunk-json +``` + +- Use these commands to quickly verify output formats and event flows. diff --git a/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_cn.md b/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_cn.md new file mode 100644 index 000000000..3facc45f4 --- /dev/null +++ b/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_cn.md @@ -0,0 +1,611 @@ +# RFC: Qwen-Code CLI 输出格式与 IPC Stream JSON 能力 + +- **状态**: Draft +- **更新时间**: 2025-10-13 +- **作者**: x22x22 +- **追踪**: + +## 摘要 + +为了支撑第三方系统以编程方式稳定集成 Qwen-Code,本 RFC 旨在定义并实现 CLI 层的结构化输入/输出能力。核心改动是在现有 CLI 上引入 `--input-format/--output-format`(`text`/`stream-json`/`stream-chunk-json`)与对称的结构化输入协议,配套 JSON Lines、错误语义、会话字段及分阶段落地计划。这一能力是 IPC SDK、第三方后端服务、UI 组件以及多语言 SDK 协同工作的基础,可直接回应 issue #795 关于“提供 `--input-format/--output-format json/stream-json`”的诉求,并与现有架构设计保持一致。 + +## 背景 + +### Issue 795 概述 + +社区提出希望参考 Claude Code 的实现,为 Qwen-Code CLI 增加 `--input-format/--output-format json/stream-json`,以便第三方程序可以无 UI 干预地消费 CLI 输出,构建可自动化的集成能力。 + +### 集成方场景 + +1. task级别的集成,即通过sdk一条条发送prompt,中间接收多次数据并进行处理后再返回给用户展示。 +2. 
task级别的集成,即通过sdk一条条发送prompt,直接流式返回给用户展示。 +3. 在输入框中输入指令(/,@,?)需要和在TUI中一样要有反应。 +4. 使用类似xterm.js的库在前端模拟终端交互,但是输入框和终端内容是分离的。 + +### 集成方式 + +1. 第三方程序依赖后期提供的各个语言的"qwen-code-agent-sdk"; +2. "qwen-code-agent-sdk"使用子进程方式启动"qwen code",并基于stdio进行双向IPC通信; +3. "qwen-code-agent-sdk"接收"qwen code"执行的结果; +4. 第三方程序获取到"qwen-code-agent-sdk"的结果。 + +### 现状痛点 + +1. CLI 仅支持人机交互式的纯文本 STDOUT,输出不稳定。 +2. 没有对称的结构化输入,难以驱动高级自动化流程。 + +## 设计目标 + +1. 在 CLI 层提供稳定可配置的输出格式选项,默认兼容现有行为。 +2. 实现 JSON Lines 流式协议,匹配 IPC SDK 设计中的消息语义与控制协议。 +3. 提供对称的结构化输入能力,支持 SDK 将提示与控制消息写入 STDIN。 +4. 定义跨语言可共用的schema、错误语义。 +5. 为后期需要实现的qwen-code-agent-sdk友好化考虑。 + +## 非目标 + +- 不在本 RFC 中体现 SDK 的设计。 + +## 方案概览 + +### CLI 参数设计 + +| 参数 | 取值 | 默认值 | 说明 | +|------|------|--------|------| +| `--input-format` | `text` / `stream-json` | `text` | 控制 STDIN 解析逻辑 | +| `--output-format` | `text` / `stream-json` / `stream-chunk-json` | `text` | 控制 STDOUT 输出格式 | +| *(自动)* | | | 当 `--input-format/--output-format` 取值为 `stream-json` / `stream-chunk-json` 时,CLI 自动禁用 TUI,进入结构化输出模式;仅 `text` 模式保留原有 TUI 行为。**通信仍通过标准输入/输出管道完成,未改用额外通道** | + +CLI 参数在帮助信息中展示,且 `text` 模式沿用现有行为,保证向后兼容。无需额外的 `--stdio` 开关。 + +### 输出格式语义 + +1. **`text`(兼容模式)** + - 沿用当前 STDOUT 逻辑,适用人类交互。 + - 不保证结构化信息;未来将逐步标记为“仅供手动使用”。 + +2. **`stream-json`(消息级 JSON Lines)** + - 每行输出一个符合 OpenAI `/chat/completions` 响应格式的对象,`object` 固定为 `chat.completion`。 + - CLI 会在同一个流程中依次输出:初始化回执(含能力声明)、每次助手回复/工具调用的 `chat.completion` 对象、收尾摘要。 + - 示例: + ```json + {"object":"chat.completion","id":"chatcmpl-session-123","created":1739430000,"model":"qwen-coder","choices":[{"index":0,"message":{"role":"assistant","content":"正在分析...","tool_calls":null},"finish_reason":"stop"}],"usage":{"prompt_tokens":1200,"completion_tokens":80,"total_tokens":1280}} + {"object":"chat.completion","id":"chatcmpl-session-123","created":1739430002,"model":"qwen-coder","choices":[{"index":0,"message":{"role":"assistant","tool_calls":[{"id":"tool-1","type":"function","function":{"name":"edit_file","arguments":"..."}}]},"finish_reason":"tool_calls"}]} + {"object":"chat.completion","id":"chatcmpl-session-123","created":1739430010,"model":"qwen-coder","choices":[{"index":0,"message":{"role":"assistant","content":"修复完成,已更新文件。"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1600,"completion_tokens":200,"total_tokens":1800}} + ``` + - 仍保持 JSONL 逐行输出,方便 第三方后端服务、SDK 以消息级颗粒度消费。 + +3. 
**`stream-chunk-json`(增量 chunk JSON Lines)** + - 输出遵循 OpenAI 流式响应格式;每行一个 `chat.completion.chunk` 对象,`choices[].delta` 承载 token/块增量,同一 `id` 覆盖整个会话。 + - CLI 会在开头发送带 `delta: {}` 的 `role` 声明,过程中发送文本或工具调用增量,最后输出仅含 `finish_reason`(以及可选 `usage`)的结束 chunk,与 OpenAI `/chat/completions` 规范保持一致。 + - 示例: + ```json + {"object":"chat.completion.chunk","id":"chatcmpl-session-123","created":1739430000,"model":"qwen-coder","choices":[{"index":0,"delta":{"role":"assistant"}}]} + {"object":"chat.completion.chunk","id":"chatcmpl-session-123","created":1739430001,"model":"qwen-coder","choices":[{"index":0,"delta":{"content":"正在"}},{"index":1,"delta":{"content":""}}]} + {"object":"chat.completion.chunk","id":"chatcmpl-session-123","created":1739430001,"model":"qwen-coder","choices":[{"index":0,"delta":{"content":"分析..."},"finish_reason":null}]} + {"object":"chat.completion.chunk","id":"chatcmpl-session-123","created":1739430003,"model":"qwen-coder","choices":[{"index":0,"delta":{"tool_calls":[{"id":"tool-1","type":"function","function":{"name":"edit_file","arguments":"..."}}]}}]} + {"object":"chat.completion.chunk","id":"chatcmpl-session-123","created":1739430008,"model":"qwen-coder","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}} + {"object":"chat.completion.chunk","id":"chatcmpl-session-123","created":1739430008,"model":"qwen-coder","usage":{"prompt_tokens":1600,"completion_tokens":200,"total_tokens":1800},"choices":[]} + ``` + - 不再额外补发完整 `chat.completion`,而是在最后的 chunk 中给出 `finish_reason` 与可选 `usage`,符合 OpenAI 文档(见 `@third-party/code-cli-any-llm/docs/research/completions.chat.openapi.documented.yml`)的结尾语义。 + +#### 消费策略 + +- **消息级 JSONL (`stream-json`)**: 适合需要稳定阶段性结果的 第三方后端服务 或 CLI 包装器,默认采用此模式,与现有 JSONL 管线兼容。 +- **增量 chunk (`stream-chunk-json`)**: 适合 IDE / UI 组件的“边生成边展示”场景;SDK 需监听 `chat.completion.chunk`,并在接收到最终 `finish_reason` 后处理总结对象。 +- **前端模拟终端关系**: 无论 CLI 原始模式为文本还是结构化输出,SDK 均可统一消费 `chat.completion` / `chat.completion.chunk` 序列。从 CLI 角度,`stream-json` 与 `stream-chunk-json` 必须完整覆盖 TUI 模式会写入标准输出的全部语义信息(文本、ANSI/Vt100 控制、工具提示、退出码等),并通过 `choices[].message.content` / `choices[].delta.content` 与 `choices[].delta.annotations`(示例:`{"type":"x-qwen-ansi","value":"\u001b[32m"}`)进行编码。这样第三方即可在基于 xterm.js 的终端里还原颜色、光标移动、逐步输出等效果,而无需依赖 `text` 模式。 + +### 前端模拟终端注解(annotations) + +`packages/cli/src/nonInteractiveCli.ts` 与 `packages/cli/src/ui/utils/ConsolePatcher.ts` 共同决定了文本模式输出的来源:模型内容通过 `GeminiEventType.Content` 追加到 `stdout`;工具执行状态、错误与日志通过 `ConsolePatcher` 打印至 `stderr`;工具结果结构体 (`ToolResultDisplay` 等) 则在 `packages/cli/src/ui/hooks/useReactToolScheduler.ts` 中驱动 UI 渲染。为保证 `stream-json` / `stream-chunk-json` 能完整覆盖这些信息,我们在 OpenAI `annotations` 字段上新增以下约定: + +| 注解类型 | 作用 | 对应代码来源 | +|----------|------|--------------| +| `x-qwen-terminal` | 描述终端通道、样式与 ANSI/Vt100 控制序列 | `process.stdout.write`/`process.stderr.write` 拦截 (`nonInteractiveCli.ts`) + `ConsolePatcher` | +| `x-qwen-tool-display` | 携带工具执行过程/结果的结构化数据 | `ToolResultDisplay`/`ToolCallStatus` (`packages/core/src/tools/tools.ts`, `packages/cli/src/ui/hooks/useReactToolScheduler.ts`) | +| `x-qwen-thought` | 暴露 `GeminiEventType.Thought` 推理摘要 | `packages/core/src/core/turn.ts` | +| `x-qwen-session-event` | 包含 `Finished`、`ChatCompressed` 等控制事件的人类可读提示 | `packages/core/src/core/turn.ts` + `useGeminiStream.ts` | + +#### `x-qwen-terminal` + +```json +{ + "type": "x-qwen-terminal", + "channel": "stdout", + "source": "assistant", + "spans": [ + { + "start_index": 0, + "end_index": 5, + "style": { + "theme_token": "AccentGreen", + "bold": false + } + } + ], + "ansi": [ + {"offset": 
0, "sequence": "\u001b[32m"}, + {"offset": 5, "sequence": "\u001b[0m"} + ] +} +``` + +- `channel`: `stdout` / `stderr`(日志与错误来自 `ConsolePatcher` 会标记为 `stderr`,同时附带 `console_level` 字段以区分 `log`/`warn`/`error`)。 +- `source`: `assistant`(模型输出)、`tool`(工具实时输出/结果)、`console`(`console.*` 调用)、`system`(启动/关闭/致命错误)。 +- `spans.style.theme_token`: 直接复用 `Colors` 主题枚举(如 `AccentGreen`、`DiffAdded`,定义于 `packages/cli/src/ui/colors.ts` 与 `themes/*.ts`),第三方可映射为自定义调色板。 +- `ansi`: 原始 ANSI 序列与其在内容中的偏移,便于 xterm.js 原样重放;若无 ANSI 控制,数组可为空。 +- 附加字段: + - `console_level`: 当 `source = console` 时,为 `log` / `warn` / `error` / `info` / `debug`。 + - `exit_code`: 当 `source = system` 且流程结束时给出。 + - `prompt_id`: 若可关联到某次用户输入/回合(`nonInteractiveCli.ts` 中的 `prompt_id`),用于跨消息聚合。 + +> 示例:`packages/cli/index.ts` 中 `FatalError` 会输出红色字体;在 `stream-chunk-json` 中表现为 `delta.content:"配置缺失"` + `annotations:[{type:"x-qwen-terminal",channel:"stderr",source:"system",ansi:[...]}]`。 + +#### `x-qwen-tool-display` + +```json +{ + "type": "x-qwen-tool-display", + "tool_call_id": "call_tool-1", + "session_id": "session-123", + "status": "executing", + "result_display": { + "kind": "file_diff", + "file_name": "src/main.py", + "diff": "--- a/src/main.py\n+++ b/src/main.py\n@@ -1 +1 @@\n-print('Hi')\n+print('Hello')", + "original": "print('Hi')\n", + "modified": "print('Hello')\n", + "stat": { + "ai_added_lines": 1, + "ai_removed_lines": 1, + "user_added_lines": 0, + "user_removed_lines": 0 + } + } +} +``` + +- `status`: 映射自 `ToolCallStatus`(`Pending`,`Executing`,`Success`,`Error`,`Canceled`,`Confirming`),见 `packages/cli/src/ui/types.ts`。 +- `tool_call_id`: 复用 OpenAI schema 字段名,结合 `session_id` 可唯一定位某次调用,便于第三方将多个调用分区展示。若 CLI 处于测试/非会话模式,可仅使用 `tool_call_id`。 +- `result_display` union: + - `kind: "string"` → `{ "text": "stdout captured..." }`。 + - `kind: "file_diff"` → `FileDiff` 字段集合。 + - `kind: "todo_list"` → `{ "todos": [{id, content, status}] }`(源自 `TodoResultDisplay`)。 + - `kind: "plan_summary"` → `{ "message": "...", "plan_markdown": "..."}` + - `kind: "task_execution"` → 直接映射 `TaskResultDisplay`(包含 `subagentName`、`status`、`toolCalls` 等)。 +- `confirmation`: 若 `ToolCallConfirmationDetails` 仍待用户确认(`type: 'edit'|'exec'|'mcp'|'info'|'plan'`),序列化必要字段(例如 diff、命令、提示等),便于第三方弹窗。 +- `pending`: 布尔值,表示调用仍在 `validating`/`scheduled` 状态,尚未交给执行器;用于第三方提前占位,与 `ToolCallStatus.Pending` 等价。 +- `timestamp`: 可选毫秒级时间戳,对应 `useReactToolScheduler` 中的历史写入时间,便于排序。 + +#### `x-qwen-thought` + +```json +{ + "type": "x-qwen-thought", + "subject": "Analyzing repo", + "description": "Listing tsconfig patterns..." 
+} +``` + +- 对应 `GeminiEventType.Thought` 推理摘要(`packages/core/src/core/turn.ts` 第 250 行),在 UI 中常用于“思考中”提示。 + +#### `x-qwen-session-event` + +用于承载会话级事件(`Finished`、`ChatCompressed`、`SessionTokenLimitExceeded` 等)的人类可读提示,字段包括: + +```json +{ + "type": "x-qwen-session-event", + "event": "MAX_TOKENS", + "message": "Response truncated due to token limits.", + "metrics": { + "original_tokens": 12000, + "compressed_tokens": 8000 + } +} +``` + +`event` 取值来自 `GeminiEventType`(`Finished`、`ChatCompressed`、`MaxSessionTurns` 等),`message` 为 UI 在 `useGeminiStream.ts` 中构造的提示文案。当用户或集成方触发 ESC 取消时,CLI 需发送 `event: "USER_CANCELLED"`、`message: "User cancelled the request."`,保持与 TUI 一致。 + +### 输入格式语义(Qwen 会话协议) + +> 传输层保持与现有 CLI 相同:所有结构化输入仍以 `\n` 结尾的 JSON 行写入 STDIN,CLI 侧按照 `--input-format` 选择解析器。该模式直接参考了 `@anthropics/claude-agent-sdk-python` 中的 `SubprocessCLITransport` 实现,后者同样在流式模式下对每条消息 `json.dumps(...)` 后写入子进程 STDIN。 + +- `text`: 与现有模式一致,从 STDIN 读取自然语言指令,维持原始 TUI 行为。 +- `stream-json` / `stream-chunk-json`: CLI 期望每行输入遵循 **Qwen Chat Request** 协议。该协议借鉴 OpenAI `/chat/completions` 的结构,但不会要求调用方重复发送完整历史,而是仅提交“本次增量”。CLI 通过 session ID 与内部 `GeminiChat` 历史(参见 `GeminiChat.sendMessageStream`)维护上下文。 + +#### Qwen Chat Request 结构 + +```jsonc +{ + "session_id": "session-123", // 必填, 由 CLI/SDK 生成或复用 + "prompt_id": "session-123########7", // 可选, 不传时 CLI 自动生成; 与工具调用关联时需复用 + "model": "qwen-coder", // 可选, 为空时沿用当前会话模型 + "input": { + "origin": "user", // user | tool_response | system + "parts": [ // 与 @google/genai PartListUnion 对齐 + {"type": "text", "text": "请修复 @main.py 的 bug"} + ], + "command": null // 可选, 显式声明 slash/@ 命令 (见下文) + }, + "options": { + "temperature": 0.2, + "tool_overrides": ["EditTool"] + } +} +``` + +- `session_id`: 对应 `config.getSessionId()`,是所有会话状态的主键。第三方可调用 CLI 提供的“创建会话”命令获取,或复用现有 TUI 生成的 ID。 +- `prompt_id`: 用于标识一次用户输入或工具续写;同一个 `prompt_id` 下可能经历多轮工具调用。CLI 默认格式为 `########`,第三方可复用该格式或自定义但需保持唯一性。 +- `input.origin`: + - `user`: 常规用户输入,`parts` 通常是一个或多个 `{"type":"text"}`。 + - `tool_response`: 当第三方执行完一个工具后向模型返回结果时使用。此时 `parts` 应包含 `{"type":"function_response","function_response":{...}}`,同时需要提供 `tool_call_id` 字段(见下方)。 + - `system`: CLI 内部控制消息,例如 slash 命令恢复历史;第三方仅在需要注入系统提示时使用。 +- `input.parts`: 允许出现 `text`、`function_response`、`file_data` 等 `@google/genai` 支持的结构,CLI 会直接传给 `GeminiChat`。当 `origin="user"` 时,CLI 会将所有 text part 按顺序拼接成字符串后复用原有 TUI 语义(同 `prepareQueryForGemini` 流程)。 +- `options`:对单次请求的参数覆写(模型、采样、工具限制等),默认读取当前 Config。 +- 额外字段: + - `tool_call_id`: 当 `origin=tool_response` 时必填,用于与输出中的 `tool_call_id` 匹配(参照 `CoreToolScheduler`)。 + - `continuation`: 布尔值,等价于 `submitQuery(..., { isContinuation: true })`;缺省由 CLI 根据 `origin` 判定。 + - `tool_request`: 可选对象,镜像 `ToolCallRequestInfo` 中的 `args` / `isClientInitiated` / `prompt_id` 等字段,便于第三方在 CLI 侧复用同一调度逻辑(例如并发工具、子代理)。未提供时由 CLI 自动推断。 + +#### 会话控制 + +- `session_id` 不存在或传入 `"_new"` 时,CLI 自动创建新会话并返回首个响应中附带的实际 `session_id`。 +- 通过 `input.origin="system"` + `parts:[{"type":"instruction","text":"/clear"}]` 可触发清空历史(模拟 `/clear` 命令)。 +- `prompt_id` 与 `tool_call_id` 共同保证并发调用不串扰:`CoreToolScheduler` 与子代理均使用 `callId` 区分任务,第三方在发送工具结果时需保留该 ID。 + +#### 命令与 @ 引用 + +TUI 在文本模式下会根据输入字符串自动解析 slash 命令与 `@` 引用: + +- Slash (`/` 或 `?`) 由 `useSlashCommandProcessor` 处理,可触发内建命令、子命令及工具调度。 +- `@` 引用由 `handleAtCommand` 解析,会使用文件服务与 `read_many_files`、`glob` 等工具扩展用户提示。 + +结构化协议保留同样语义: + +1. **隐式模式**:当 `origin="user"` 且首个 text part 以 `/` 或 `?` 开头时,CLI 自动进入 slash 流程;当文本包含未转义的 `@` 时,会调用 `handleAtCommand` 读取文件并在发送给模型前生成新的 `parts`。 +2. 
**显式模式(推荐给第三方)**:在 `input.command` 中描述命令,避免 CLI 解析字符串。 + ```jsonc + { + "session_id": "session-123", + "input": { + "origin": "user", + "parts": [{"type": "text", "text": "/chat list"}], + "command": { + "kind": "slash", + "path": ["chat", "list"], + "args": "" + } + } + } + ``` + - `kind`: `slash` | `at`. + - `path`: 对于 slash 命令,是命令层级数组(等价于 `commandPath`);对于 `at`,则省略。 + - `args`: 剩余参数字符串。 + - `references`: 当 `kind="at"` 时可选,提前给出解析后的 `[{original:"@foo", resolved:"./src/foo.ts"}]`;若省略,CLI 将按隐式模式解析。 + +示例:显式引用文件 +```jsonc +{ + "session_id": "session-123", + "input": { + "origin": "user", + "parts": [{"type": "text", "text": "请审阅 @src/main.py"}], + "command": { + "kind": "at", + "references": [ + {"original": "@src/main.py", "resolved": "src/main.py"} + ] + } + } +} +``` + +#### SDK 侧命令协作 + +`useSlashCommandProcessor` 的执行结果可能包含多种动作(消息、弹窗、工具、提交 prompt、加载历史、退出等)。结构化模式通过 `action` 下发这些意图,SDK 需在本地处理效果,并在必要时再向 CLI 发送补充请求: + +| `command.result.type` | 说明 | SDK 动作建议 | +|-----------------------|------|--------------| +| `handled` | 已在 CLI 内部完成 | 无需处理 | +| `message` | 返回信息/错误 | 在 UI 显示通知 | +| `dialog` (`auth`/`theme`/`editor`/`privacy`/`settings`/`model`/`subagent_create`/`subagent_list`/`help`) | 需要弹窗或页面跳转 | 在第三方界面发起对应 UI | +| `tool` | 触发工具调用 | 将 `tool_request` 或命令参数转为工具请求,向 CLI 发送并监听结果 | +| `submit_prompt` | 立即向模型发送 PartListUnion | 将 `content` 作为下一条 `input.parts` 提交,并设置 `continuation=true` | +| `load_history` | 重置会话历史 | 调用 CLI 提供的历史设置接口或重新加载 UI 历史 | +| `quit` / `quit_confirmation` | 退出应用或询问确认 | 第三方控制自身生命周期,必要时触发 CLI 退出流程 | +| `confirm_shell_commands` | 需要用户确认 shell 命令 | 弹窗确认;批准后携带 `approvedCommands` / `confirmationOutcome` 再次调用命令 | +| `confirm_action` | 需要确认提示 | 同上,提供确认按钮并返回结果 | + +SDK 应暴露统一的命令执行 API,将用户输入映射为上述 `command`(显式或隐式),处理 `action`,并在必要时再次与 CLI 协调,确保行为与 TUI 一致。 + +CLI 会使用提供的 `resolved` 值读取文件;若未提供则回退到 `handleAtCommand` 的自动解析逻辑。 + +#### STDIN 命令回执 + +- 当 `--input-format = stream-json` 时,CLI 必须对 STDIN 中的 `/`、`?`、`@` 等命令指令保持即时反馈,与文本模式一致。解析逻辑沿用 `useSlashCommandProcessor` 与 `handleAtCommand`,不会因结构化模式而绕过。 +- 命令解析完成后,CLI 需向 STDOUT 写出结构化响应,形如: + ```jsonc + { + "type": "result/command", + "session_id": "session-123", + "prompt_id": "session-123########8", + "command": { + "kind": "slash", + "path": ["chat", "list"], + "args": "" + }, + "result": { + "type": "message", + "level": "info", + "content": "当前会话共有 3 条历史记录" + } + } + ``` + 其中 `result` 字段遵循上表的 `command.result.type` 枚举(`message`、`dialog`、`tool`、`submit_prompt` 等),以便 SDK 在收到 `stream-json` / `stream-chunk-json` 消息后立即驱动 UI 或后续请求。 +- 若命令触发进一步的模型调用(例如 `/submit`、`@file` 展开),CLI 会在输出中继续串联对应的 `assistant`/`tool_call`/`result` 消息,保持与 TUI 相同的顺序与会话字段,使第三方可以纯文本输入 + JSON 输出的方式完整复现交互。 + +#### 实时命令提示(Hint) + +- 结构化模式必须支持“字符触发提示”流程:当用户在第三方 UI 中输入 `/`、`@`、`?` 等触发字符但尚未按下回车时,集成方可立即发送 **Command Hint Request**,CLI 应返回对应提示数据而不写入会话历史。 +- Slash 请求示例(输入 `/c`): + ```jsonc + { + "type": "command_hint_request", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "text": "/c", + "cursor": 2, + "context": { + "cwd": "/workspace/demo", + "selected_text": "" + } + } + ``` +- Slash 响应示例(基于内置命令数据): + ```jsonc + { + "type": "result/command_hint", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "status": "ok", + "suggestions": [ + { + "label": "chat", + "value": "chat", + "description": "Manage conversation history." 
+ }, + { + "label": "clear", + "value": "clear", + "description": "clear the screen and conversation history" + }, + { + "label": "compress", + "value": "compress", + "description": "Compresses the context by replacing it with a summary." + }, + { + "label": "copy", + "value": "copy", + "description": "Copy the last result or code snippet to clipboard" + }, + { + "label": "corgi", + "value": "corgi", + "description": "Toggles corgi mode." + } + ], + "metadata": { + "is_perfect_match": false + } + } + ``` +- `@` 请求示例(输入 `@src/co`): + ```jsonc + { + "type": "command_hint_request", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "at", + "text": "@src/co", + "cursor": 7, + "context": { + "cwd": "/workspace/demo", + "selected_text": "" + } + } + ``` +- `@` 响应示例(来自 `useAtCompletion` 文件搜索): + ```jsonc + { + "type": "result/command_hint", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "at", + "status": "ok", + "suggestions": [ + { + "label": "src/components/", + "value": "src/components/" + }, + { + "label": "src/components/Button.tsx", + "value": "src/components/Button.tsx" + }, + { + "label": "src/components/Button with spaces.tsx", + "value": "src/components/Button\\ with\\ spaces.tsx" + } + ], + "metadata": { + "is_perfect_match": false + } + } + ``` +- `?` 请求示例(输入 `/?`,等价于 `/help`): + ```jsonc + { + "type": "command_hint_request", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "text": "/?", + "cursor": 2, + "context": { + "cwd": "/workspace/demo", + "selected_text": "" + } + } + ``` +- `?` 响应示例(利用 `helpCommand` 的别名): + ```jsonc + { + "type": "result/command_hint", + "session_id": "session-123", + "prompt_id": "session-123########preview", + "trigger": "slash", + "status": "ok", + "suggestions": [ + { + "label": "help", + "value": "help", + "description": "for help on Qwen Code", + "matchedIndex": 0 + } + ], + "metadata": { + "is_perfect_match": true + } + } + ``` +- `suggestions` 结构复用 TUI 中 `Suggestion` 定义,`status="loading"` 表示 CLI 仍在准备数据(例如 `useAtCompletion` 初始化文件索引),前端可据此展示 Loading;`error` 时附带 `message`。 +- CLI 内部复用 `useSlashCompletion`、`useAtCompletion` 等逻辑生成提示;该类请求不会调用 `GeminiChat` 或写入历史,`prompt_id` 可带 `_preview` 后缀并在响应中原样返回。 +- 支持连续触发:每当输入内容或光标位置变化时,集成方可重复发送 `command_hint_request`,CLI 需进行节流/去抖后反馈最新提示。若前端取消提示,可发送 `{ "type": "command_hint_cancel", "session_id": "...", "prompt_id": "...", "trigger": "slash" }` 通知 CLI 终止耗时搜索。 + +#### 日志分层策略 + +- CLI 仍通过 `ConsolePatcher` 拦截 `console.log`/`warn`/`error`,并在 `x-qwen-terminal` 注解中携带 `channel="stderr"`、`source="console"` 与 `console_level`。 +- 为方便第三方过滤,建议在结构化模式下扩展 `annotations[].log_scope` 字段(取值如 `system`、`tool`、`debug`),默认与 TUI 中 `ConfigLogger` 的级别组合保持一致(见 `packages/cli/src/config/config.ts`)。 +- 对于工具执行产生的日志,可继续经由 `ToolResultDisplay` 输出;若需要更细粒度过滤,可在对应 `result_display` 中附加 `log_scope`。 + +#### 心跳与保活 + +- 结构化协议提供与实时提示类似的独立事件,不污染会话上下文: + - 集成方可定期发送 `{"type":"heartbeat_request","session_id":"session-123"}`(可选携带 `prompt_id`)。 +- CLI 以 `{"type":"result/heartbeat","session_id":"session-123","status":"ok","ts":1739430123}` 回复;亦可在后台主动推送相同事件。 +- 若超过约定时间(例如 10 秒)未收到心跳响应,第三方可判定子进程已挂起并执行重启。 +- `@third-party/anthropics/claude-agent-sdk-python` 当前未实现心跳机制,需由本项目 CLI/SDK 自行补足;P1.1 实施时需定义默认间隔、超时策略及是否允许 SDK 自定义心跳频率。 + +#### 实时中断(Escape 指令) + +- 结构化模式必须暴露与 TUI 相同的“终止当前响应”能力。TUI 通过 `useGeminiStream.ts` 中的 `useKeypress` 监听 ESC 键并调用 `cancelOngoingRequest`:该流程会中止 `AbortController`、记录 `ApiCancelEvent` 遥测、补齐 
`pendingHistoryItem`,并向历史追加“Request cancelled.” 等提示。 +- 集成方可在任意时刻通过 STDIN 写入下述控制消息来触发相同行为: + ```jsonc + { + "type": "control/cancel", + "session_id": "session-123", + "prompt_id": "session-123########8", + "reason": "escape" + } + ``` + - `session_id`: 必填,用于定位当前会话; + - `prompt_id`: 可选;若提供,CLI 仅当该 prompt 正在 `Responding`/`WaitingForConfirmation` 状态时才执行取消;缺省时默认取消最近一次启动的请求; + - `reason`: 预留枚举,当前固定为 `"escape"`,后续可扩展 `"keyboard_interrupt"`、`"timeout"` 等。 +- CLI 响应要求: + - 若存在可取消的流式请求,必须复用 `cancelOngoingRequest` 的逻辑:调用 `AbortController.abort()`、写入 `ApiCancelEvent`、冲刷 `pendingHistoryItem` 并重置补全状态。 + - 立即向 STDOUT 输出 `{"type":"result/cancel","session_id":"session-123","prompt_id":"session-123########8","status":"ok","message":"Request cancelled."}`,便于第三方 UI 更新状态。 + - 当底层流返回 `GeminiEventType.UserCancelled` 事件时,追加发送 `{"type":"x-qwen-session-event","event":"USER_CANCELLED","message":"User cancelled the request."}`,提示会话被中断。 + - 若当前不存在可取消的请求,则响应 `{"type":"result/cancel","session_id":"session-123","status":"noop"}`,不再触发其它事件。 +- 双击 ESC 清空输入属于客户端自身的输入框逻辑;结构化模式下的集成方可在本地复用该交互,无需再向 CLI 发送额外消息。 + +#### 双向控制通道 + +- 代码现状:`third-party/qwen-code` 目前只支持单向输出(CLI → STDOUT),TUI 中的确认/对话框逻辑直接在进程内处理,缺少像 Claude Code 那样的 `control_request` / `control_response` hook。 +- 设计需求:为了让 SDK 集成具备与 TUI 等价的能力(命令确认、敏感操作授权、子代理调度等),协议需复用“独立事件 + STDIN 回写”模式,实现真正的双向通信。 + - 当 CLI 需要外部输入时,输出 `{"type":"control_request","session_id":"session-123","request_id":"req-1","request":{"subtype":"confirm_shell_commands",...}}`。 + - 支持 In-Process MCP Server 时,CLI 还需输出 `subtype: "mcp_message"`,实体内携带 JSON-RPC (`tools/list`、`tools/call`、`initialize` 等);SDK 处理后在 `control_response` 中返回 `{"response":{"mcp_response":{...}}}`。 + - 第三方应用处理后,通过 STDIN 写回 `{"type":"control_response","request_id":"req-1","response":{"subtype":"success","result":{"behavior":"approve"}}}`。 + - 该事件不写入会话历史,保持与 `command_hint`、`heartbeat` 相同的旁路通道。 +- 场景覆盖: + - `/confirm_shell_commands`、`confirm_action`、`quit_confirmation` 等需要用户响应的命令。 + - In-Process MCP Server 工具调用链路(`mcp_message` → `mcp_response`)。 + - 工具权限审批(类似 `can_use_tool`)、计划执行或子代理调度需要外部确认的步骤。 + - 未来扩展的弹窗、表单、身份验证流程。 +- 回退策略:在通道未启用时,CLI 应采用显式策略(例如自动拒绝危险操作、提示“在结构化模式下不可用”),并在 `result/command` 中返回明确错误,避免静默失败。 +- 后续工作: + 1. 在 RFC 中追加 `control_request`/`control_response` 的 JSON Schema(参考 Claude Code 实现)。 + 2. 在 CLI 中抽象统一的控制消息分发层,使 TUI 与 CLI 复用同一逻辑。 + 3. 在 SDK 中实现监听与响应,暴露钩子给上层 UI。 + +#### 事件机制分类 + +为便于第三方实现统一的事件路由与处理,本 RFC 将结构化 STDIN/STDOUT 消息归纳为三类: + +1. **结果事件 (`result/*`)** + - CLI → STDOUT 的单向通告,例如 `result/command`、`result/command_hint`、`result/heartbeat`、`result/cancel`、`x-qwen-session-event`。 + - 承载命令输出、提示建议、心跳反馈、取消结果及会话状态更新,不要求 SDK 回执。 + - 建议第三方根据 `type` 字段实现事件派发,确保不同 UI/服务都能统一处理。 + +2. **请求事件 (`*request`)** + - 第三方 → STDIN 的前向指令,例如 `command_hint_request`、`heartbeat_request`、`control/cancel`。 + - 用于触发 CLI 的即时响应: + - `command_hint_request` 获取 `/`、`@` 等提示建议; + - `heartbeat_request` 维持保活; + - `control/cancel` 终止当前响应。 + - CLI 会以对应的 `result/*` 事件回复(如 `result/command_hint`、`result/heartbeat`、`result/cancel`)。 + +3. 
**控制通道事件 (`control_request`/`control_response`)** + - CLI → STDOUT 输出 `control_request`,SDK/第三方需在 STDIN 写回匹配的 `control_response`; + - 用于需要回执的回调场景:工具授权 (`can_use_tool`)、Hook (`hook_callback`)、规划中的 MCP 调用 (`subtype: "mcp_message"`) 等; + - 每条 `control_request` 包含唯一 `request_id`,SDK 必须在合理超时内返回结果或错误,避免 CLI 阻塞。 + - `control_request` 不写入会话历史,而通过控制层旁路处理,保持与 TUI 行为一致。 + +以上三类事件通过统一的 JSON Lines 协议传输,可视作同一事件机制的不同子类。集成方在实现时应: + +- 按 `type` 或 `subtype` 进行分发,避免将 `control_request` 与一般 `result/*` 混淆; +- 确保请求事件在 STDIN 写入后正确等待相应的 `result/*` 反馈; +- 对控制通道事件实现健壮的回执与超时处理,以防 CLI 阻塞或进入不一致状态。 + +### JSON Schema 与版本协商 + +- 在 OpenAI `/chat/completions` 基础上精简为“增量输入”模型,保留 `model`、`tools` 等字段,并新增 `session_id`、`prompt_id`、`origin`、`tool_call_id` 等会话字段。 +- CLI 在首个 `chat.completion` 对象的 `metadata`(或 `system_fingerprint` 扩展段)中附带 `protocol_version`、`output_format`、`input_format`、`capabilities`(当支持 chunk 输出时应包含 `chat.completion.chunk` 能力位)。 +- SDK 若请求超出 CLI 能力(例如 `protocol_version=3`),CLI 将返回 `chat.completion` 对象,其中 `choices[0].finish_reason="error"` 并在 `usage` 或 `metadata.error` 中携带 `unsupported_protocol` 描述,同时以非零退出码终止。 + +### 错误语义 + +- **致命错误**: 输出 OpenAI 风格错误对象,并以非零退出码终止: + ```json + { + "error": { + "message": "invalid tool input", + "type": "invalid_request_error", + "param": "tools[0].function.arguments", + "code": "QWEN_INVALID_TOOL_ARGS" + } + } + ``` +- **可恢复错误 / 任务失败**: 返回 `chat.completion` 对象,`choices[0].finish_reason` 设为 `stop`(或 `tool_calls`)并在 `choices[0].message.content` / `metadata.error` 中说明失败原因,CLI 继续保持健康状态。 +- **控制协议异常**: 若工具授权、Hook 回调出错,CLI 通过 `chat.completion` 对象携带 `metadata.control_errors`,SDK 可据此决定是否重试或中断。 + +## 安全与资源控制 + +- CLI 在 `stream-json` 模式下仍遵循现有的 Approval/工具白名单逻辑。 +- 计划中的 `control_request`/`control_response` 通道将允许 SDK/第三方在敏感操作前进行审计;在实现之前,应默认拒绝需要授权的动作,或要求用户回退到 TUI 模式。 +- 支持在协议层扩展 `heartbeat`,若超时可触发进程回收,避免资源泄漏。 + +## 可观测性与调试 + +- 第三方后端服务/SDK 记录消息序列到审计日志,便于重放与问题定位。 +- 提供示例命令: + + ```bash + echo '{"model":"qwen-coder","messages":[{"role":"user","content":"你好"}]}' \ + | qwen --input-format stream-json --output-format stream-json + + echo '{"model":"qwen-coder","messages":[{"role":"user","content":"逐字输出问候"}]}' \ + | qwen --input-format stream-json --output-format stream-chunk-json + ``` diff --git a/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_ref_claude_clear_cn.md b/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_ref_claude_clear_cn.md new file mode 100644 index 000000000..815e89694 --- /dev/null +++ b/docs/rfc/qwen-code-cli-output-format-stream-json-rfc_ref_claude_clear_cn.md @@ -0,0 +1,624 @@ +# RFC: Qwen-Code CLI 结构化输入输出规范(整理版) + +## 概览 + +| 字段 | 详情 | +| --- | --- | +| 状态 | Draft | +| 更新时间 | 2025-10-13 | +| 作者 | x22x22 | +| 追踪 | | +| 范围 | CLI 层 `--input-format/--output-format` 结构化协议、事件语义、错误规范与落地计划 | + +- 目标是为第三方系统与多语言 Agent SDK 提供稳定、可编程的 IPC Stream JSON 能力。 +- 协议保持与 TUI 相同的行为,补齐 JSON Lines 输出、对称输入以及控制通道,回应社区关于 `--input-format/--output-format json/stream-json` 的诉求。 +- 文档聚焦 CLI 侧能力,不涵盖 SDK 内部设计。 + +## 背景与场景 + +### Issue 795 概述 +- 社区在 issue [#795](https://github.com/QwenLM/qwen-code/issues/795) 中请求为 CLI 增加 `--input-format/--output-format json/stream-json`,希望参考 Claude Code 的实现,提供可被程序稳定消费的结构化 IO。 +- RFC 旨在扩展 CLI,使其在保持 TUI 兼容的同时,为 SDK 与第三方后端提供对称的 JSON 协议与消息语义。 + +### 集成方场景 +- **任务级串行处理**:SDK 逐条发送 prompt,同时在多个阶段接收 CLI 返回的数据并二次处理后再展示给最终用户。 +- **任务级流式直传**:SDK 逐条发送 prompt,CLI 的流式响应被直接转发给用户,保持实时可见性。 +- **指令提示与快捷符号**:第三方输入框中的 `/`、`@`、`?` 等触发行为需要与 TUI 完全一致,确保不同前端体验统一。 +- **前端终端模拟**:利用 xterm.js 等库复刻终端交互,输入区域与终端输出分离,但仍需消费 CLI 的完整终端语义。 + +### 集成方式 +- 第三方程序依赖后续提供的多语言 `qwen-code-agent-sdk`。 +- SDK 
通过子进程方式启动 `qwen code`,并以 STDIN/STDOUT 建立双向 IPC。 +- SDK 负责读取 CLI 的结构化输出,并完成错误处理与状态管理。 +- 第三方应用消费 SDK 的结果,在自身 UI 或后端逻辑中呈现。 + +### 现状痛点 +- CLI 仅面向人工交互的纯文本 STDOUT,输出语义不稳定,难以被自动化消费。 +- 缺乏对称的结构化输入协议,无法驱动高级自动化与工具调度流程。 + +## 目标与范围 + +| 类型 | 内容 | +| --- | --- | +| 设计目标 | 可配置输出格式、JSON Lines 流式协议、对称结构化输入、通用 schema、面向 SDK 友好化 | +| 非目标 | 描述 SDK 具体实现(另见 Agent 框架文档) | +| 核心痛点 | 仅有人机交互 STDOUT、缺少结构化输入、无法驱动自动化流程 | +| 场景示例 | SDK 分批发送 prompt 并处理多段响应;流式直传用户;`/`,`@`,`?` 指令与 TUI 对齐;xterm.js 前端分离输入与终端 | + +## 接口总览 + +| 类别 | 关键项 | 说明 | +| --- | --- | --- | +| CLI 参数 | `--input-format`、`--output-format`、`--include-partial-messages` | SDK 默认固定 `--output-format stream-json`,在流式模式追加 `--input-format stream-json`;`--include-partial-messages` 可开启 Qwen-Code 式增量输出 | +| 输出事件 | `user`、`assistant`、`system`、`result`、`stream_event`、`control_request`、`control_response`、`control_cancel_request` | 全部以 JSON Lines 逐行写入 STDOUT,字段对齐 Claude Code CLI | +| 输入事件 | `user`、`control_request`、`control_response`、`control_cancel_request` | JSON 行写入 STDIN,对称驱动 CLI 行为,Envelope 与 Claude Python SDK 一致 | +| 通道语义 | 会话事件、控制通道、流式事件 | 明确回执要求(默认 60s 超时),防止 CLI 阻塞 | +| 协议扩展 | 握手元数据、版本协商、错误语义 | 参考 Claude `initialize`、`result`、错误对象结构 | + +- 通信仍使用标准输入输出,未引入额外套接字。 +- `text` 模式保留原行为,结构化模式提供稳定 schema 与可观测字段。 + +## 输出格式语义 + +| 格式 | 适用场景 | 行为概要 | 兼容性 | +| --- | --- | --- | --- | +| `text` | 手动调试兼容模式 | 输出原有 TUI 文本 | 仅 CLI 手动/降级模式使用 | +| `stream-json` | Claude Code 样式 JSONL | 每行含 `type` 字段:`assistant`、`user`、`result`、`stream_event`、`control_*` 等 | 对齐 Claude Code CLI | +| `stream-json` + `--include-partial-messages` | 增量消息 JSONL | 在标准事件外追加部分 `assistant` 内容与 `stream_event`,便于实时展示 | 对齐 Claude Code `include_partial_messages` | + +### 消费策略 + +- **消息级 JSONL(`stream-json`)**:适合第三方后端或 CLI 包装器按阶段消费结果,与 Claude Agent SDK 默认行为一致。 +- **增量模式(`stream-json` + `--include-partial-messages`)**:面向 IDE/UI“边生成边展示”的实时场景,SDK 需监听 `stream_event` 中的 `content_block_delta` 与 `message_stop` 事件完成聚合。 +- **终端语义一致性**:Claude Code CLI 不提供独立的终端注解事件;stdout/stderr 相关内容会以文本或工具结果形式写入 `assistant` 消息,增量期间由 `content_block_delta` 输送文本片段,宿主若需还原 ANSI 可解析内容本身。 + +### `stream-json` 示例 + +```json +{"type":"system","subtype":"init","session_id":"session-123","data":{"model":"qwen3-coder"}} +{"type":"stream_event","uuid":"evt-1","session_id":"session-123","event":{"type":"message_start","index":0}} +{"type":"stream_event","uuid":"evt-2","session_id":"session-123","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"正在分析…"}}} +{"type":"stream_event","uuid":"evt-3","session_id":"session-123","event":{"type":"message_stop","index":0}} +{"type":"control_request","request_id":"req-tool-1","request":{"subtype":"can_use_tool","tool_name":"edit_file","input":{"path":"src/main.py","patch":"--- a/src/main.py\n+++ b/src/main.py\n@@\n-print('Hi')\n+print('Hello')"}}} +{"type":"control_response","response":{"subtype":"success","request_id":"req-tool-1","response":{"behavior":"allow","message":"已授权","interrupt":false}}} +{"type":"assistant","message":{"role":"assistant","model":"qwen3-coder","content":[{"type":"tool_result","tool_use_id":"toolu_1","content":[{"type":"text","text":"命令执行成功"}],"is_error":false}]}} +{"type":"result","subtype":"session_summary","duration_ms":6021,"duration_api_ms":5870,"is_error":false,"num_turns":2,"session_id":"session-123","total_cost_usd":0.0021,"usage":{"input_tokens":820,"output_tokens":640}} +``` + +### 增量消息行为要点 + +- Claude CLI 仅提供 `stream-json`,当启用 `--include-partial-messages` 时,会把 Anthropic RawMessageStreamEvent 原样封装为 `stream_event`,常见事件类型包括 
`message_start`、`content_block_start`、`content_block_delta`、`content_block_stop` 与 `message_stop`。 +- `stream_event.event` 中的字段与 Claude SDK `StreamEvent` dataclass 一致:`index` 对应内容块序号,`delta.type` 可能是 `text_delta`、`thinking_delta` 或 `input_json_delta` 等,SDK 需自行聚合增量并在 `message_stop` 后完成收尾。 +- `assistant` 消息在同一回合内仍返回完整的 `content` 列表(文本、工具调用、思考块等),方便第三方在增量消费结束后复用统一结构。 +- `result` 消息提供 `usage`、`duration_ms`、`duration_api_ms`、`num_turns`、`session_id` 等统计字段,可直接映射到原 RFC `chat.completion` 的资源消耗语义。 + +## 事件载荷与注解 + +Claude CLI 的结构化输出由 `third-party/anthropics/claude-agent-sdk-python` 中的 `SubprocessCLITransport` 读取并交由 `message_parser` 解析为强类型对象。`assistant`、`user`、`system`、`result`、`stream_event` 与 `control_*` 事件共同覆盖对话内容、工具调度、统计汇总与控制协议。对应关系如下: + +| 类型 | 主要字段 | 用途 | +| --- | --- | --- | +| `assistant` 内容块 | `content[].type`、`content[].text`/`thinking`/`tool_use`/`tool_result` | 复刻 Claude CLI 的会话消息与工具调用流程 | +| `stream_event` | `event.type`、`event.delta`、`event.index` | 原样承载 Anthropic RawMessageStreamEvent(`message_start`、`content_block_delta` 等)以便还原增量输出 | +| `system` | `subtype`、`data` | 广播初始化信息(模型、可用工具、slash 命令、设置来源等)或后续状态通知 | +| `control_request.*` | `request.subtype`、`request_id`、`request` | 工具审批、Hook 回调、MCP JSON-RPC 桥接 | +| `result` | `subtype`、`duration_ms`、`num_turns`、`usage` | 汇总任务统计、错误信息与成本 | + +### `StreamEvent` 示例 + +```json +{ + "type": "stream_event", + "uuid": "evt-delta-1", + "session_id": "session-123", + "event": { + "type": "content_block_delta", + "index": 0, + "delta": { + "type": "text_delta", + "text": "命令执行成功" + } + } +} +``` + +- 常见事件类型:`message_start`(会话增量开始)、`content_block_start`(声明内容块类型)、`content_block_delta`(增量文本或思考内容)、`content_block_stop`、`message_stop`。 +- `delta.type` 会区分 `text_delta`、`thinking_delta`、`input_json_delta` 等,第三方需结合 `event.index` 与 `parent_tool_use_id` 聚合增量。 +- 未启用 `--include-partial-messages` 时不会输出 `stream_event`;所有输出仅由完整的 `assistant`/`result` 构成。 + +### 工具结果展示 + +```json +{ + "type": "assistant", + "message": { + "role": "assistant", + "model": "qwen3-coder", + "content": [ + { + "type": "tool_result", + "tool_use_id": "toolu_1", + "is_error": false, + "content": [ + {"type": "text", "text": "diff --git a/src/main.py b/src/main.py\n@@\n-print('Hi')\n+print('Hello')"} + ] + } + ] + } +} +``` + +- `tool_use_id`:与前一条 `tool_use` 块配对,Claude SDK 会将其映射为 `ToolResultBlock`。 +- `content` 可以是字符串或内容块数组;当返回 diff、JSON 等结构化信息时,可拆成多个 `text`/`image` 块。 +- `is_error=true` 时表示工具执行失败,SDK 应展示错误提示或回退。 + +### 思考与会话事件 + +```json +{ + "type": "assistant", + "message": { + "role": "assistant", + "model": "qwen3-coder", + "content": [ + { + "type": "thinking", + "thinking": "Analyzing repository layout…", + "signature": "sig-001" + } + ] + } +} +``` + +- `thinking` 块展示 Claude 的内部推理,`signature` 可用于安全校验。 + +### SystemMessage 示例 + +```json +{ + "type": "system", + "subtype": "init", + "session_id": "session-123", + "data": { + "cwd": "/workspace/demo", + "model": "qwen3-coder", + "slash_commands": ["commit", "run"], + "tools": ["read", "write", "run_command"], + "setting_sources": ["user"], + "permission_mode": "manual" + } +} +``` + +- `system` 消息常以 `subtype="init"` 开场,`data` 中包含工作目录、可用工具、可加载的 slash 命令、权限模式等元数据。 +- 后续系统提示可能使用其他 `subtype`(如 `status_update`、`warning`),结构同样承载在 `data` 字段内。 + +## 输入格式 + +| 模式 | 行为 | 说明 | +| --- | --- | --- | +| `text` | 保留原有 TUI 输入体验 | 仅适用于手动调试或降级模式,沿用 CLI 传统逐行解析 | +| `stream-json` | Claude Envelope JSONL | 每行 JSON 传递 `user`、`assistant`、`result`、`control_*` 等事件,SDK 以流式方式驱动 CLI | + +### 用户消息 Envelope + +```jsonc 
+{"type":"user","message":{"role":"user","content":[{"type":"text","text":"请修复 main.py 的 bug"}]},"parent_tool_use_id":null} +``` + +- `message.content` 可为字符串或 `ContentBlock` 数组;当内容包含工具回执时应为 `tool_result` 块。 +- `parent_tool_use_id` 允许在工具串联或子任务续写时保持上下文关联,Claude SDK 会基于该字段恢复父工具调用。 +- SDK 可通过 `options` 字段覆写一次性参数(模型、权限模式、是否包含部分消息等),格式与 `ClaudeAgentOptions` 保持一致。 +- 写入完成后可调用 `end_input()`(或关闭 stdin)提示 CLI 当前回合结束,`SubprocessCLITransport` 会向 CLI 发送 EOF。 + +### 工具结果回传 Envelope + +```jsonc +{"type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_1","content":[{"type":"text","text":"命令输出"}],"is_error":false}]}} +``` + +- 当 CLI 通过 `control_request.can_use_tool` 请求外部执行操作时,SDK 负责落地实际动作并将结果回写为 `tool_result` 块。 +- `is_error=true` 表示工具失败,CLI 将以 `assistant` 消息或 `result` 反馈错误并决定是否中断后续流程。 +- 允许在 `content` 中混合多种块(文本、富媒体、JSON),Claude SDK 会按 `ContentBlock` 定义解析。 + +### 会话控制与初始化 + +- Claude SDK 在建立流式会话时会先发送 `control_request.initialize`,CLI 以 `control_response.success` 回传,`response` 中通常包含: + - `commands`:CLI 支持的命令与子命令列表。 + - `output_styles`/`output_style`:默认输出格式(通常为 `stream-json`)。 + - `serverInfo`:CLI 侧标识、版本号、可选能力标志。 +- SDK 在构造初始化请求时会将 Hook 与 MCP 配置编码至 `request` 字段(如 `hooks`、`mcp`、`permission_mode`),字段结构与 `InitializeRequest` / `InitializeSuccessPayload` 对齐。【参考 `src/claude_agent_sdk/_internal/query.py`】 +- 初始化完成后,CLI 将继续输出 `assistant`/`stream_event`/`result` 等消息,保持与 TUI 一致的事件顺序。 + +### SDK 主动控制请求 + +- Claude SDK 通过 `_send_control_request()` 发送下列子类型: + - `interrupt`:请求 CLI 终止当前任务,常用于用户点击“Stop”。 + - `set_permission_mode`:动态调整权限模式(如 `auto`、`manual`)。 + - `set_model`:切换模型,允许传入 `null` 表示恢复 CLI 默认。 + - `initialize`:首个握手请求,封装 Hook、权限、MCP 配置。 +- 每个请求包含唯一 `request_id`,SDK 会等待最长 60 秒的 `control_response` 回执,超时将抛出 `Control request timeout` 并清理挂起状态。 + +### CLI 主动控制请求与取消 + +- CLI 在需要外部确认或执行扩展逻辑时输出 `control_request`: + - `can_use_tool`:征询工具执行权限,可携带 `permission_suggestions`、`blocked_path` 等上下文。 + - `hook_callback`:触发 SDK 侧自定义 Hook,并将返回值通过 `control_response` 回写。 + - `mcp_message`:封装 JSON-RPC 报文与服务器名称,交由 SDK 托管的 MCP Server 处理。 +- 若需要放弃等待,CLI 会发送 `control_cancel_request`,SDK 应停止对对应 `request_id` 写回结果,避免状态错乱。 + +## 实时事件 + +| 能力 | 事件 / 请求 | 响应 | 说明 | +| --- | --- | --- | --- | +| 增量输出 | `stream_event.message_start` / `content_block_*` | `assistant` 消息或 `stream_event.message_stop` | 描述增量生成生命周期,便于还原逐 token / 块输出 | +| 控制回执 | `control_request.*` | `control_response.success/error` | 工具审批、Hook/MCP 回调需严格配对 `request_id` | +| 中断 | SDK 发送 `control_request.interrupt` | `control_response.success` + `result` | 停止当前任务并返回汇总信息 | +| 取消提示 | `control_cancel_request` | 无需响应 | CLI 请求放弃等待某个控制回执,SDK 仅记录并清理挂起状态 | + +```jsonc +{"type":"stream_event","uuid":"evt-2","session_id":"session-123","event":{"type":"message_stop","index":0}} +``` + +- SDK 应组合前序 `content_block_delta` 与此处的 `message_stop` 来生成最终文本。 +- Claude CLI 未提供额外的 `command_hint_request`、`heartbeat_request` 或 keepalive 事件,宿主应用需自行设定超时与保活策略。 + +## 事件分类与通道 + +| 类别 | 方向 | 代表事件 | 回执要求 | 作用 | +| --- | --- | --- | --- | --- | +| 会话主通道 | CLI → STDOUT | `user`(回放)、`assistant`、`result`、`system`、`stream_event` | 无需回执 | 呈现对话、工具输出、统计、保活 | +| 控制通道 | CLI ↔ STDIO | `control_request` / `control_response` | 必须匹配 `request_id` | 工具审批、Hook、MCP、初始化、权限切换 | +| 输入通道 | SDK/第三方 → STDIN | `user`、`control_request`(SDK 发起) | CLI 必须处理或返回错误 | 驱动用户输入、主动控制、取消 | + +- `system` 消息用于广播初始化能力,`result` 消息返回统计与错误信息,`stream_event` 则承载增量输出;三类事件共同覆盖主通道语义。 +- 所有事件均以 JSON Lines 传输,建议按照 `type`、`subtype` 与 `event.type` 路由,并为控制通道实现超时机制避免阻塞。 + +## 控制请求与响应 + +| 字段 | 说明 | +| --- | --- | +| `type` | 固定为 
`control_request` 或 `control_response` | +| `request_id` | 唯一标识,请求与响应配对 | +| `request` | 当 `type=control_request` 时携带子类型与明细,例如 `can_use_tool`、`hook_callback`、`mcp_message` | +| `response` | 当 `type=control_response` 时包含 `subtype`=`success`/`error` 及返回值 | + +- `control_request` 示例: + +```jsonc +{ + "type": "control_request", + "request_id": "req-1", + "request": { + "subtype": "can_use_tool", + "tool_name": "edit_file", + "input": {"path": "main.py", "patch": "..."}, + "permission_suggestions": null + }, + "session_id": "session-123" +} +``` + +- 成功回执示例: + +```jsonc +{ + "type": "control_response", + "response": { + "subtype": "success", + "request_id": "req-1", + "response": { + "behavior": "allow", + "updatedInput": null, + "message": "允许执行" + } + } +} +``` + +- 失败回执示例: + +```jsonc +{ + "type": "control_response", + "response": { + "subtype": "error", + "request_id": "req-1", + "error": "Tool execution denied by policy" + } +} +``` + +- MCP 集成:当 `subtype="mcp_message"` 时,`request` 中包含 `server_name` 与 JSON-RPC `message`;SDK 需在 `response.response.mcp_response` 中回传规范化结果。【参考 `query.py` 及 `types.py`】 +- 若 CLI 发送 `control_cancel_request`,SDK 必须停止等待并记录取消原因。Claude SDK 当前仅记录日志,后续可扩展主动回执。 + +## 版本协商与错误语义 + +| 项目 | 说明 | +| --- | --- | +| 协议版本 | `control_response.success` (initialize) 可返回 `protocolVersion`、`supportedOutputStyles`、`capabilities`,SDK 应缓存用于兼容性判断 | +| 版本不匹配 | 若 CLI 不支持请求的能力,会输出 `result`(`subtype="error"`、`is_error=true`)并在 stderr 打印 Anthropic 错误 JSON,SDK 侧会抛出异常终止流程 | +| 致命错误 | CLI 遵循 Claude Code 错误对象格式(`type`、`message`、`code`),同时写入 `stderr`,SDK 将其包装为异常 | +| 可恢复错误 | 可通过 `result` 或 `assistant` 返回错误消息,`is_error=true` 并保持进程健康 | +| 控制协议错误 | `control_response.error` 的 `error` 字段提供文本描述,SDK 可决定重试、降级或中止 | + +致命错误示例: + +```json +{ + "error": { + "message": "invalid tool input", + "type": "invalid_request_error", + "param": "tools[0].function.arguments", + "code": "QWEN_CODE_INVALID_TOOL_ARGS" + } +} +``` + +## 安全与资源控制 + +| 领域 | 策略 | +| --- | --- | +| 权限 | 沿用 Claude CLI 审批模型;未收到 `control_response` 时默认拒绝高风险操作,并提示需要人工确认 | +| 审计 | 建议记录完整 JSONL 流(含 `control_request`/`response`、`user` 输入),便于复盘与合规检查 | +| 保活 | Claude CLI 未内置 keepalive 事件;建议结合 `stream_event` 活跃度与控制通道超时实现监控 | +| 资源释放 | `SubprocessCLITransport.close()` 会在会话结束时清理子进程与异步任务组,第三方应在异常时调用 | + +## 日志分层与可观测 + +| 组件 | 要点 | +| --- | --- | +| 会话输出 | `assistant` 文本块与 `stream_event` 增量共同覆盖 stdout/stderr,必要时可在内容中解析 ANSI | +| `result` 统计 | `duration_ms`、`usage`、`total_cost_usd` 便于计费与性能分析 | +| Hook/MCP 追踪 | `control_request`/`response` 可附带自定义元数据,建议记录以支持端到端追踪 | +| OpenTelemetry | SDK/CLI 可分别注入 OTel Trace,将 `request_id` 和 `session_id` 作为 Span 属性以便串联 | + +- 推荐将所有消息持久化到审计日志,结合 `session_id`、`request_id`、`uuid` 实现重放与问题定位。 + +## 调试示例 + +```bash +echo '{"type":"user","message":{"role":"user","content":[{"type":"text","text":"你好"}]}}' | qwen code --input-format stream-json --output-format stream-json --model qwen-coder + +echo '{"type":"user","message":{"role":"user","content":[{"type":"text","text":"逐字输出问候"}]}}' | qwen code --input-format stream-json --output-format stream-json --include-partial-messages +``` + +- 结合 `jq -c` 可验证事件流是否符合 Claude Envelope 约定。 +- 建议同时监控 CLI `stderr`,以捕获致命错误或协议不匹配提示。 + +## 会话消息 Schema 摘要 + +为了便于第三方实现与 Claude SDK 对齐的解析逻辑,本节整理主要 Envelope 与内容块结构,均来源于 `third-party/anthropics/claude-agent-sdk-python/src/claude_agent_sdk/types.py` 与 `message_parser.py`。 + +### `AssistantMessageEnvelope` + +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `type` | `"assistant"` | 固定值 | +| `message.role` | `"assistant"` | 角色标识 | +| `message.model` | `string` | 实际使用的模型名称 
| +| `message.content` | `ContentBlock[]` | 混合文本、思考、工具调用、工具结果 | +| `parent_tool_use_id` | `string?` | 指向父工具调用,便于链式对话 | + +### `UserMessageEnvelope` + +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `type` | `"user"` | 用户输入或回放 | +| `message.content` | `string | ContentBlock[]` | 原始文本或结构化内容 | +| `parent_tool_use_id` | `string?` | 当消息是工具结果续写时必填 | + +### `ResultMessageEnvelope` + +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `type` | `"result"` | 汇总事件 | +| `subtype` | `string` | 常见值:`session_summary`、`tool_execution`、`error` | +| `duration_ms` / `duration_api_ms` | `number` | 总耗时与 API 耗时 | +| `is_error` | `boolean` | 是否为错误场景 | +| `usage` | `object?` | token 统计,字段同 Claude SDK `Usage` | +| `total_cost_usd` | `number?` | 计费信息 | + +### `StreamEventEnvelope` + +| 字段 | 类型 | 说明 | +| --- | --- | --- | +| `type` | `"stream_event"` | 流式事件 | +| `uuid` | `string` | 事件唯一标识 | +| `session_id` | `string` | 会话 ID | +| `event.type` | `string` | `message_start`、`content_block_start`、`content_block_delta`、`content_block_stop`、`message_stop` | +| `event.index` | `number?` | 指向内容块序号,与 `assistant` 消息的块顺序一致 | +| `event.delta` | `object?` | 当 `event.type` 为 `content_block_delta` 时包含 `text_delta`/`thinking_delta` 等增量 | +| `event` 其他字段 | `object?` | 原样保留 Anthropic RawMessageStreamEvent 的附加信息 | + +## 控制 Schema 摘要 + +下表列出常见 `control_request` / `control_response` 子类型及关键字段,方便 SDK/第三方严格校验。 + +| 子类型 | 方向 | 关键字段 | 说明 | +| --- | --- | --- | --- | +| `initialize` | SDK → CLI | `hooks`、`mcp`、`permission_mode` | 建立握手,公布 Hook、MCP 与权限配置 | +| `interrupt` | SDK → CLI | - | 中断当前任务,CLI 返回 `control_response.success` 并输出 `result` | +| `set_permission_mode` | SDK → CLI | `mode` | 切换 `auto`/`manual` 等权限模式 | +| `set_model` | SDK → CLI | `model` | 动态切换模型,支持 `null` 恢复默认 | +| `can_use_tool` | CLI → SDK | `tool_name`、`input`、`permission_suggestions` | 工具执行授权,SDK 返回 `behavior`、`updatedInput` 等 | +| `hook_callback` | CLI → SDK | `callback_id`、`input` | 调用已注册 Hook,支持同步/异步结果 | +| `mcp_message` | CLI → SDK | `server_name`、`message` | 与 MCP Server 通信,遵循 JSON-RPC | +| `control_cancel_request` | CLI → SDK | `request_id`、`reason` | 终止挂起控制请求,SDK 应停止回执 | + +成功响应 (`control_response.success`) 常用 payload: + +- `InitializeSuccessPayload`:携带 `commands`、`output_style`、`serverInfo`、`capabilities`。 +- `ToolPermissionSuccessPayload`:`behavior`=`allow`/`deny`,可选 `updatedInput`、`updatedPermissions`、`message`。 +- `HookCallbackSuccessPayload`:`async`、`continue`、`decision`、`hookSpecificOutput` 等字段。 +- `McpMessageSuccessPayload`:封装 `mcp_response`,与 JSON-RPC 返回结构一致。 +- `AckOnlyPayload`:用于无附加数据的确认(如 `interrupt` 成功)。 + +错误响应 (`control_response.error`) 在 `error` 字段提供字符串描述,可结合 `result` 或 `stream_event` 提示用户。 + +## SDK ↔ CLI 协作流程速记 + +1. **启动阶段**:`SubprocessCLITransport` 追加 `--output-format stream-json`、`--input-format stream-json`,合并环境变量并启动 `qwen` 进程,保持 STDIN/STDOUT 管道打开。 +2. **握手阶段**:SDK 发送 `control_request.initialize`,CLI 回以 `control_response.success`,并在后续输出首批 `assistant`/`system`/`result` 消息公布能力与状态。 +3. **主循环**: + - SDK 写入 `user` 消息或主动控制请求。 + - CLI 输出 `assistant`、`stream_event`、`result` 等事件,并在需要授权时推送 `control_request`。 + - SDK 匹配 `request_id` 写回 `control_response`,如有工具执行结果则以 `user` 消息的 `tool_result` 内容块反馈。 +4. **取消/中断**:SDK 可发送 `control_request.interrupt`,或由 CLI 输出 `control_cancel_request` 终止挂起请求;双方需清理本地状态并记录日志。 +5. 
**收尾阶段**:CLI 输出最终 `result`(`is_error`/`session_summary`),随后退出或等待下一轮输入;传输层在检测到 EOF 时释放子进程与缓冲队列。 + +该流程在 Claude SDK 中由 `Query` 协程驱动,第三方实现可参考相同的状态机、超时与异常处理策略。 + +## 源码索引(Claude 规范相关) + +| 功能 | 文件 | 说明 | +| --- | --- | --- | +| 传输层 | `src/claude_agent_sdk/_internal/transport/subprocess_cli.py` | 启动 CLI、处理 JSONL、维护进程生命周期 | +| 控制与会话调度 | `src/claude_agent_sdk/_internal/query.py` | 握手、控制请求发送、输入流收尾、事件路由 | +| 消息解析 | `src/claude_agent_sdk/_internal/message_parser.py` | 将 JSON 行解析为 `AssistantMessage`、`ResultMessage`、`StreamEvent` 等对象 | +| 类型定义 | `src/claude_agent_sdk/types.py` | `ContentBlock`、控制请求/响应、结果统计等 TypedDict/Dataclass | +| 对外客户端 | `src/claude_agent_sdk/client.py`、`query.py` | 暴露 `ClaudeSDKClient`、`query()` 等高层 API,封装命令行与选项 | + +- 建议在实现第三方 SDK 时直接参考以上模块,确保字段命名、结构与错误处理完全一致。 +- 若需自定义传输层(如 WebSocket),可沿用同样的 Envelope 与控制语义,实现最小改动的兼容适配。 + +## 详细 Schema 附录 + +以下附录按照 Claude SDK 文档顺序列出关键 Envelope 与控制协议 Schema,便于实现时逐项比对。 + +### 会话消息 Envelope 结构 + +| 名称 | 核心字段 | 说明 | +| --- | --- | --- | +| `UserMessageEnvelope` | `type="user"`、`message.role="user"`、`message.content`、`parent_tool_use_id` | SDK 写入 CLI 的用户消息封装,也用于 CLI 回放用户输入。 | +| `AssistantMessageEnvelope` | `type="assistant"`、`message.model`、`message.content[]`、`parent_tool_use_id` | Claude CLI 输出的助手回复,内容块遵循 `ContentBlock` 枚举。 | +| `SystemMessageEnvelope` | `type="system"`、`subtype`、`data` | 系统提示、设置或统计信息。 | +| `ResultMessageEnvelope` | `type="result"`、`subtype`、`duration_ms`、`is_error`、`usage`、`total_cost_usd` | 任务级统计、工具执行总结或错误描述。 | +| `StreamEventEnvelope` | `type="stream_event"`、`uuid`、`session_id`、`event.type`、`event.delta` | 承载 Anthropic RawMessageStreamEvent,用于增量输出与内容块生命周期管理。 | + +`ContentBlock` 类型包括: + +- `text`: `{"type":"text","text":"..."}` —— 普通文本回复。 +- `thinking`: `{"type":"thinking","thinking":"...","signature":"sig-1"}` —— 展示 Claude 内部推理。 +- `tool_use`: `{"type":"tool_use","id":"toolu_1","name":"run_shell","input":{...}}` —— 声明工具调用并提供参数。 +- `tool_result`: `{"type":"tool_result","tool_use_id":"toolu_1","content":[...],"is_error":false}` —— 返回工具执行结果,可包含富文本或结构化内容。 + +### 控制请求 Schema 摘要 + +| 子类型 | 方向 | 必需字段 | 说明 | +| --- | --- | --- | --- | +| `initialize` | SDK → CLI | `hooks`、`mcp`、`permission_mode` | 握手阶段传递 Hook/MCP 配置与权限模式。 | +| `interrupt` | SDK → CLI | - | 中断当前任务。 | +| `set_permission_mode` | SDK → CLI | `mode` | 运行时切换权限模式。 | +| `set_model` | SDK → CLI | `model` | 切换模型,允许 `null` 恢复默认。 | +| `can_use_tool` | CLI → SDK | `tool_name`、`input`、`permission_suggestions?` | 工具权限与参数确认。 | +| `hook_callback` | CLI → SDK | `callback_id`、`input?`、`tool_use_id?` | 执行已注册 Hook,支持同步/异步结果。 | +| `mcp_message` | CLI → SDK | `server_name`、`message` | 透传 MCP JSON-RPC 请求。 | +| `control_cancel_request` | CLI → SDK | `request_id?`、`reason?` | 取消挂起的控制请求。 | + +成功响应 (`control_response.success`) 对应的 payload: + +- `InitializeSuccessPayload`:`commands`、`output_style`、`serverInfo`、`capabilities`。 +- `ToolPermissionSuccessPayload`:`behavior`、`updatedInput?`、`updatedPermissions?`、`message?`、`interrupt?`。 +- `HookCallbackSuccessPayload`:`async?`、`continue?`、`decision?`、`hookSpecificOutput?`。 +- `McpMessageSuccessPayload`:`mcp_response`(JSON-RPC 返回)。 +- `AckOnlyPayload`:用于 `interrupt`、`set_model` 等无额外数据的确认。 + +错误响应 (`control_response.error`) 在 `error` 字段提供文本描述,可结合 `result` 或 `stream_event` 提示用户。 + +### 代码片段参考 + +```python + case "assistant": + try: + content_blocks: list[ContentBlock] = [] + for block in data["message"]["content"]: + match block["type"]: + case "text": + content_blocks.append(TextBlock(text=block["text"])) + case "thinking": + content_blocks.append( + 
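+                                # A "thinking" block carries the model's internal reasoning; its "signature" can be used for verification.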
ThinkingBlock( + thinking=block["thinking"], + signature=block["signature"], + ) + ) + case "tool_use": + content_blocks.append( + ToolUseBlock( + id=block["id"], + name=block["name"], + input=block["input"], + ) + ) + case "tool_result": + content_blocks.append( + ToolResultBlock( + tool_use_id=block["tool_use_id"], + content=block.get("content"), + is_error=block.get("is_error"), + ) + ) + + return AssistantMessage( + content=content_blocks, + model=data["message"]["model"], + parent_tool_use_id=data.get("parent_tool_use_id"), + ) + except KeyError as e: + raise MessageParseError( + f"Missing required field in assistant message: {e}", data + ) from e +``` + +```python + async def _handle_control_request(self, data: dict[str, Any]) -> None: + request_id = data["request_id"] + request = data["request"] + subtype = request["subtype"] + + if subtype == "can_use_tool": + if not self.can_use_tool: + await self._send_control_response_error( + request_id, "Tool usage not allowed: handler missing" + ) + return + response = await self.can_use_tool( + request["tool_name"], + request.get("tool_use_id"), + request.get("input"), + request.get("permission_suggestions"), + ) + await self._send_control_response_success(request_id, response) + elif subtype == "hook_callback": + response = await self._run_hook_callback(request, request_id) + await self._send_control_response_success(request_id, response) + elif subtype == "mcp_message": + response = await self._handle_mcp_message(request) + await self._send_control_response_success(request_id, response) + else: + await self._send_control_response_error( + request_id, f"Unsupported control subtype: {subtype}" + ) +``` + +```python + async def _read_stdout(self) -> None: + assert self.process is not None + assert self.stdout_queue is not None + + while True: + line = await self.process.stdout.readline() + if not line: + await self.stdout_queue.put(None) + break + + decoded = line.decode("utf-8", errors="replace").strip() + try: + message = json.loads(decoded) + except json.JSONDecodeError as exc: + raise CLIJSONDecodeError(decoded) from exc + + await self.stdout_queue.put(message) +``` diff --git a/package.json b/package.json index 4b034fe68..d360b1fec 100644 --- a/package.json +++ b/package.json @@ -29,6 +29,8 @@ "build:packages": "npm run build --workspaces", "build:sandbox": "node scripts/build_sandbox.js --skip-npm-install-build", "bundle": "npm run generate && node esbuild.config.js && node scripts/copy_bundle_assets.js", + "qwen": "tsx packages/cli/index.ts", + "stream-json-session": "tsx packages/cli/index.ts --input-format stream-json --output-format stream-json", "test": "npm run test --workspaces --if-present", "test:ci": "npm run test:ci --workspaces --if-present && npm run test:scripts", "test:scripts": "vitest run --config ./scripts/tests/vitest.config.ts", diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index 5abf181e9..af3f7eecf 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -236,6 +236,53 @@ describe('parseArguments', () => { mockConsoleError.mockRestore(); }); + it('should throw an error when include-partial-messages is used without stream-json output', async () => { + process.argv = [ + 'node', + 'script.js', + '--include-partial-messages', + ]; + + const mockExit = vi.spyOn(process, 'exit').mockImplementation(() => { + throw new Error('process.exit called'); + }); + + const mockConsoleError = vi + .spyOn(console, 'error') + 
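+        // Silence console.error so the assertion below can inspect the captured message.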
.mockImplementation(() => {}); + + await expect(parseArguments({} as Settings)).rejects.toThrow( + 'process.exit called', + ); + + expect(mockConsoleError).toHaveBeenCalledWith( + expect.stringContaining( + '--include-partial-messages requires --output-format stream-json', + ), + ); + + mockExit.mockRestore(); + mockConsoleError.mockRestore(); + }); + + it('should parse stream-json formats and include-partial-messages flag', async () => { + process.argv = [ + 'node', + 'script.js', + '--output-format', + 'stream-json', + '--input-format', + 'stream-json', + '--include-partial-messages', + ]; + + const argv = await parseArguments({} as Settings); + + expect(argv.outputFormat).toBe('stream-json'); + expect(argv.inputFormat).toBe('stream-json'); + expect(argv.includePartialMessages).toBe(true); + }); + it('should throw an error when using short flags -y and --approval-mode together', async () => { process.argv = ['node', 'script.js', '-y', '--approval-mode', 'yolo']; @@ -322,6 +369,25 @@ describe('loadCliConfig', () => { expect(config.getShowMemoryUsage()).toBe(true); }); + it('should propagate stream-json formats to config', async () => { + process.argv = [ + 'node', + 'script.js', + '--output-format', + 'stream-json', + '--input-format', + 'stream-json', + '--include-partial-messages', + ]; + const argv = await parseArguments({} as Settings); + const settings: Settings = {}; + const config = await loadCliConfig(settings, [], 'test-session', argv); + + expect(config.getOutputFormat()).toBe('stream-json'); + expect(config.getInputFormat()).toBe('stream-json'); + expect(config.getIncludePartialMessages()).toBe(true); + }); + it('should set showMemoryUsage to false when --memory flag is not present', async () => { process.argv = ['node', 'script.js']; const argv = await parseArguments({} as Settings); diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index da06e35c8..12654bedd 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -116,6 +116,9 @@ export interface CliArgs { tavilyApiKey: string | undefined; screenReader: boolean | undefined; vlmSwitchMode: string | undefined; + inputFormat: string | undefined; + outputFormat: string | undefined; + includePartialMessages: boolean | undefined; } export async function parseArguments(settings: Settings): Promise { @@ -290,6 +293,24 @@ export async function parseArguments(settings: Settings): Promise { 'Default behavior when images are detected in input. Values: once (one-time switch), session (switch for entire session), persist (continue with current model). Overrides settings files.', default: process.env['VLM_SWITCH_MODE'], }) + .option('input-format', { + type: 'string', + choices: ['text', 'stream-json'], + description: 'Set the stdin consumption format', + default: 'text', + }) + .option('output-format', { + type: 'string', + choices: ['text', 'stream-json'], + description: 'Set the stdout emission format', + default: 'text', + }) + .option('include-partial-messages', { + type: 'boolean', + description: + 'Include partial assistant deltas when output-format is stream-json', + default: false, + }) .check((argv) => { if (argv.prompt && argv['promptInteractive']) { throw new Error( @@ -301,6 +322,14 @@ export async function parseArguments(settings: Settings): Promise { 'Cannot use both --yolo (-y) and --approval-mode together. 
Use --approval-mode=yolo instead.', ); } + if ( + argv['includePartialMessages'] && + argv['outputFormat'] !== 'stream-json' + ) { + throw new Error( + '--include-partial-messages requires --output-format stream-json', + ); + } return true; }), ) @@ -493,7 +522,10 @@ export async function loadCliConfig( } const interactive = - !!argv.promptInteractive || (process.stdin.isTTY && question.length === 0); + argv.inputFormat === 'stream-json' + ? false + : !!argv.promptInteractive || + (process.stdin.isTTY && question.length === 0); // In non-interactive mode, exclude tools that require a prompt. const extraExcludes: string[] = []; if (!interactive && !argv.experimentalAcp) { @@ -560,6 +592,11 @@ export async function loadCliConfig( const vlmSwitchMode = argv.vlmSwitchMode || settings.experimental?.vlmSwitchMode; + const inputFormat = + (argv.inputFormat as 'text' | 'stream-json' | undefined) ?? 'text'; + const outputFormat = + (argv.outputFormat as 'text' | 'stream-json' | undefined) ?? 'text'; + const includePartialMessages = Boolean(argv.includePartialMessages); return new Config({ sessionId, embeddingModel: DEFAULT_GEMINI_EMBEDDING_MODEL, @@ -569,6 +606,9 @@ export async function loadCliConfig( loadMemoryFromIncludeDirectories: settings.context?.loadMemoryFromIncludeDirectories || false, debugMode, + inputFormat, + outputFormat, + includePartialMessages, question, fullContext: argv.allFiles || false, coreTools: settings.tools?.core || undefined, diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index 361bd5b7b..44278b28d 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -8,6 +8,7 @@ import type { Config } from '@qwen-code/qwen-code-core'; import { AuthType, FatalConfigError, + FatalInputError, getOauthClient, IdeConnectionEvent, IdeConnectionType, @@ -28,6 +29,7 @@ import { loadExtensions } from './config/extension.js'; import type { DnsResolutionOrder, LoadedSettings } from './config/settings.js'; import { loadSettings, SettingScope } from './config/settings.js'; import { runNonInteractive } from './nonInteractiveCli.js'; +import { runStreamJsonSession } from './streamJson/session.js'; import { AppWrapper } from './ui/App.js'; import { setMaxSizedBoxDebugging } from './ui/components/shared/MaxSizedBox.js'; import { SettingsContext } from './ui/contexts/SettingsContext.js'; @@ -336,7 +338,7 @@ export async function main() { return runZedIntegration(config, settings, extensions, argv); } - let input = config.getQuestion(); + let input = config.getQuestion() ?? ''; const startupWarnings = [ ...(await getStartupWarnings()), ...(await getUserStartupWarnings(workspaceRoot)), @@ -347,15 +349,26 @@ export async function main() { await startInteractiveUI(config, settings, startupWarnings, workspaceRoot); return; } - // If not a TTY, read from stdin - // This is for cases where the user pipes input directly into the command - if (!process.stdin.isTTY) { + const inputFormat = config.getInputFormat(); + if (inputFormat === 'stream-json') { + const trimmedInput = input.trim(); + const nonInteractiveConfig = await validateNonInteractiveAuth( + settings.merged.security?.auth?.selectedType, + settings.merged.security?.auth?.useExternal, + config, + ); + await runStreamJsonSession( + nonInteractiveConfig, + trimmedInput ? trimmedInput : undefined, + ); + return; + } else if (!process.stdin.isTTY) { const stdinData = await readStdin(); if (stdinData) { - input = `${stdinData}\n\n${input}`; + input = input ? 
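+        // A --prompt was given: prepend the piped stdin to it; otherwise the piped data alone becomes the prompt.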
`${stdinData}\n\n${input}` : stdinData; } } - if (!input) { + if (!input.trim()) { console.error( `No input provided via stdin. Input can be provided by piping data into gemini or using the --prompt option.`, ); diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index 7d5727c27..3de37a016 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -7,15 +7,32 @@ import { type Config, type ToolRegistry, + type ToolCallRequestInfo, executeToolCall, ToolErrorType, + ToolConfirmationOutcome, shutdownTelemetry, GeminiEventType, type ServerGeminiStreamEvent, + convertToFunctionResponse, } from '@qwen-code/qwen-code-core'; import { type Part } from '@google/genai'; import { runNonInteractive } from './nonInteractiveCli.js'; import { vi } from 'vitest'; +import type { StreamJsonUserEnvelope } from './streamJson/types.js'; +import type { StreamJsonWriter } from './streamJson/writer.js'; +import type { StreamJsonController } from './streamJson/controller.js'; + +type AwaitingApprovalToolCall = { + status: 'awaiting_approval'; + request: ToolCallRequestInfo; + confirmationDetails: { + onConfirm: ( + outcome: ToolConfirmationOutcome, + payload?: unknown, + ) => Promise; + }; +}; // Mock core modules vi.mock('./ui/hooks/atCommandProcessor.js'); @@ -27,6 +44,13 @@ vi.mock('@qwen-code/qwen-code-core', async (importOriginal) => { executeToolCall: vi.fn(), shutdownTelemetry: vi.fn(), isTelemetrySdkInitialized: vi.fn().mockReturnValue(true), + convertToFunctionResponse: vi + .fn() + .mockImplementation( + (_toolName: string, callId: string, content: unknown) => [ + { text: `converted-${callId}-${JSON.stringify(content)}` }, + ], + ), }; }); @@ -39,11 +63,19 @@ describe('runNonInteractive', () => { let processStdoutSpy: vi.SpyInstance; let mockGeminiClient: { sendMessageStream: vi.Mock; + getChat: vi.Mock; }; + let mockGetDebugResponses: vi.Mock; beforeEach(async () => { mockCoreExecuteToolCall = vi.mocked(executeToolCall); mockShutdownTelemetry = vi.mocked(shutdownTelemetry); + vi.mocked(convertToFunctionResponse).mockClear(); + vi.mocked(convertToFunctionResponse).mockImplementation( + (_toolName: string, callId: string, content: unknown) => [ + { text: `converted-${callId}-${JSON.stringify(content)}` }, + ], + ); consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); processStdoutSpy = vi @@ -55,10 +87,17 @@ describe('runNonInteractive', () => { getFunctionDeclarations: vi.fn().mockReturnValue([]), } as unknown as ToolRegistry; + mockGetDebugResponses = vi.fn(() => []); + mockGeminiClient = { sendMessageStream: vi.fn(), + getChat: vi.fn(() => ({ + getDebugResponses: mockGetDebugResponses, + })), }; + let currentModel = 'test-model'; + mockConfig = { initialize: vi.fn().mockResolvedValue(undefined), getGeminiClient: vi.fn().mockReturnValue(mockGeminiClient), @@ -68,6 +107,13 @@ describe('runNonInteractive', () => { getFullContext: vi.fn().mockReturnValue(false), getContentGeneratorConfig: vi.fn().mockReturnValue({}), getDebugMode: vi.fn().mockReturnValue(false), + getOutputFormat: vi.fn().mockReturnValue('text'), + getIncludePartialMessages: vi.fn().mockReturnValue(false), + getSessionId: vi.fn().mockReturnValue('session-id'), + getModel: vi.fn(() => currentModel), + setModel: vi.fn(async (model: string) => { + currentModel = model; + }), } as unknown as Config; const { handleAtCommand } = await import( @@ -143,6 +189,7 @@ describe('runNonInteractive', () => { mockConfig, 
expect.objectContaining({ name: 'testTool' }), expect.any(AbortSignal), + undefined, ); expect(mockGeminiClient.sendMessageStream).toHaveBeenNthCalledWith( 2, @@ -154,6 +201,677 @@ describe('runNonInteractive', () => { expect(processStdoutSpy).toHaveBeenCalledWith('\n'); }); + it('should emit stream-json envelopes when output format is stream-json', async () => { + (mockConfig.getOutputFormat as vi.Mock).mockReturnValue('stream-json'); + const writes: string[] = []; + processStdoutSpy.mockImplementation((chunk: string | Uint8Array) => { + if (typeof chunk === 'string') { + writes.push(chunk); + } else { + writes.push(Buffer.from(chunk).toString('utf8')); + } + return true; + }); + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Hello JSON' }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive(mockConfig, 'Stream mode input', 'prompt-id-json'); + + const envelopes = writes + .join('') + .split('\n') + .filter((line) => line.trim().length > 0) + .map((line) => JSON.parse(line)); + + expect(envelopes.at(0)?.type).toBe('user'); + expect(envelopes.at(1)?.type).toBe('assistant'); + expect(envelopes.at(1)?.message?.content?.[0]?.text).toBe('Hello JSON'); + expect(envelopes.at(-1)?.type).toBe('result'); + }); + + it('should emit stream events when include-partial-messages is enabled', async () => { + (mockConfig.getOutputFormat as vi.Mock).mockReturnValue('stream-json'); + (mockConfig.getIncludePartialMessages as vi.Mock).mockReturnValue(true); + const writes: string[] = []; + processStdoutSpy.mockImplementation((chunk: string | Uint8Array) => { + if (typeof chunk === 'string') { + writes.push(chunk); + } else { + writes.push(Buffer.from(chunk).toString('utf8')); + } + return true; + }); + + const events: ServerGeminiStreamEvent[] = [ + { + type: GeminiEventType.Thought, + value: { subject: 'Plan', description: 'Assess repo' }, + }, + { type: GeminiEventType.Content, value: 'A' }, + { type: GeminiEventType.Content, value: 'B' }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive(mockConfig, 'Partial stream', 'prompt-id-partial'); + + const envelopes = writes + .join('') + .split('\n') + .filter((line) => line.trim().length > 0) + .map((line) => JSON.parse(line)); + + const streamEvents = envelopes.filter( + (envelope) => envelope.type === 'stream_event', + ); + + expect(streamEvents.length).toBeGreaterThan(0); + expect( + streamEvents.some( + (event) => + event.event?.type === 'content_block_delta' && + event.event?.delta?.type === 'thinking_delta', + ), + ).toBe(true); + expect( + streamEvents.some( + (event) => + event.event?.type === 'content_block_delta' && + event.event?.delta?.type === 'text_delta', + ), + ).toBe(true); + expect(envelopes.at(-1)?.type).toBe('result'); + }); + + it('should emit tool result envelopes in stream-json mode', async () => { + (mockConfig.getOutputFormat as vi.Mock).mockReturnValue('stream-json'); + (mockConfig.getIncludePartialMessages as vi.Mock).mockReturnValue(true); + const writes: string[] = []; + processStdoutSpy.mockImplementation((chunk: string | Uint8Array) => { + if (typeof chunk === 'string') { + writes.push(chunk); + } else { + writes.push(Buffer.from(chunk).toString('utf8')); + } + return true; + }); + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'tool-1', + name: 'testTool', + args: { value: 1 }, 
+ isClientInitiated: false, + prompt_id: 'prompt-id-stream-tool', + }, + }; + mockCoreExecuteToolCall.mockResolvedValue({ + responseParts: [{ text: 'Tool output' }], + resultDisplay: 'Tool output', + }); + + const firstEvents: ServerGeminiStreamEvent[] = [toolCallEvent]; + const secondEvents: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'Done' }, + ]; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents(firstEvents)) + .mockReturnValueOnce(createStreamFromEvents(secondEvents)); + + await runNonInteractive( + mockConfig, + 'Tool invocation', + 'prompt-id-stream-tool', + ); + + const envelopes = writes + .join('') + .split('\n') + .filter((line) => line.trim().length > 0) + .map((line) => JSON.parse(line)); + + const streamEvents = envelopes.filter( + (env) => env.type === 'stream_event', + ); + expect( + streamEvents.some( + (event) => + event.event?.type === 'content_block_delta' && + event.event?.delta?.type === 'input_json_delta', + ), + ).toBe(true); + + const userEnvelopes = envelopes.filter((env) => env.type === 'user'); + expect( + userEnvelopes.some( + (env) => + env.parent_tool_use_id === 'tool-1' && + env.message?.content?.[0]?.type === 'tool_result', + ), + ).toBe(true); + expect(envelopes.at(-2)?.type).toBe('assistant'); + expect(envelopes.at(-2)?.message?.content?.[0]?.text).toBe('Done'); + expect(envelopes.at(-1)?.type).toBe('result'); + }); + + it('honours updated tool input and emits approval system message', async () => { + (mockConfig.getOutputFormat as vi.Mock).mockReturnValue('stream-json'); + const onConfirm = vi.fn().mockResolvedValue(undefined); + + mockCoreExecuteToolCall.mockImplementation( + async (_config, requestInfo, _signal, options) => { + options?.onToolCallsUpdate?.([ + { + status: 'awaiting_approval', + request: requestInfo, + confirmationDetails: { + onConfirm, + }, + } as unknown as AwaitingApprovalToolCall, + ]); + return { responseParts: [] }; + }, + ); + + const sendControlRequest = vi.fn().mockResolvedValue({ + success: true, + response: { + behavior: 'allow', + updatedInput: { arg1: 'updated' }, + message: 'Approved by host', + }, + }); + + const interruptActiveRun = vi.fn(); + const systemMessages: Array<{ subtype: string; data?: unknown }> = []; + const streamJsonWriter = { + emitSystemMessage: vi + .fn<(subtype: string, data?: unknown) => void>() + .mockImplementation((subtype, data) => { + systemMessages.push({ subtype, data }); + }), + emitResult: vi.fn(), + writeEnvelope: vi.fn(), + emitUserMessageFromParts: vi.fn(), + emitToolResult: vi.fn(), + createAssistantBuilder: vi.fn(() => ({ + appendText: vi.fn(), + appendThinking: vi.fn(), + appendToolUse: vi.fn(), + finalize: vi.fn(() => ({ + type: 'assistant', + message: { role: 'assistant', content: [] }, + })), + })), + } as unknown as StreamJsonWriter; + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'call-1', + name: 'testTool', + args: { arg1: 'original' }, + isClientInitiated: false, + prompt_id: 'prompt-can_use', + }, + }; + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents([toolCallEvent]), + ); + + await runNonInteractive(mockConfig, 'Use tool', 'prompt-can_use', { + streamJson: { + writer: streamJsonWriter, + controller: { + sendControlRequest, + interruptActiveRun, + } as unknown as StreamJsonController, + }, + }); + + expect(sendControlRequest).toHaveBeenCalledWith( + 'can_use_tool', + expect.objectContaining({ tool_name: 'testTool' }), 
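+      // Third argument is the control-request options object (e.g. timeoutMs).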
+ expect.any(Object), + ); + expect(onConfirm).toHaveBeenCalledWith( + ToolConfirmationOutcome.ProceedOnce, + ); + expect( + mockCoreExecuteToolCall.mock.calls[0]?.[1]?.args, + ).toEqual({ arg1: 'updated' }); + expect( + systemMessages.some( + (entry) => + entry.subtype === 'tool_permission' && + (entry.data as { message?: string })?.['message'] === + 'Approved by host', + ), + ).toBe(true); + expect(interruptActiveRun).not.toHaveBeenCalled(); + }); + + it('cancels tool execution when control response denies permission', async () => { + (mockConfig.getOutputFormat as vi.Mock).mockReturnValue('stream-json'); + const onConfirm = vi.fn().mockResolvedValue(undefined); + + mockCoreExecuteToolCall.mockImplementation( + async (_config, requestInfo, _signal, options) => { + options?.onToolCallsUpdate?.([ + { + status: 'awaiting_approval', + request: requestInfo, + confirmationDetails: { + onConfirm, + }, + } as unknown as AwaitingApprovalToolCall, + ]); + return { responseParts: [] }; + }, + ); + + const sendControlRequest = vi.fn().mockResolvedValue({ + success: false, + error: 'Denied by host', + }); + + const streamJsonWriter = { + emitSystemMessage: vi.fn(), + emitResult: vi.fn(), + writeEnvelope: vi.fn(), + emitUserMessageFromParts: vi.fn(), + emitToolResult: vi.fn(), + createAssistantBuilder: vi.fn(() => ({ + appendText: vi.fn(), + appendThinking: vi.fn(), + appendToolUse: vi.fn(), + finalize: vi.fn(() => ({ + type: 'assistant', + message: { role: 'assistant', content: [] }, + })), + })), + } as unknown as StreamJsonWriter; + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents([ + { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'call-2', + name: 'testTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-deny', + }, + }, + ]), + ); + + await runNonInteractive(mockConfig, 'Use tool', 'prompt-deny', { + streamJson: { + writer: streamJsonWriter, + controller: { + sendControlRequest, + interruptActiveRun: vi.fn(), + } as unknown as StreamJsonController, + }, + }); + + expect(onConfirm).toHaveBeenCalledWith( + ToolConfirmationOutcome.Cancel, + ); + expect(streamJsonWriter.emitSystemMessage).toHaveBeenCalledWith( + 'tool_permission', + expect.objectContaining({ behavior: 'error' }), + ); + }); + + it('invokes hook callbacks during tool execution and can suppress output', async () => { + (mockConfig.getOutputFormat as vi.Mock).mockReturnValue('stream-json'); + mockCoreExecuteToolCall.mockResolvedValue({ + responseParts: [{ text: 'tool-result' }], + }); + + const sendControlRequest = vi + .fn() + .mockImplementation((subtype: string, payload: Record) => { + if (subtype === 'hook_callback' && payload['callback_id'] === 'pre') { + return Promise.resolve({ + success: true, + response: { decision: 'continue' }, + }); + } + if (subtype === 'hook_callback' && payload['callback_id'] === 'post') { + return Promise.resolve({ + success: true, + response: { + suppressOutput: true, + systemMessage: 'suppressed by hook', + }, + }); + } + return Promise.resolve({ success: true, response: { behavior: 'allow' } }); + }); + + const interruptActiveRun = vi.fn(); + const streamJsonWriter = { + emitSystemMessage: vi.fn(), + emitResult: vi.fn(), + writeEnvelope: vi.fn(), + emitUserMessageFromParts: vi.fn(), + createAssistantBuilder: vi.fn(() => ({ + appendText: vi.fn(), + appendThinking: vi.fn(), + appendToolUse: vi.fn(), + finalize: vi.fn(() => ({ + type: 'assistant', + message: { role: 'assistant', content: [] }, + })), + })), + emitToolResult: vi.fn(), + } as 
unknown as StreamJsonWriter; + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'hook-call-1', + name: 'hookTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-hook', + }, + }; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents([toolCallEvent])) + .mockReturnValueOnce( + createStreamFromEvents([ + { type: GeminiEventType.Content, value: 'final answer' }, + ]), + ); + + const controlContext = { + hookCallbacks: new Map([ + ['pre', { event: 'pre_tool' }], + ['post', { event: 'post_tool' }], + ]), + registeredHookEvents: new Set(['pre_tool', 'post_tool']), + mcpClients: new Map(), + }; + + await runNonInteractive(mockConfig, 'Use hook tool', 'prompt-hook', { + streamJson: { + writer: streamJsonWriter, + controller: { + sendControlRequest, + interruptActiveRun, + } as unknown as StreamJsonController, + controlContext, + }, + }); + + expect(sendControlRequest).toHaveBeenCalledWith( + 'hook_callback', + expect.objectContaining({ callback_id: 'pre' }), + expect.objectContaining({ timeoutMs: expect.any(Number) }), + ); + expect(sendControlRequest).toHaveBeenCalledWith( + 'hook_callback', + expect.objectContaining({ callback_id: 'post' }), + expect.objectContaining({ timeoutMs: expect.any(Number) }), + ); + expect(streamJsonWriter.emitToolResult).not.toHaveBeenCalled(); + expect( + streamJsonWriter.emitSystemMessage.mock.calls.some( + ([subtype, data]) => + subtype === 'hook_callback' && + (data as { message?: string })?.message === 'suppressed by hook', + ), + ).toBe(true); + expect(interruptActiveRun).not.toHaveBeenCalled(); + }); + + it('skips tool execution when hook callback requests cancellation', async () => { + (mockConfig.getOutputFormat as vi.Mock).mockReturnValue('stream-json'); + const sendControlRequest = vi + .fn() + .mockImplementation((subtype: string, payload: Record) => { + if (subtype === 'hook_callback' && payload['callback_id'] === 'pre') { + return Promise.resolve({ + success: true, + response: { + continue: false, + systemMessage: 'hook denied tool execution', + }, + }); + } + if (subtype === 'hook_callback' && payload['callback_id'] === 'post') { + return Promise.resolve({ success: true, response: {} }); + } + return Promise.resolve({ success: true, response: { behavior: 'allow' } }); + }); + + const streamJsonWriter = { + emitSystemMessage: vi.fn(), + emitResult: vi.fn(), + writeEnvelope: vi.fn(), + emitUserMessageFromParts: vi.fn(), + createAssistantBuilder: vi.fn(() => ({ + appendText: vi.fn(), + appendThinking: vi.fn(), + appendToolUse: vi.fn(), + finalize: vi.fn(() => ({ + type: 'assistant', + message: { role: 'assistant', content: [] }, + })), + })), + emitToolResult: vi.fn(), + } as unknown as StreamJsonWriter; + + const toolCallEvent: ServerGeminiStreamEvent = { + type: GeminiEventType.ToolCallRequest, + value: { + callId: 'hook-skip-1', + name: 'skipTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-skip', + }, + }; + + mockGeminiClient.sendMessageStream + .mockReturnValueOnce(createStreamFromEvents([toolCallEvent])) + .mockReturnValueOnce( + createStreamFromEvents([ + { type: GeminiEventType.Content, value: 'final answer' }, + ]), + ); + + const controlContext = { + hookCallbacks: new Map([ + ['pre', { event: 'pre_tool' }], + ['post', { event: 'post_tool' }], + ]), + registeredHookEvents: new Set(['pre_tool', 'post_tool']), + mcpClients: new Map(), + }; + + await runNonInteractive(mockConfig, 'Skip via hook', 'prompt-skip', { + 
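+      // Provide the stream-json writer, controller and registered hook callbacks for this run.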
streamJson: { + writer: streamJsonWriter, + controller: { + sendControlRequest, + interruptActiveRun: vi.fn(), + } as unknown as StreamJsonController, + controlContext, + }, + }); + + expect(mockCoreExecuteToolCall).not.toHaveBeenCalled(); + expect(streamJsonWriter.emitToolResult).not.toHaveBeenCalled(); + expect( + streamJsonWriter.emitSystemMessage.mock.calls.some( + ([subtype, data]) => + subtype === 'hook_callback' && + (data as { message?: string })?.message === 'hook denied tool execution', + ), + ).toBe(true); + }); + + it('should include usage metadata and api duration in stream-json results', async () => { + (mockConfig.getOutputFormat as vi.Mock).mockReturnValue('stream-json'); + const writes: string[] = []; + processStdoutSpy.mockImplementation((chunk: string | Uint8Array) => { + if (typeof chunk === 'string') { + writes.push(chunk); + } else { + writes.push(Buffer.from(chunk).toString('utf8')); + } + return true; + }); + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'All set' }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + const usageMetadata = { + promptTokenCount: 12, + candidatesTokenCount: 4, + totalTokenCount: 16, + cachedContentTokenCount: 2, + }; + mockGetDebugResponses.mockReturnValue([{ usageMetadata }]); + + const nowSpy = vi.spyOn(Date, 'now'); + let tick = 0; + nowSpy.mockImplementation(() => { + tick += 500; + return tick; + }); + + try { + await runNonInteractive(mockConfig, 'Usage check', 'prompt-id-usage'); + } finally { + nowSpy.mockRestore(); + } + + const envelopes = writes + .join('') + .split('\n') + .filter((line) => line.trim().length > 0) + .map((line) => JSON.parse(line)); + + const resultEnvelope = envelopes.at(-1); + expect(resultEnvelope?.type).toBe('result'); + expect(resultEnvelope?.duration_api_ms).toBeGreaterThan(0); + expect(resultEnvelope?.usage).toEqual({ + input_tokens: 12, + output_tokens: 4, + total_tokens: 16, + cache_read_input_tokens: 2, + }); + expect(resultEnvelope?.is_error).toBe(false); + }); + + it('converts structured tool_result envelopes using tool registry mapping', async () => { + const toolCallRegistry = new Map(); + toolCallRegistry.set('tool-remote', { + callId: 'tool-remote', + name: 'remote_tool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-remote', + }); + + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents([ + { type: GeminiEventType.Content, value: 'Follow up' }, + ]), + ); + + await runNonInteractive( + mockConfig, + '', + 'prompt-remote', + { + streamJson: { + writer: undefined as never, + controller: undefined as never, + toolCallRegistry, + }, + userEnvelope: { + type: 'user', + message: { + content: [ + { + type: 'tool_result', + tool_use_id: 'tool-remote', + content: 'processed output', + }, + ], + }, + } as unknown as StreamJsonUserEnvelope, + }, + ); + + expect(vi.mocked(convertToFunctionResponse)).toHaveBeenCalledTimes(1); + expect(vi.mocked(convertToFunctionResponse)).toHaveBeenCalledWith( + 'remote_tool', + 'tool-remote', + 'processed output', + ); + expect(toolCallRegistry.size).toBe(0); + expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledWith( + [{ text: 'converted-tool-remote-"processed output"' }], + expect.any(AbortSignal), + 'prompt-remote', + ); + }); + + it('applies temporary model overrides from stream-json envelope options', async () => { + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents([ + { type: 
GeminiEventType.Content, value: 'Response' }, + ]), + ); + + await runNonInteractive( + mockConfig, + 'ignored', + 'prompt-temp-model', + { + userEnvelope: { + type: 'user', + message: { content: 'Hello from envelope' }, + options: { temporary_model: 'temp-model' }, + } as unknown as StreamJsonUserEnvelope, + }, + ); + + expect(mockConfig.setModel).toHaveBeenNthCalledWith( + 1, + 'temp-model', + expect.objectContaining({ context: 'temporary_model' }), + ); + expect(mockConfig.setModel).toHaveBeenNthCalledWith( + 2, + 'test-model', + expect.objectContaining({ context: 'temporary_model_restore' }), + ); + }); + it('should handle error during tool execution and should send error back to the model', async () => { const toolCallEvent: ServerGeminiStreamEvent = { type: GeminiEventType.ToolCallRequest, @@ -215,6 +933,61 @@ describe('runNonInteractive', () => { expect(processStdoutSpy).toHaveBeenCalledWith('Sorry, let me try again.'); }); + it('should include usage metadata and API duration in stream-json result', async () => { + (mockConfig.getOutputFormat as vi.Mock).mockReturnValue('stream-json'); + const writes: string[] = []; + processStdoutSpy.mockImplementation((chunk: string | Uint8Array) => { + if (typeof chunk === 'string') { + writes.push(chunk); + } else { + writes.push(Buffer.from(chunk).toString('utf8')); + } + return true; + }); + + const nowSpy = vi.spyOn(Date, 'now'); + let current = 0; + nowSpy.mockImplementation(() => { + current += 500; + return current; + }); + + const usageMetadata = { + promptTokenCount: 11, + candidatesTokenCount: 5, + totalTokenCount: 16, + cachedContentTokenCount: 3, + }; + mockGetDebugResponses.mockReturnValue([{ usageMetadata }]); + + const events: ServerGeminiStreamEvent[] = [ + { type: GeminiEventType.Content, value: 'All done' }, + ]; + mockGeminiClient.sendMessageStream.mockReturnValue( + createStreamFromEvents(events), + ); + + await runNonInteractive(mockConfig, 'usage test', 'prompt-usage'); + + const envelopes = writes + .join('') + .split('\n') + .filter((line) => line.trim().length > 0) + .map((line) => JSON.parse(line)); + + const resultEnvelope = envelopes.at(-1); + expect(resultEnvelope?.type).toBe('result'); + expect(resultEnvelope?.duration_api_ms).toBeGreaterThan(0); + expect(resultEnvelope?.usage).toEqual({ + input_tokens: 11, + output_tokens: 5, + total_tokens: 16, + cache_read_input_tokens: 3, + }); + + nowSpy.mockRestore(); + }); + it('should exit with error if sendMessageStream throws initially', async () => { const apiError = new Error('API connection failed'); mockGeminiClient.sendMessageStream.mockImplementation(() => { diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index bdc42f461..71f34bdc7 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -13,22 +13,712 @@ import { parseAndFormatApiError, FatalInputError, FatalTurnLimitedError, + ToolConfirmationOutcome, + convertToFunctionResponse, } from '@qwen-code/qwen-code-core'; -import type { Content, Part } from '@google/genai'; +import type { Content, Part, PartListUnion } from '@google/genai'; import { ConsolePatcher } from './ui/utils/ConsolePatcher.js'; import { handleAtCommand } from './ui/hooks/atCommandProcessor.js'; +import type { ExecuteToolCallOptions } from '@qwen-code/qwen-code-core'; +import type { StreamJsonController } from './streamJson/controller.js'; +import { StreamJsonWriter } from './streamJson/writer.js'; +import type { + StreamJsonContentBlock, + StreamJsonUsage, + 
StreamJsonUserEnvelope, +} from './streamJson/types.js'; +import type { StreamJsonControlContext } from './streamJson/session.js'; + +export interface RunNonInteractiveOptions { + abortController?: AbortController; + streamJson?: { + writer: StreamJsonWriter; + controller: StreamJsonController; + toolCallRegistry?: Map; + controlContext?: StreamJsonControlContext; + }; + userEnvelope?: StreamJsonUserEnvelope; +} + +interface AwaitingApprovalToolCall { + request: ToolCallRequestInfo; + confirmationDetails: { + type: string; + onConfirm: ( + outcome: ToolConfirmationOutcome, + payload?: unknown, + ) => Promise; + [key: string]: unknown; + }; +} + +function buildPermissionSuggestions( + confirmationDetails: unknown, +): unknown { + if ( + !confirmationDetails || + typeof confirmationDetails !== 'object' || + !('type' in confirmationDetails) + ) { + return null; + } + + const details = confirmationDetails as Record; + const type = String(details['type'] ?? ''); + const title = typeof details['title'] === 'string' ? details['title'] : undefined; + + switch (type) { + case 'exec': + return [ + { + type: 'command', + title, + command: details['command'], + root_command: details['rootCommand'], + }, + ]; + case 'edit': + return [ + { + type: 'diff', + title, + file_name: details['fileName'], + file_path: details['filePath'], + diff: details['fileDiff'], + original_content: details['originalContent'], + new_content: details['newContent'], + }, + ]; + case 'plan': + return [ + { + type: 'plan', + title, + plan: details['plan'], + }, + ]; + case 'mcp': + return [ + { + type: 'mcp', + title, + server_name: details['serverName'], + tool_name: details['toolName'], + tool_display_name: details['toolDisplayName'], + }, + ]; + case 'info': + return [ + { + type: 'info', + title, + prompt: details['prompt'], + urls: details['urls'], + }, + ]; + default: + return [ + { + type, + title, + }, + ]; + } +} + +function normalizePartList(parts: PartListUnion | null): Part[] { + if (!parts) { + return []; + } + + if (typeof parts === 'string') { + return [{ text: parts }]; + } + + if (Array.isArray(parts)) { + return parts.map((part) => + typeof part === 'string' ? { text: part } : (part as Part), + ); + } + + return [parts as Part]; +} + +function extractStructuredPartsFromEnvelope( + envelope: StreamJsonUserEnvelope, + registry?: Map, +): Part[] { + const content = envelope.message?.content; + if (!Array.isArray(content)) { + return []; + } + + const parts: Part[] = []; + for (const block of content) { + if (!block || typeof block !== 'object' || !('type' in block)) { + continue; + } + + if (block.type === 'tool_result') { + const toolUseId = + typeof block.tool_use_id === 'string' && block.tool_use_id.trim() + ? 
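+            // Prefer the block's own tool_use_id; otherwise fall back to the envelope-level parent_tool_use_id.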
block.tool_use_id + : envelope.parent_tool_use_id; + + if (toolUseId && registry?.has(toolUseId)) { + const requestInfo = registry.get(toolUseId)!; + const responseParts = convertToFunctionResponse( + requestInfo.name, + requestInfo.callId, + toolResultContentToPartList(block.content), + ); + registry.delete(toolUseId); + if (Array.isArray(responseParts)) { + parts.push(...responseParts); + } else if (responseParts) { + parts.push(responseParts as Part); + } + continue; + } + + const fallbackText = buildToolResultFallbackText(block); + if (fallbackText) { + parts.push({ text: fallbackText }); + } + continue; + } + + if (block.type === 'text') { + if (block.text) { + parts.push({ text: block.text }); + } + continue; + } + + if (block.type === 'thinking') { + if (block.thinking) { + parts.push({ text: block.thinking }); + } + continue; + } + + parts.push({ text: JSON.stringify(block) }); + } + + return parts; +} + +function toolResultContentToPartList( + content?: StreamJsonContentBlock[] | string, +): PartListUnion { + if (!content) { + return [{ text: '' }]; + } + + if (typeof content === 'string') { + return content; + } + + const parts: Part[] = []; + for (const item of content) { + if (!item || typeof item !== 'object' || !('type' in item)) { + continue; + } + if (item.type === 'text') { + parts.push({ text: item.text ?? '' }); + } else { + parts.push({ text: JSON.stringify(item) }); + } + } + + return parts.length > 0 ? parts : [{ text: '' }]; +} + +function buildToolResultFallbackText( + block: Extract, +): string | undefined { + if (typeof block.content === 'string') { + return block.content; + } + + if (Array.isArray(block.content)) { + const fragments: string[] = []; + for (const item of block.content) { + if (!item || typeof item !== 'object' || !('type' in item)) { + continue; + } + if (item.type === 'text') { + if (item.text) { + fragments.push(item.text); + } + } else { + fragments.push(JSON.stringify(item)); + } + } + if (fragments.length > 0) { + return fragments.join(''); + } + } + + if (block.is_error) { + return 'Tool execution reported an error without content.'; + } + + return undefined; +} + +async function applyTemporaryEnvelopeOptions( + config: Config, + envelope?: StreamJsonUserEnvelope, +): Promise<() => Promise> { + const restoreStack: Array<() => Promise> = []; + const rawOptions = envelope?.options; + + if (rawOptions && typeof rawOptions === 'object') { + const options = rawOptions as Record; + const temporaryModelRaw = options['temporary_model']; + if (typeof temporaryModelRaw === 'string') { + const temporaryModel = temporaryModelRaw.trim(); + if (temporaryModel.length > 0) { + const originalModel = config.getModel(); + if (temporaryModel !== originalModel) { + await config.setModel(temporaryModel, { + reason: 'stream-json-options', + context: 'temporary_model', + }); + restoreStack.push(async () => { + try { + await config.setModel(originalModel, { + reason: 'stream-json-options', + context: 'temporary_model_restore', + }); + } catch (error) { + console.debug( + 'Failed to restore model after temporary stream-json option:', + error, + ); + } + }); + } + } + } + } + + return async () => { + for (const restore of restoreStack.reverse()) { + await restore(); + } + }; +} + +function extractUsageFromGeminiClient( + geminiClient: unknown, +): StreamJsonUsage | undefined { + if ( + !geminiClient || + typeof geminiClient !== 'object' || + typeof (geminiClient as { getChat?: unknown }).getChat !== 'function' + ) { + return undefined; + } + + try { + const chat = 
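+    // Usage metadata is taken from the chat's debug responses, scanning from the newest entry backwards.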
(geminiClient as { getChat: () => unknown }).getChat(); + if ( + !chat || + typeof chat !== 'object' || + typeof (chat as { getDebugResponses?: unknown }).getDebugResponses !== + 'function' + ) { + return undefined; + } + + const responses = (chat as { + getDebugResponses: () => Array>; + }).getDebugResponses(); + for (let i = responses.length - 1; i >= 0; i--) { + const metadata = responses[i]?.['usageMetadata'] as + | Record + | undefined; + if (metadata) { + const promptTokens = metadata['promptTokenCount']; + const completionTokens = metadata['candidatesTokenCount']; + const totalTokens = metadata['totalTokenCount']; + const cachedTokens = metadata['cachedContentTokenCount']; + + return { + input_tokens: + typeof promptTokens === 'number' ? promptTokens : undefined, + output_tokens: + typeof completionTokens === 'number' + ? completionTokens + : undefined, + total_tokens: + typeof totalTokens === 'number' ? totalTokens : undefined, + cache_read_input_tokens: + typeof cachedTokens === 'number' ? cachedTokens : undefined, + }; + } + } + } catch (error) { + console.debug('Failed to extract usage metadata:', error); + } + + return undefined; +} + +function calculateApproximateCost( + usage: StreamJsonUsage | undefined, +): number | undefined { + if (!usage) { + return undefined; + } + // Pricing details are not currently exposed; report zero until available. + return 0; +} + +const CONTROL_REQUEST_TIMEOUT_MS = 60_000; +const HOOK_CALLBACK_TIMEOUT_MS = CONTROL_REQUEST_TIMEOUT_MS; + +type HookCallbackMessage = { + event: string; + callbackId: string; + message: string; +}; + +type HookCallbackAggregateResult = { + shouldSkip: boolean; + shouldInterrupt: boolean; + suppressOutput: boolean; + messages: HookCallbackMessage[]; +}; export async function runNonInteractive( config: Config, input: string, prompt_id: string, + options: RunNonInteractiveOptions = {}, ): Promise { const consolePatcher = new ConsolePatcher({ stderr: true, debugMode: config.getDebugMode(), }); + const isStreamJsonOutput = config.getOutputFormat() === 'stream-json'; + const streamJsonWriter = isStreamJsonOutput + ? options.streamJson?.writer ?? 
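+        // No writer was supplied by the stream-json session, so construct one for this run.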
+ new StreamJsonWriter(config, config.getIncludePartialMessages()) + : undefined; + const streamJsonContext = options.streamJson; + const toolCallRegistry = streamJsonContext?.toolCallRegistry; + const controlContext = streamJsonContext?.controlContext; + const pendingPermissionRequests = new Set(); + let totalApiDurationMs = 0; + const startTime = Date.now(); + let turnCount = 0; + let cleanupTemporaryOptions: (() => Promise) | undefined; + + const emitHookMessages = (messages: HookCallbackMessage[]): void => { + if (!streamJsonWriter || messages.length === 0) { + return; + } + for (const { event, callbackId, message } of messages) { + streamJsonWriter.emitSystemMessage('hook_callback', { + event, + callback_id: callbackId, + message, + }); + } + }; + + const buildHookInput = ( + event: string, + toolCall: ToolCallRequestInfo, + responseSummary?: Record, + ): Record => { + const input: Record = { + event, + tool: { + name: toolCall.name, + input: toolCall.args, + call_id: toolCall.callId, + }, + }; + if (responseSummary) { + input['response'] = responseSummary; + } + return input; + }; + + const summarizeToolResponse = ( + toolResponse: Record, + ): Record => { + const summary: Record = { + is_error: Boolean(toolResponse['error']), + }; + if (toolResponse['resultDisplay'] !== undefined) { + summary['result_display'] = toolResponse['resultDisplay']; + } + if (Array.isArray(toolResponse['responseParts'])) { + summary['response_parts_count'] = (toolResponse['responseParts'] as unknown[]).length; + } + const errorInfo = toolResponse['error']; + if (errorInfo && typeof errorInfo === 'object') { + const message = (errorInfo as { message?: unknown }).message; + const type = (errorInfo as { type?: unknown }).type; + summary['error'] = { + message: typeof message === 'string' ? message : undefined, + type: typeof type === 'string' ? type : undefined, + }; + } + return summary; + }; + + const dispatchHookCallbacks = async ( + event: string, + toolCall: ToolCallRequestInfo, + responseSummary?: Record, + ): Promise => { + const initial: HookCallbackAggregateResult = { + shouldSkip: false, + shouldInterrupt: false, + suppressOutput: false, + messages: [], + }; + + if ( + !isStreamJsonOutput || + !streamJsonContext?.controller || + !controlContext || + controlContext.hookCallbacks.size === 0 || + !controlContext.registeredHookEvents.has(event) + ) { + return initial; + } + + const callbacks: string[] = []; + for (const [callbackId, registration] of controlContext.hookCallbacks) { + if (registration.event === event) { + callbacks.push(callbackId); + } + } + + if (callbacks.length === 0) { + return initial; + } + + const aggregate: HookCallbackAggregateResult = { + shouldSkip: false, + shouldInterrupt: false, + suppressOutput: false, + messages: [], + }; + + for (const callbackId of callbacks) { + try { + const responseEnvelope = await streamJsonContext.controller.sendControlRequest( + 'hook_callback', + { + callback_id: callbackId, + tool_use_id: toolCall.callId, + input: buildHookInput(event, toolCall, responseSummary), + }, + { timeoutMs: HOOK_CALLBACK_TIMEOUT_MS }, + ); + + if (!responseEnvelope.success) { + const errorDetail = responseEnvelope.error; + const errorMessage = + typeof errorDetail === 'string' + ? errorDetail + : errorDetail && typeof errorDetail === 'object' + ? String((errorDetail as { message?: unknown }).message ?? 
'') + : 'Hook callback failed'; + streamJsonWriter?.emitSystemMessage('hook_callback', { + event, + callback_id: callbackId, + error: errorMessage, + }); + continue; + } + + const responsePayload = + (responseEnvelope.response ?? {}) as Record; + + const decisionRaw = responsePayload['decision']; + const decision = + typeof decisionRaw === 'string' ? decisionRaw.toLowerCase() : undefined; + const continueFlag = responsePayload['continue']; + if ( + decision && + ['cancel', 'stop', 'interrupt', 'deny', 'abort'].includes(decision) + ) { + aggregate.shouldSkip = true; + if (decision === 'interrupt') { + aggregate.shouldInterrupt = true; + } + } + if (continueFlag === false) { + aggregate.shouldSkip = true; + } + if (responsePayload['interrupt'] === true) { + aggregate.shouldInterrupt = true; + } + if (responsePayload['suppressOutput'] === true) { + aggregate.suppressOutput = true; + } + + const systemMessage = responsePayload['systemMessage']; + if (typeof systemMessage === 'string' && systemMessage.trim().length > 0) { + aggregate.messages.push({ + event, + callbackId, + message: systemMessage, + }); + } else { + const reason = responsePayload['reason']; + if (typeof reason === 'string' && reason.trim().length > 0) { + aggregate.messages.push({ + event, + callbackId, + message: reason, + }); + } + } + } catch (error) { + streamJsonWriter?.emitSystemMessage('hook_callback', { + event, + callback_id: callbackId, + error: + error instanceof Error + ? error.message + : 'Hook callback request failed', + }); + } + } + + return aggregate; + }; + + const handleToolPermissionRequest = async ( + toolCall: AwaitingApprovalToolCall, + ): Promise => { + const controller = streamJsonContext?.controller; + if (!controller || !streamJsonWriter) { + await toolCall.confirmationDetails.onConfirm( + ToolConfirmationOutcome.ProceedOnce, + ); + pendingPermissionRequests.delete(toolCall.request.callId); + return; + } + + try { + const responseEnvelope = await controller.sendControlRequest( + 'can_use_tool', + { + tool_name: toolCall.request.name, + tool_use_id: toolCall.request.callId, + input: toolCall.request.args, + permission_suggestions: buildPermissionSuggestions( + toolCall.confirmationDetails, + ), + }, + { timeoutMs: CONTROL_REQUEST_TIMEOUT_MS }, + ); + + if (!responseEnvelope.success) { + const errorDetail = responseEnvelope.error; + const errorMessage = + typeof errorDetail === 'string' + ? errorDetail + : errorDetail && typeof errorDetail === 'object' && 'message' in errorDetail + ? String((errorDetail as { message?: unknown }).message ?? '') + : 'Tool permission request was rejected by control_response.'; + if (streamJsonWriter) { + streamJsonWriter.emitSystemMessage('tool_permission', { + tool: toolCall.request.name, + behavior: 'error', + message: errorMessage, + }); + } + await toolCall.confirmationDetails.onConfirm( + ToolConfirmationOutcome.Cancel, + ); + return; + } + + const responsePayload = + (responseEnvelope.response ?? {}) as Record; + const behavior = String(responsePayload['behavior'] ?? '').toLowerCase(); + const responseMessage = + typeof responsePayload['message'] === 'string' + ? 
responsePayload['message'] + : undefined; + + if (behavior === 'allow') { + const updatedInput = responsePayload['updatedInput']; + if (updatedInput && typeof updatedInput === 'object') { + toolCall.request.args = updatedInput as Record; + } + await toolCall.confirmationDetails.onConfirm( + ToolConfirmationOutcome.ProceedOnce, + ); + } else { + await toolCall.confirmationDetails.onConfirm( + ToolConfirmationOutcome.Cancel, + ); + } + + if (responseMessage && streamJsonWriter) { + streamJsonWriter.emitSystemMessage('tool_permission', { + tool: toolCall.request.name, + behavior, + message: responseMessage, + }); + } + + if (responsePayload['interrupt']) { + controller.interruptActiveRun(); + } + } catch (error) { + if (streamJsonWriter) { + streamJsonWriter.emitSystemMessage('tool_permission', { + tool: toolCall.request.name, + behavior: 'error', + message: + error instanceof Error ? error.message : 'Tool permission request failed', + }); + } + await toolCall.confirmationDetails.onConfirm( + ToolConfirmationOutcome.Cancel, + ); + } finally { + pendingPermissionRequests.delete(toolCall.request.callId); + } + }; + + const handleToolSchedulerUpdate = (toolCalls: unknown[]): void => { + for (const call of toolCalls) { + if ( + call && + typeof call === 'object' && + (call as { status?: string }).status === 'awaiting_approval' + ) { + const awaiting = call as AwaitingApprovalToolCall; + if ( + awaiting.confirmationDetails?.onConfirm && + !pendingPermissionRequests.has(awaiting.request.callId) + ) { + pendingPermissionRequests.add(awaiting.request.callId); + void handleToolPermissionRequest(awaiting); + } + } + } + }; + + const geminiClient = config.getGeminiClient(); + try { consolePatcher.patch(); // Handle EPIPE errors when the output is piped to a command that closes early. @@ -39,32 +729,56 @@ export async function runNonInteractive( } }); - const geminiClient = config.getGeminiClient(); - - const abortController = new AbortController(); - - const { processedQuery, shouldProceed } = await handleAtCommand({ - query: input, + const abortController = + options.abortController ?? new AbortController(); + cleanupTemporaryOptions = await applyTemporaryEnvelopeOptions( config, - addItem: (_item, _timestamp) => 0, - onDebugMessage: () => {}, - messageId: Date.now(), - signal: abortController.signal, - }); + options.userEnvelope, + ); + + const userEnvelope = options.userEnvelope; + let shouldEmitUserEcho = !userEnvelope; + let initialPartList: PartListUnion | null = null; - if (!shouldProceed || !processedQuery) { - // An error occurred during @include processing (e.g., file not found). - // The error message is already logged by handleAtCommand. - throw new FatalInputError( - 'Exiting due to an error processing the @ command.', + if (userEnvelope) { + const structuredParts = extractStructuredPartsFromEnvelope( + userEnvelope, + toolCallRegistry, ); + if (structuredParts.length > 0) { + initialPartList = structuredParts; + shouldEmitUserEcho = false; + } + } + + if (!initialPartList) { + const { processedQuery, shouldProceed } = await handleAtCommand({ + query: input, + config, + addItem: (_item, _timestamp) => 0, + onDebugMessage: () => {}, + messageId: Date.now(), + signal: abortController.signal, + }); + + if (!shouldProceed || !processedQuery) { + // An error occurred during @include processing (e.g., file not found). + // The error message is already logged by handleAtCommand. 
+ throw new FatalInputError( + 'Exiting due to an error processing the @ command.', + ); + } + + initialPartList = processedQuery; } - let currentMessages: Content[] = [ - { role: 'user', parts: processedQuery as Part[] }, - ]; + const initialParts = normalizePartList(initialPartList); + let currentMessages: Content[] = [{ role: 'user', parts: initialParts }]; + + if (isStreamJsonOutput && shouldEmitUserEcho) { + streamJsonWriter?.emitUserMessageFromParts(initialParts); + } - let turnCount = 0; while (true) { turnCount++; if ( @@ -77,12 +791,15 @@ export async function runNonInteractive( } const toolCallRequests: ToolCallRequestInfo[] = []; + const apiStartTime = Date.now(); const responseStream = geminiClient.sendMessageStream( currentMessages[0]?.parts || [], abortController.signal, prompt_id, ); + const assistantBuilder = streamJsonWriter?.createAssistantBuilder(); + for await (const event of responseStream) { if (abortController.signal.aborted) { console.error('Operation cancelled.'); @@ -90,46 +807,151 @@ export async function runNonInteractive( } if (event.type === GeminiEventType.Content) { - process.stdout.write(event.value); + if (isStreamJsonOutput) { + assistantBuilder?.appendText(event.value); + } else { + process.stdout.write(event.value); + } + } else if (event.type === GeminiEventType.Thought) { + if (isStreamJsonOutput && event.value) { + const subject = event.value.subject?.trim(); + const description = event.value.description?.trim(); + const combined = [subject, description] + .filter((part) => part && part.length > 0) + .join(': '); + if (combined.length > 0) { + assistantBuilder?.appendThinking(combined); + } + } } else if (event.type === GeminiEventType.ToolCallRequest) { toolCallRequests.push(event.value); + if (toolCallRegistry) { + toolCallRegistry.set(event.value.callId, event.value); + } + if (isStreamJsonOutput) { + assistantBuilder?.appendToolUse(event.value); + } } } + assistantBuilder?.finalize(); + totalApiDurationMs += Date.now() - apiStartTime; + if (toolCallRequests.length > 0) { const toolResponseParts: Part[] = []; + pendingPermissionRequests.clear(); + const executeOptions: ExecuteToolCallOptions | undefined = + streamJsonContext + ? 
{ + onToolCallsUpdate: handleToolSchedulerUpdate, + } + : undefined; for (const requestInfo of toolCallRequests) { + if (isStreamJsonOutput && streamJsonContext?.controller && controlContext) { + const hookResult = await dispatchHookCallbacks('pre_tool', requestInfo); + emitHookMessages(hookResult.messages); + if (hookResult.shouldInterrupt) { + streamJsonContext.controller.interruptActiveRun(); + abortController.abort(); + return; + } + if (hookResult.shouldSkip) { + toolCallRegistry?.delete(requestInfo.callId); + continue; + } + } + const toolResponse = await executeToolCall( config, requestInfo, abortController.signal, + executeOptions, ); if (toolResponse.error) { + const message = + toolResponse.resultDisplay || toolResponse.error.message; console.error( - `Error executing tool ${requestInfo.name}: ${toolResponse.resultDisplay || toolResponse.error.message}`, + `Error executing tool ${requestInfo.name}: ${message}`, + ); + if (isStreamJsonOutput) { + streamJsonWriter?.emitSystemMessage('tool_error', { + tool: requestInfo.name, + message, + }); + } + } + + let suppressOutput = false; + if (isStreamJsonOutput && streamJsonContext?.controller && controlContext) { + const responseSummary = summarizeToolResponse( + toolResponse as unknown as Record, + ); + const postHook = await dispatchHookCallbacks( + 'post_tool', + requestInfo, + responseSummary, ); + emitHookMessages(postHook.messages); + if (postHook.shouldInterrupt) { + streamJsonContext.controller.interruptActiveRun(); + abortController.abort(); + return; + } + suppressOutput = postHook.suppressOutput; + } + + if (isStreamJsonOutput && !suppressOutput) { + streamJsonWriter?.emitToolResult(requestInfo, toolResponse); } if (toolResponse.responseParts) { toolResponseParts.push(...toolResponse.responseParts); } + + toolCallRegistry?.delete(requestInfo.callId); } currentMessages = [{ role: 'user', parts: toolResponseParts }]; } else { - process.stdout.write('\n'); // Ensure a final newline + if (isStreamJsonOutput) { + const usage = extractUsageFromGeminiClient(geminiClient); + streamJsonWriter?.emitResult({ + isError: false, + durationMs: Date.now() - startTime, + apiDurationMs: totalApiDurationMs, + numTurns: turnCount, + usage, + totalCostUsd: calculateApproximateCost(usage), + }); + } else { + process.stdout.write('\n'); // Ensure a final newline + } return; } } } catch (error) { - console.error( - parseAndFormatApiError( - error, - config.getContentGeneratorConfig()?.authType, - ), + const formattedError = parseAndFormatApiError( + error, + config.getContentGeneratorConfig()?.authType, ); + console.error(formattedError); + if (isStreamJsonOutput) { + const usage = extractUsageFromGeminiClient(geminiClient); + streamJsonWriter?.emitResult({ + isError: true, + durationMs: Date.now() - startTime, + apiDurationMs: totalApiDurationMs, + numTurns: turnCount, + errorMessage: formattedError, + usage, + totalCostUsd: calculateApproximateCost(usage), + }); + } throw error; } finally { + if (cleanupTemporaryOptions) { + await cleanupTemporaryOptions(); + } consolePatcher.cleanup(); if (isTelemetrySdkInitialized()) { await shutdownTelemetry(config); diff --git a/packages/cli/src/streamJson/controller.ts b/packages/cli/src/streamJson/controller.ts new file mode 100644 index 000000000..a83721fa8 --- /dev/null +++ b/packages/cli/src/streamJson/controller.ts @@ -0,0 +1,173 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { randomUUID } from 'node:crypto'; +import type { StreamJsonWriter } 
from './writer.js'; +import type { + StreamJsonControlCancelRequestEnvelope, + StreamJsonControlResponseEnvelope, + StreamJsonOutputEnvelope, +} from './types.js'; + +interface PendingControlRequest { + resolve: (envelope: StreamJsonControlResponseEnvelope) => void; + reject: (error: Error) => void; + timeout?: NodeJS.Timeout; +} + +export interface ControlRequestOptions { + timeoutMs?: number; +} + +export class StreamJsonController { + private readonly pendingRequests = new Map(); + private activeAbortController: AbortController | null = null; + + constructor(private readonly writer: StreamJsonWriter) {} + + sendControlRequest( + subtype: string, + payload: Record, + options: ControlRequestOptions = {}, + ): Promise { + const requestId = randomUUID(); + const envelope: StreamJsonOutputEnvelope = { + type: 'control_request', + request_id: requestId, + request: { + subtype, + ...payload, + }, + }; + + const promise = new Promise( + (resolve, reject) => { + const pending: PendingControlRequest = { resolve, reject }; + + if (options.timeoutMs && options.timeoutMs > 0) { + pending.timeout = setTimeout(() => { + this.pendingRequests.delete(requestId); + reject( + new Error( + `Timed out waiting for control_response to ${subtype}`, + ), + ); + }, options.timeoutMs); + } + + this.pendingRequests.set(requestId, pending); + }, + ); + + this.writer.writeEnvelope(envelope); + return promise; + } + + handleControlResponse( + envelope: StreamJsonControlResponseEnvelope, + ): void { + const pending = this.pendingRequests.get(envelope.request_id); + if (!pending) { + return; + } + + if (pending.timeout) { + clearTimeout(pending.timeout); + } + + this.pendingRequests.delete(envelope.request_id); + pending.resolve(envelope); + } + + handleControlCancel( + envelope: StreamJsonControlCancelRequestEnvelope, + ): void { + if (envelope.request_id) { + this.rejectPending( + envelope.request_id, + new Error( + envelope.reason + ? `Control request cancelled: ${envelope.reason}` + : 'Control request cancelled', + ), + ); + return; + } + + for (const requestId of [...this.pendingRequests.keys()]) { + this.rejectPending( + requestId, + new Error( + envelope.reason + ? `Control request cancelled: ${envelope.reason}` + : 'Control request cancelled', + ), + ); + } + } + + setActiveRunAbortController( + controller: AbortController | null, + ): void { + this.activeAbortController = controller; + } + + interruptActiveRun(): void { + this.activeAbortController?.abort(); + } + + cancelPendingRequests(reason?: string, requestId?: string): void { + if (requestId) { + if (!this.pendingRequests.has(requestId)) { + return; + } + this.writer.writeEnvelope({ + type: 'control_cancel_request', + request_id: requestId, + reason, + }); + this.rejectPending( + requestId, + new Error( + reason + ? `Control request cancelled: ${reason}` + : 'Control request cancelled', + ), + ); + return; + } + + for (const pendingId of [...this.pendingRequests.keys()]) { + this.writer.writeEnvelope({ + type: 'control_cancel_request', + request_id: pendingId, + reason, + }); + this.rejectPending( + pendingId, + new Error( + reason + ? 
`Control request cancelled: ${reason}` + : 'Control request cancelled', + ), + ); + } + } + + private rejectPending(requestId: string, error: Error): void { + const pending = this.pendingRequests.get(requestId); + if (!pending) { + return; + } + + if (pending.timeout) { + clearTimeout(pending.timeout); + } + + this.pendingRequests.delete(requestId); + pending.reject(error); + } +} diff --git a/packages/cli/src/streamJson/input.test.ts b/packages/cli/src/streamJson/input.test.ts new file mode 100644 index 000000000..17b2d1e80 --- /dev/null +++ b/packages/cli/src/streamJson/input.test.ts @@ -0,0 +1,80 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it } from 'vitest'; +import { FatalInputError } from '@qwen-code/qwen-code-core'; +import { + parseStreamJsonInputFromIterable, + type ParsedStreamJsonInput, +} from './input.js'; + +function createAsyncIterable(lines: string[]): AsyncIterable { + return { + async *[Symbol.asyncIterator]() { + for (const line of lines) { + yield line; + } + }, + }; +} + +describe('parseStreamJsonInputFromIterable', () => { + it('parses user messages from stream-json lines', async () => { + const lines = createAsyncIterable([ + JSON.stringify({ + type: 'user', + message: { + content: 'Hello world', + }, + }), + ]); + + const result = await parseStreamJsonInputFromIterable(lines); + + expect(result).toEqual({ prompt: 'Hello world' }); + }); + + it('emits success control_response for initialize request', async () => { + const emitted: unknown[] = []; + const lines = createAsyncIterable([ + JSON.stringify({ + type: 'control_request', + request_id: 'req-1', + request: { subtype: 'initialize' }, + }), + JSON.stringify({ + type: 'user', + message: { content: 'Start' }, + }), + ]); + + const result = await parseStreamJsonInputFromIterable(lines, (envelope) => { + emitted.push(envelope); + }); + + expect(result.prompt).toBe('Start'); + expect(emitted).toHaveLength(1); + expect(emitted[0]).toMatchObject({ + type: 'control_response', + request_id: 'req-1', + success: true, + }); + }); + + it('throws when no user message is provided', async () => { + const lines = createAsyncIterable([ + JSON.stringify({ + type: 'control_request', + request_id: 'req-2', + request: { subtype: 'initialize' }, + }), + ]); + + await expect( + parseStreamJsonInputFromIterable(lines), + ).rejects.toBeInstanceOf(FatalInputError); + }); +}); diff --git a/packages/cli/src/streamJson/input.ts b/packages/cli/src/streamJson/input.ts new file mode 100644 index 000000000..0da040f7c --- /dev/null +++ b/packages/cli/src/streamJson/input.ts @@ -0,0 +1,132 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createInterface } from 'node:readline/promises'; +import process from 'node:process'; +import { + parseStreamJsonEnvelope, + serializeStreamJsonEnvelope, + type StreamJsonControlRequestEnvelope, + type StreamJsonOutputEnvelope, + type StreamJsonUserEnvelope, +} from './types.js'; +import { FatalInputError } from '@qwen-code/qwen-code-core'; + +export interface ParsedStreamJsonInput { + prompt: string; +} + +export async function readStreamJsonInput(): Promise { + const rl = createInterface({ + input: process.stdin, + crlfDelay: Number.POSITIVE_INFINITY, + terminal: false, + }); + + try { + return await parseStreamJsonInputFromIterable(rl); + } finally { + rl.close(); + } +} + +export async function parseStreamJsonInputFromIterable( + lines: AsyncIterable, + emitEnvelope: 
(envelope: StreamJsonOutputEnvelope) => void = writeEnvelope, +): Promise { + const promptParts: string[] = []; + let receivedUserMessage = false; + + for await (const rawLine of lines) { + const line = rawLine.trim(); + if (!line) { + continue; + } + + const envelope = parseStreamJsonEnvelope(line); + + switch (envelope.type) { + case 'user': + promptParts.push(extractUserMessageText(envelope)); + receivedUserMessage = true; + break; + case 'control_request': + handleControlRequest(envelope, emitEnvelope); + break; + case 'control_response': + case 'control_cancel_request': + // Currently ignored on CLI side. + break; + default: + throw new FatalInputError( + `Unsupported stream-json input type: ${envelope.type}`, + ); + } + } + + if (!receivedUserMessage) { + throw new FatalInputError( + 'No user message provided via stream-json input.', + ); + } + + return { + prompt: promptParts.join('\n').trim(), + }; +} + +function handleControlRequest( + envelope: StreamJsonControlRequestEnvelope, + emitEnvelope: (envelope: StreamJsonOutputEnvelope) => void, +) { + const subtype = envelope.request?.subtype; + if (subtype === 'initialize') { + emitEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: true, + response: { + subtype, + capabilities: {}, + }, + }); + return; + } + + emitEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: `Unsupported control_request subtype: ${subtype ?? 'unknown'}`, + }); +} + +export function extractUserMessageText( + envelope: StreamJsonUserEnvelope, +): string { + const content = envelope.message?.content; + if (typeof content === 'string') { + return content; + } + if (Array.isArray(content)) { + return content + .map((block) => { + if (block && typeof block === 'object' && 'type' in block) { + if (block.type === 'text' && 'text' in block) { + return block.text ?? 
''; + } + return JSON.stringify(block); + } + return ''; + }) + .join('\n'); + } + return ''; +} + +function writeEnvelope(envelope: StreamJsonOutputEnvelope): void { + process.stdout.write(`${serializeStreamJsonEnvelope(envelope)}\n`); +} diff --git a/packages/cli/src/streamJson/session.test.ts b/packages/cli/src/streamJson/session.test.ts new file mode 100644 index 000000000..c1e125edb --- /dev/null +++ b/packages/cli/src/streamJson/session.test.ts @@ -0,0 +1,530 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Readable } from 'node:stream'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { ApprovalMode, type Config } from '@qwen-code/qwen-code-core'; +import type { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { runStreamJsonSession } from './session.js'; +import type { StreamJsonWriter } from './writer.js'; + +const runNonInteractiveMock = vi.fn(); +const logUserPromptMock = vi.fn(); +const connectToMcpServerMock = vi.fn(); + +vi.mock('../nonInteractiveCli.js', () => ({ + runNonInteractive: (...args: unknown[]) => runNonInteractiveMock(...args), +})); + +vi.mock('@qwen-code/qwen-code-core', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + logUserPrompt: (...args: unknown[]) => logUserPromptMock(...args), + connectToMcpServer: (...args: unknown[]) => connectToMcpServerMock(...args), + }; +}); + +function createConfig(): Config { + const getToolMock = vi.fn().mockReturnValue({}); + const toolRegistryMock = { + getTool: getToolMock, + getAllToolNames: vi.fn().mockReturnValue([]), + }; + + const getToolRegistryMock = vi.fn().mockReturnValue(toolRegistryMock); + const getApprovalModeMock = vi.fn().mockReturnValue(ApprovalMode.AUTO_EDIT); + const getMcpServersMock = vi.fn().mockReturnValue({ default: {} }); + const workspaceContext = { + getDirectories: vi.fn().mockReturnValue([process.cwd()]), + onDirectoriesChanged: vi.fn().mockReturnValue(() => {}), + }; + const ideClientMock = { + getDetectedIdeDisplayName: vi.fn().mockReturnValue(''), + getCurrentIde: vi.fn().mockReturnValue(null), + }; + const storageMock = { + getProjectTempCheckpointsDir: vi.fn().mockReturnValue(null), + }; + + return { + getIncludePartialMessages: () => false, + getContentGeneratorConfig: () => ({ authType: 'test' }), + getOutputFormat: () => 'stream-json', + setApprovalMode: vi.fn(), + setModel: vi.fn().mockResolvedValue(undefined), + getApprovalMode: getApprovalModeMock, + getToolRegistry: getToolRegistryMock, + getMcpServers: getMcpServersMock, + getDebugMode: () => false, + getWorkspaceContext: () => workspaceContext, + getIdeClient: () => ideClientMock, + getIdeMode: () => false, + getCheckpointingEnabled: () => false, + storage: storageMock, + } as unknown as Config; +} + +function createWriter() { + const writeEnvelope = vi.fn(); + return { + emitResult: vi.fn(), + writeEnvelope, + emitSystemMessage: vi + .fn<(subtype: string, data?: unknown) => void>() + .mockImplementation((subtype, data) => { + writeEnvelope({ + type: 'system', + subtype, + session_id: 'test-session', + data, + }); + }), + } as unknown as StreamJsonWriter; +} + +describe('runStreamJsonSession', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('processes initial prompt before reading stream', async () => { + const config = createConfig(); + const writer = createWriter(); + const stream = Readable.from([]); + runNonInteractiveMock.mockResolvedValueOnce(undefined); + + await 
runStreamJsonSession(config, 'Hello world', { + input: stream, + writer, + }); + + expect(runNonInteractiveMock).toHaveBeenCalledTimes(1); + expect(runNonInteractiveMock).toHaveBeenCalledWith( + config, + 'Hello world', + expect.any(String), + expect.objectContaining({ + streamJson: expect.objectContaining({ writer }), + abortController: expect.any(AbortController), + }), + ); + expect(logUserPromptMock).toHaveBeenCalledTimes(1); + }); + + it('processes user envelope even when prompt is empty', async () => { + const config = createConfig(); + const writer = createWriter(); + const stream = Readable.from([ + JSON.stringify({ + type: 'user', + message: { content: '' }, + parent_tool_use_id: 'tool-parent-1', + }) + '\n', + ]); + runNonInteractiveMock.mockResolvedValueOnce(undefined); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(runNonInteractiveMock).toHaveBeenCalledWith( + config, + '', + expect.any(String), + expect.objectContaining({ + userEnvelope: expect.objectContaining({ + parent_tool_use_id: 'tool-parent-1', + }), + }), + ); + expect(logUserPromptMock).not.toHaveBeenCalled(); + }); + + it('responds to initialize control request and handles user message', async () => { + const config = createConfig(); + const writer = createWriter(); + const lines = [ + JSON.stringify({ + type: 'control_request', + request_id: 'req-1', + request: { subtype: 'initialize' }, + }) + '\n', + JSON.stringify({ + type: 'user', + message: { content: 'Second prompt' }, + }) + '\n', + ]; + const stream = Readable.from(lines); + runNonInteractiveMock.mockResolvedValueOnce(undefined); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(writer.writeEnvelope).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'control_response', + request_id: 'req-1', + success: true, + response: expect.objectContaining({ + capabilities: expect.objectContaining({ + can_handle_can_use_tool: true, + can_handle_hook_callback: true, + can_handle_mcp_message: true, + }), + }), + }), + ); + expect(writer.writeEnvelope).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'system', + subtype: 'init', + }), + ); + expect(runNonInteractiveMock).toHaveBeenCalledTimes(1); + expect(runNonInteractiveMock).toHaveBeenCalledWith( + config, + 'Second prompt', + expect.any(String), + expect.objectContaining({ + streamJson: expect.objectContaining({ writer }), + abortController: expect.any(AbortController), + userEnvelope: expect.objectContaining({ + type: 'user', + }), + }), + ); + }); + + it('supports multiple sequential user prompts in persistent session', async () => { + const config = createConfig(); + const writer = createWriter(); + const lines = [ + JSON.stringify({ + type: 'user', + message: { content: 'first request' }, + }) + '\n', + JSON.stringify({ + type: 'user', + message: { content: 'second request' }, + }) + '\n', + ]; + const stream = Readable.from(lines); + runNonInteractiveMock.mockResolvedValue(undefined); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(runNonInteractiveMock).toHaveBeenCalledTimes(2); + expect(runNonInteractiveMock.mock.calls[0][1]).toBe('first request'); + expect(runNonInteractiveMock.mock.calls[1][1]).toBe('second request'); + }); + + it('honours interrupt control request and stops session', async () => { + const config = createConfig(); + const writer = createWriter(); + const lines = [ + JSON.stringify({ + type: 'control_request', + request_id: 
'req-interrupt', + request: { subtype: 'interrupt' }, + }) + '\n', + JSON.stringify({ + type: 'user', + message: { content: 'should not run' }, + }) + '\n', + ]; + const stream = Readable.from(lines); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(writer.writeEnvelope).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'control_response', + request_id: 'req-interrupt', + success: true, + }), + ); + expect(runNonInteractiveMock).not.toHaveBeenCalled(); + }); + + it('emits error result when JSON parsing fails', async () => { + const config = createConfig(); + const writer = createWriter(); + const stream = Readable.from(['{invalid json']); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(writer.emitResult).toHaveBeenCalledWith( + expect.objectContaining({ + isError: true, + }), + ); + expect(runNonInteractiveMock).not.toHaveBeenCalled(); + }); + + it('updates approval mode when receiving set_permission_mode', async () => { + const config = createConfig(); + const writer = createWriter(); + const stream = Readable.from([ + JSON.stringify({ + type: 'control_request', + request_id: 'req-permission', + request: { subtype: 'set_permission_mode', mode: 'auto-edit' }, + }) + '\n', + ]); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(config.setApprovalMode).toHaveBeenCalledWith( + ApprovalMode.AUTO_EDIT, + ); + expect(writer.writeEnvelope).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'control_response', + request_id: 'req-permission', + success: true, + }), + ); + }); + + it('invokes config.setModel for set_model requests', async () => { + const config = createConfig(); + const writer = createWriter(); + const stream = Readable.from([ + JSON.stringify({ + type: 'control_request', + request_id: 'req-model', + request: { subtype: 'set_model', model: 'new-model' }, + }) + '\n', + ]); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(config.setModel).toHaveBeenCalledWith('new-model'); + expect(writer.writeEnvelope).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'control_response', + request_id: 'req-model', + success: true, + }), + ); + }); + + it('handles can_use_tool requests using approval mode', async () => { + const config = createConfig(); + const writer = createWriter(); + const stream = Readable.from([ + JSON.stringify({ + type: 'control_request', + request_id: 'req-tool', + request: { subtype: 'can_use_tool', tool_name: 'edit' }, + }) + '\n', + ]); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(writer.writeEnvelope).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'control_response', + request_id: 'req-tool', + success: true, + response: expect.objectContaining({ + behavior: 'allow', + }), + }), + ); + }); + + it('denies can_use_tool when approval mode is manual', async () => { + const config = createConfig(); + (config.getApprovalMode as unknown as vi.Mock).mockReturnValue( + ApprovalMode.DEFAULT, + ); + const writer = createWriter(); + const stream = Readable.from([ + JSON.stringify({ + type: 'control_request', + request_id: 'req-tool', + request: { subtype: 'can_use_tool', tool_name: 'edit' }, + }) + '\n', + ]); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(writer.writeEnvelope).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'control_response', + 
request_id: 'req-tool', + success: true, + response: expect.objectContaining({ + behavior: 'deny', + }), + }), + ); + }); + + it('handles hook_callback requests with default continue decision', async () => { + const config = createConfig(); + const writer = createWriter(); + const initializeLine = JSON.stringify({ + type: 'control_request', + request_id: 'req-init', + request: { + subtype: 'initialize', + hooks: { + pre_tool: [ + { + matcher: null, + hookCallbackIds: ['hook-1'], + }, + ], + }, + }, + }); + const hookCallLine = JSON.stringify({ + type: 'control_request', + request_id: 'req-hook', + request: { subtype: 'hook_callback', callback_id: 'hook-1' }, + }); + const stream = Readable.from([`${initializeLine}\n`, `${hookCallLine}\n`]); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(writer.writeEnvelope).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'control_response', + request_id: 'req-hook', + success: true, + response: expect.objectContaining({ + decision: 'continue', + }), + }), + ); + }); + + it('returns descriptive error for unknown MCP server requests', async () => { + const config = createConfig(); + const writer = createWriter(); + (config.getMcpServers as unknown as vi.Mock).mockReturnValue({}); + const stream = Readable.from([ + JSON.stringify({ + type: 'control_request', + request_id: 'req-mcp', + request: { subtype: 'mcp_message', server_name: 'missing', message: {} }, + }) + '\n', + ]); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(writer.writeEnvelope).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'control_response', + request_id: 'req-mcp', + success: false, + error: expect.objectContaining({ + message: expect.stringContaining('missing'), + }), + }), + ); + }); + + it('forwards mcp_message requests to configured MCP server', async () => { + const config = createConfig(); + const writer = createWriter(); + (config.getMcpServers as unknown as vi.Mock).mockReturnValue({ + default: { command: 'echo' }, + }); + + const clientRequest = vi.fn().mockResolvedValue({}); + const clientNotification = vi.fn().mockResolvedValue(undefined); + const clientClose = vi.fn().mockResolvedValue(undefined); + connectToMcpServerMock.mockResolvedValueOnce({ + request: clientRequest, + notification: clientNotification, + close: clientClose, + onclose: undefined, + } as unknown as Client); + + const stream = Readable.from([ + JSON.stringify({ + type: 'control_request', + request_id: 'req-mcp', + request: { + subtype: 'mcp_message', + server_name: 'default', + message: { + jsonrpc: '2.0', + id: '42', + method: 'tools/list', + params: {}, + }, + }, + }) + '\n', + ]); + + await runStreamJsonSession(config, undefined, { + input: stream, + writer, + }); + + expect(connectToMcpServerMock).toHaveBeenCalledWith( + 'default', + expect.objectContaining({ command: 'echo' }), + false, + expect.any(Object), + ); + expect(clientRequest).toHaveBeenCalledWith( + expect.objectContaining({ method: 'tools/list', id: '42' }), + expect.anything(), + expect.objectContaining({ timeout: expect.any(Number) }), + ); + expect(writer.writeEnvelope).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'control_response', + request_id: 'req-mcp', + success: true, + response: expect.objectContaining({ + subtype: 'mcp_message', + mcp_response: expect.objectContaining({ + id: '42', + result: {}, + }), + }), + }), + ); + expect(clientClose).toHaveBeenCalled(); + }); +}); diff --git 
a/packages/cli/src/streamJson/session.ts b/packages/cli/src/streamJson/session.ts new file mode 100644 index 000000000..b82910332 --- /dev/null +++ b/packages/cli/src/streamJson/session.ts @@ -0,0 +1,974 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import readline from 'node:readline'; +import type { + Config, + MCPServerConfig, + ToolCallRequestInfo, +} from '@qwen-code/qwen-code-core'; +import { + ApprovalMode, + MCP_DEFAULT_TIMEOUT_MSEC, + connectToMcpServer, + logUserPrompt, +} from '@qwen-code/qwen-code-core'; +import type { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { ResultSchema } from '@modelcontextprotocol/sdk/types.js'; +import { + parseStreamJsonEnvelope, + type StreamJsonEnvelope, + type StreamJsonControlRequestEnvelope, + type StreamJsonOutputEnvelope, + type StreamJsonUserEnvelope, +} from './types.js'; +import { extractUserMessageText } from './input.js'; +import { StreamJsonWriter } from './writer.js'; +import { StreamJsonController } from './controller.js'; +import { runNonInteractive } from '../nonInteractiveCli.js'; +import { CommandService } from '../services/CommandService.js'; +import { BuiltinCommandLoader } from '../services/BuiltinCommandLoader.js'; + +interface HookCallbackRegistration { + event: string; + matcher?: unknown; +} + +export interface StreamJsonSessionOptions { + input?: NodeJS.ReadableStream; + writer?: StreamJsonWriter; +} + +interface PromptJob { + prompt: string; + envelope?: StreamJsonUserEnvelope; +} + +export interface StreamJsonControlContext { + readonly hookCallbacks: Map; + readonly registeredHookEvents: Set; + readonly mcpClients: Map; +} + +export async function runStreamJsonSession( + config: Config, + initialPrompt: string | undefined, + options: StreamJsonSessionOptions = {}, +): Promise { + const inputStream = options.input ?? process.stdin; + const writer = + options.writer ?? 
+ new StreamJsonWriter(config, config.getIncludePartialMessages()); + + const controller = new StreamJsonController(writer); + const controlContext: StreamJsonControlContext = { + hookCallbacks: new Map(), + registeredHookEvents: new Set(), + mcpClients: new Map(), + }; + const toolCallRegistry = new Map(); + const promptQueue: PromptJob[] = []; + let activeRun: Promise | null = null; + let terminated = false; + + const processQueue = async (): Promise => { + if (activeRun || promptQueue.length === 0) { + return; + } + + const job = promptQueue.shift(); + if (terminated || !job || (!job.prompt && !job.envelope)) { + void processQueue(); + return; + } + + const abortController = new AbortController(); + controller.setActiveRunAbortController(abortController); + + const runPromise = handleUserPrompt( + config, + writer, + controller, + job, + abortController, + toolCallRegistry, + controlContext, + ) + .catch((error) => { + console.error('Failed to handle stream-json prompt:', error); + }) + .finally(() => { + controller.setActiveRunAbortController(null); + }); + + activeRun = runPromise; + try { + await runPromise; + } finally { + activeRun = null; + void processQueue(); + } + }; + + const enqueuePrompt = (job: PromptJob): void => { + if (terminated) { + return; + } + if (!job.prompt && !job.envelope) { + return; + } + promptQueue.push(job); + void processQueue(); + }; + + if (initialPrompt && initialPrompt.trim().length > 0) { + enqueuePrompt({ prompt: initialPrompt.trim() }); + } + + const rl = readline.createInterface({ + input: inputStream, + crlfDelay: Number.POSITIVE_INFINITY, + terminal: false, + }); + + try { + for await (const rawLine of rl) { + const line = rawLine.trim(); + if (!line) { + continue; + } + + let envelope: StreamJsonEnvelope; + try { + envelope = parseStreamJsonEnvelope(line); + } catch (error) { + writer.emitResult({ + isError: true, + numTurns: 0, + errorMessage: + error instanceof Error ? 
error.message : 'Failed to parse JSON', + }); + continue; + } + + if (terminated) { + break; + } + + switch (envelope.type) { + case 'user': + enqueuePrompt({ + prompt: extractUserMessageText(envelope).trim(), + envelope, + }); + break; + case 'control_request': { + const shouldTerminate = await handleControlRequest( + config, + controller, + envelope, + writer, + controlContext, + ); + if (shouldTerminate) { + terminated = true; + promptQueue.length = 0; + toolCallRegistry.clear(); + } + break; + } + case 'control_response': + controller.handleControlResponse(envelope); + break; + case 'control_cancel_request': + controller.handleControlCancel(envelope); + break; + default: { + writer.emitResult({ + isError: true, + numTurns: 0, + errorMessage: `Unsupported stream-json input type: ${envelope.type}`, + }); + } + } + + if (terminated) { + break; + } + } + } finally { + rl.close(); + toolCallRegistry.clear(); + if (controlContext.mcpClients.size > 0) { + const closePromises = Array.from(controlContext.mcpClients.values()).map( + async ({ client }) => { + try { + await client.close(); + } catch (error) { + console.debug('Failed to close MCP client:', error); + } + }, + ); + await Promise.all(closePromises); + controlContext.mcpClients.clear(); + } + if (activeRun) { + try { + await activeRun; + } catch (error) { + console.error('Active stream-json prompt failed:', error); + } + } + } +} + +async function handleUserPrompt( + config: Config, + writer: StreamJsonWriter, + controller: StreamJsonController, + job: PromptJob, + abortController: AbortController, + toolCallRegistry: Map, + controlContext: StreamJsonControlContext, +): Promise { + const prompt = job.prompt ?? ''; + const hasTextPrompt = prompt.trim().length > 0; + if (!hasTextPrompt && !job.envelope) { + return; + } + + const prompt_id = Math.random().toString(16).slice(2); + if (hasTextPrompt) { + logUserPrompt(config, { + 'event.name': 'user_prompt', + 'event.timestamp': new Date().toISOString(), + prompt, + prompt_id, + auth_type: config.getContentGeneratorConfig()?.authType, + prompt_length: prompt.length, + }); + } + + try { + await runNonInteractive(config, prompt, prompt_id, { + abortController, + streamJson: { + writer, + controller, + toolCallRegistry, + controlContext, + }, + userEnvelope: job.envelope, + }); + } catch (error) { + writer.emitResult({ + isError: true, + numTurns: 1, + errorMessage: + error instanceof Error ? 
error.message : 'Failed to process prompt', + }); + } +} + +async function handleControlRequest( + config: Config, + controller: StreamJsonController, + envelope: StreamJsonControlRequestEnvelope, + writer: StreamJsonWriter, + controlContext: StreamJsonControlContext, +): Promise { + const subtype = envelope.request?.subtype; + if (subtype === 'initialize') { + const capabilities = buildControlCapabilities(config); + registerHookCallbacks(controlContext, envelope.request?.['hooks']); + + const systemInitData = await buildSystemInitData(config); + + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: true, + response: { + subtype, + capabilities, + }, + } satisfies StreamJsonOutputEnvelope); + writer.emitSystemMessage('init', systemInitData); + return false; + } + + if (subtype === 'interrupt') { + controller.interruptActiveRun(); + controller.cancelPendingRequests('interrupted'); + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: true, + response: { subtype }, + }); + return true; + } + + if (subtype === 'set_permission_mode') { + const mode = envelope.request?.['mode']; + const response = mapPermissionMode(mode); + if (response instanceof Error) { + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: response.message, + }); + return false; + } + + try { + config.setApprovalMode(response); + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: true, + response: { subtype }, + }); + } catch (error) { + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: + error instanceof Error + ? error.message + : 'Failed to set permission mode', + }); + } + return false; + } + + if (subtype === 'set_model') { + const model = envelope.request?.['model']; + if (typeof model !== 'string' || model.trim() === '') { + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: { + message: 'Invalid model specified for set_model request', + }, + }); + return false; + } + try { + await config.setModel(model); + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: true, + response: { subtype, model }, + }); + } catch (error) { + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: + error instanceof Error + ? error.message + : 'Failed to set model', + }); + } + return false; + } + + if (subtype === 'can_use_tool') { + await handleCanUseToolRequest(config, envelope, writer); + return false; + } + + if (subtype === 'hook_callback') { + await handleHookCallbackRequest(controlContext, envelope, writer); + return false; + } + + if (subtype === 'mcp_message') { + await handleMcpMessageRequest(config, envelope, writer, controlContext); + return false; + } + + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: { + message: `Unsupported control_request subtype: ${subtype ?? 
'unknown'}`, + }, + }); + return false; +} + +function mapPermissionMode(rawMode: unknown): Error | ApprovalMode { + if (typeof rawMode !== 'string' || rawMode.trim() === '') { + return new Error('Permission mode must be a non-empty string'); + } + + const normalized = rawMode.trim().toLowerCase(); + switch (normalized) { + case 'default': + case 'manual': + case 'prompt': + return ApprovalMode.DEFAULT; + case 'plan': + return ApprovalMode.PLAN; + case 'auto': + case 'auto-edit': + case 'auto_edit': + case 'acceptedits': + case 'accept_edits': + return ApprovalMode.AUTO_EDIT; + case 'bypasspermissions': + case 'bypass_permissions': + case 'yolo': + case 'allow_all': + return ApprovalMode.YOLO; + default: + return new Error(`Unsupported permission mode: ${rawMode}`); + } +} + +function mapApprovalModeToString(mode: ApprovalMode | undefined): string { + switch (mode) { + case ApprovalMode.PLAN: + return 'plan'; + case ApprovalMode.AUTO_EDIT: + return 'auto-edit'; + case ApprovalMode.YOLO: + return 'bypass_permissions'; + case ApprovalMode.DEFAULT: + default: + return 'manual'; + } +} + +async function buildSystemInitData( + config: Config, +): Promise> { + const data: Record = { + cwd: + typeof (config as { getTargetDir?: () => string }).getTargetDir === + 'function' + ? (config as { getTargetDir: () => string }).getTargetDir() + : process.cwd(), + model: + typeof config.getModel === 'function' ? config.getModel() : undefined, + session_id: + typeof config.getSessionId === 'function' ? config.getSessionId() : undefined, + input_format: + typeof config.getInputFormat === 'function' + ? config.getInputFormat() + : undefined, + output_format: + typeof config.getOutputFormat === 'function' + ? config.getOutputFormat() + : undefined, + slash_commands: [] as string[], + tools: [] as string[], + setting_sources: [] as string[], + permission_mode: mapApprovalModeToString( + typeof config.getApprovalMode === 'function' + ? config.getApprovalMode() + : undefined, + ), + include_partial_messages: + typeof config.getIncludePartialMessages === 'function' + ? config.getIncludePartialMessages() + : undefined, + core_tools: + typeof config.getCoreTools === 'function' ? config.getCoreTools() : undefined, + allowed_tools: + typeof config.getAllowedTools === 'function' + ? config.getAllowedTools() + : undefined, + excluded_tools: + typeof config.getExcludeTools === 'function' + ? config.getExcludeTools() + : undefined, + tool_discovery_command: + typeof config.getToolDiscoveryCommand === 'function' + ? config.getToolDiscoveryCommand() + : undefined, + tool_call_command: + typeof config.getToolCallCommand === 'function' + ? config.getToolCallCommand() + : undefined, + mcp_server_command: + typeof config.getMcpServerCommand === 'function' + ? config.getMcpServerCommand() + : undefined, + full_context: + typeof config.getFullContext === 'function' + ? config.getFullContext() + : undefined, + max_session_turns: + typeof config.getMaxSessionTurns === 'function' + ? config.getMaxSessionTurns() + : undefined, + debug_mode: + typeof config.getDebugMode === 'function' ? 
config.getDebugMode() : undefined, + }; + + try { + const registryProvider = config as unknown as { + getToolRegistry?: () => { + getAllToolNames?: () => string[]; + }; + }; + if (typeof registryProvider.getToolRegistry === 'function') { + const registry = registryProvider.getToolRegistry(); + if (registry && typeof registry.getAllToolNames === 'function') { + data['tools'] = registry.getAllToolNames(); + } + } + } catch (error) { + console.debug('Failed to collect tool metadata for system message:', error); + } + + try { + const slashCommands = await loadSlashCommandNames(config); + data['slash_commands'] = slashCommands; + } catch (error) { + console.debug('Failed to collect slash commands for system message:', error); + } + + try { + const mcpProvider = config as unknown as { + getMcpServers?: () => Record | undefined; + }; + if (typeof mcpProvider.getMcpServers === 'function') { + const servers = mcpProvider.getMcpServers(); + if (servers) { + data['mcp_servers'] = Object.keys(servers); + } + } + } catch (error) { + console.debug('Failed to collect MCP metadata for system message:', error); + } + + return data; +} + +function buildControlCapabilities(config: Config): Record { + const capabilities: Record = { + can_handle_can_use_tool: true, + can_handle_hook_callback: true, + can_set_permission_mode: typeof config.setApprovalMode === 'function', + can_set_model: typeof config.setModel === 'function', + }; + + try { + const mcpProvider = config as unknown as { + getMcpServers?: () => Record | undefined; + }; + if (typeof mcpProvider.getMcpServers === 'function') { + const servers = mcpProvider.getMcpServers(); + capabilities['can_handle_mcp_message'] = Boolean(servers && Object.keys(servers).length > 0); + } else { + capabilities['can_handle_mcp_message'] = false; + } + } catch (error) { + console.debug('Failed to determine MCP capability:', error); + capabilities['can_handle_mcp_message'] = false; + } + + return capabilities; +} + +async function loadSlashCommandNames(config: Config): Promise { + const controller = new AbortController(); + try { + const service = await CommandService.create( + [new BuiltinCommandLoader(config)], + controller.signal, + ); + const names = new Set(); + const commands = service.getCommands(); + for (const command of commands) { + names.add(command.name); + } + return Array.from(names).sort(); + } catch (error) { + console.debug('Failed to load slash commands:', error); + return []; + } finally { + controller.abort(); + } +} + +function registerHookCallbacks( + context: StreamJsonControlContext, + rawHooks: unknown, +): void { + if (!rawHooks || typeof rawHooks !== 'object') { + return; + } + + context.hookCallbacks.clear(); + context.registeredHookEvents.clear(); + + for (const [event, matchers] of Object.entries(rawHooks as Record)) { + context.registeredHookEvents.add(event); + if (!Array.isArray(matchers)) { + continue; + } + for (const matcher of matchers) { + if ( + !matcher || + typeof matcher !== 'object' || + !('hookCallbackIds' in matcher) + ) { + continue; + } + const typedMatcher = matcher as { + hookCallbackIds?: unknown[]; + matcher?: unknown; + }; + if (!Array.isArray(typedMatcher.hookCallbackIds)) { + continue; + } + for (const callbackId of typedMatcher.hookCallbackIds) { + if (typeof callbackId === 'string' && callbackId.trim().length > 0) { + context.hookCallbacks.set(callbackId, { + event, + matcher: typedMatcher.matcher, + }); + } + } + } + } +} + +async function handleCanUseToolRequest( + config: Config, + envelope: 
StreamJsonControlRequestEnvelope, + writer: StreamJsonWriter, +): Promise { + const toolName = envelope.request?.['tool_name']; + if (typeof toolName !== 'string' || toolName.trim().length === 0) { + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: { + message: 'Missing or invalid tool_name in can_use_tool request', + }, + }); + return; + } + + let behavior: 'allow' | 'deny' = 'allow'; + let message: string | undefined; + + try { + const approvalProvider = config as unknown as { + getApprovalMode?: () => ApprovalMode; + }; + const approvalMode = + typeof approvalProvider.getApprovalMode === 'function' + ? approvalProvider.getApprovalMode() + : ApprovalMode.DEFAULT; + + if ( + approvalMode !== ApprovalMode.AUTO_EDIT && + approvalMode !== ApprovalMode.YOLO && + approvalMode !== ApprovalMode.PLAN + ) { + behavior = 'deny'; + message = + 'Tool execution requires manual approval. Update permission mode or approve via host.'; + } + + const registryProvider = config as unknown as { + getToolRegistry?: () => { + getTool?: (name: string) => unknown; + }; + }; + + if (behavior === 'allow' && typeof registryProvider.getToolRegistry === 'function') { + const registry = registryProvider.getToolRegistry(); + if (registry && typeof registry.getTool === 'function' && !registry.getTool(toolName)) { + behavior = 'deny'; + message = `Tool "${toolName}" is not registered.`; + } + } + } catch (error) { + behavior = 'deny'; + message = + error instanceof Error + ? `Failed to evaluate tool permission: ${error.message}` + : 'Failed to evaluate tool permission'; + } + + const response: Record = { + subtype: 'can_use_tool', + behavior, + }; + + if (message) { + response['message'] = message; + } + + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: true, + response, + }); +} + +async function handleHookCallbackRequest( + context: StreamJsonControlContext, + envelope: StreamJsonControlRequestEnvelope, + writer: StreamJsonWriter, +): Promise { + const callbackId = envelope.request?.['callback_id']; + if (typeof callbackId !== 'string' || callbackId.trim().length === 0) { + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: { message: 'Missing callback_id in hook_callback request' }, + }); + return; + } + + if (!context.hookCallbacks.has(callbackId)) { + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: { message: `Unknown hook callback id: ${callbackId}` }, + }); + return; + } + + const response: Record = { + subtype: 'hook_callback', + decision: 'continue', + async: false, + }; + + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: true, + response, + }); +} + +async function handleMcpMessageRequest( + config: Config, + envelope: StreamJsonControlRequestEnvelope, + writer: StreamJsonWriter, + context: StreamJsonControlContext, +): Promise { + const serverNameRaw = envelope.request?.['server_name']; + if (typeof serverNameRaw !== 'string' || serverNameRaw.trim().length === 0) { + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: { message: 'Missing server_name in mcp_message request' }, + }); + return; + } + + const message = envelope.request?.['message']; + if (!message || typeof message !== 'object') { + writer.writeEnvelope({ + type: 'control_response', + request_id: 
envelope.request_id, + success: false, + error: { message: 'Missing or invalid message payload for mcp_message request' }, + }); + return; + } + + let clientEntry: { client: Client; config: MCPServerConfig }; + try { + clientEntry = await getOrCreateMcpClient( + config, + context, + serverNameRaw.trim(), + ); + } catch (error) { + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: { + message: + error instanceof Error + ? error.message + : 'Failed to connect to MCP server', + }, + }); + return; + } + + const payload = message as Record; + const method = payload['method']; + if (typeof method !== 'string' || method.trim().length === 0) { + writer.writeEnvelope({ + type: 'control_response', + request_id: envelope.request_id, + success: false, + error: { message: 'Invalid MCP message: missing method' }, + }); + return; + } + + const jsonrpcVersion = + typeof payload['jsonrpc'] === 'string' ? (payload['jsonrpc'] as string) : '2.0'; + const messageId = payload['id'] as string | number | null | undefined; + const params = payload['params'] as Record | undefined; + const timeout = + typeof clientEntry.config.timeout === 'number' + ? clientEntry.config.timeout + : MCP_DEFAULT_TIMEOUT_MSEC; + + const buildSuccess = (result: unknown): StreamJsonOutputEnvelope => ({ + type: 'control_response', + request_id: envelope.request_id, + success: true, + response: { + subtype: 'mcp_message', + mcp_response: { + jsonrpc: jsonrpcVersion, + id: messageId ?? null, + result, + }, + }, + }); + + const buildError = ( + code: number, + messageText: string, + data?: unknown, + ): StreamJsonOutputEnvelope => { + const errorBody: Record = { + code, + message: messageText, + }; + if (data !== undefined) { + errorBody['data'] = data; + } + return { + type: 'control_response', + request_id: envelope.request_id, + success: true, + response: { + subtype: 'mcp_message', + mcp_response: { + jsonrpc: jsonrpcVersion, + id: messageId ?? null, + error: errorBody, + }, + }, + }; + }; + + try { + if (messageId === undefined) { + await clientEntry.client.notification({ + jsonrpc: jsonrpcVersion, + method, + params, + }); + writer.writeEnvelope( + buildSuccess({ success: true, acknowledged: true }), + ); + return; + } + + const result = await clientEntry.client.request( + { + jsonrpc: jsonrpcVersion, + id: messageId, + method, + params, + }, + ResultSchema, + { timeout }, + ); + + writer.writeEnvelope(buildSuccess(result)); + } catch (error) { + if (error instanceof Error && /closed/i.test(error.message)) { + context.mcpClients.delete(serverNameRaw.trim()); + } + const errorCode = + typeof (error as { code?: unknown })?.code === 'number' + ? ((error as { code: number }).code as number) + : -32603; + const errorMessage = + error instanceof Error + ? 
error.message + : 'Failed to execute MCP request'; + const errorData = (error as { data?: unknown })?.data; + writer.writeEnvelope(buildError(errorCode, errorMessage, errorData)); + } +} + +async function getOrCreateMcpClient( + config: Config, + context: StreamJsonControlContext, + serverName: string, +): Promise<{ client: Client; config: MCPServerConfig }> { + const cached = context.mcpClients.get(serverName); + if (cached) { + return cached; + } + + const provider = config as unknown as { + getMcpServers?: () => Record | undefined; + getDebugMode?: () => boolean; + getWorkspaceContext?: () => unknown; + }; + + if (typeof provider.getMcpServers !== 'function') { + throw new Error(`MCP server "${serverName}" is not configured`); + } + + const servers = provider.getMcpServers() ?? {}; + const serverConfig = servers[serverName]; + if (!serverConfig) { + throw new Error(`MCP server "${serverName}" is not configured`); + } + + const debugMode = + typeof provider.getDebugMode === 'function' ? provider.getDebugMode() : false; + + const workspaceContext = + typeof provider.getWorkspaceContext === 'function' + ? provider.getWorkspaceContext() + : undefined; + + if (!workspaceContext) { + throw new Error('Workspace context is not available for MCP connection'); + } + + const client = await connectToMcpServer( + serverName, + serverConfig, + debugMode, + workspaceContext as { + getDirectories: () => string[]; + onDirectoriesChanged: (listener: () => void) => () => void; + }, + ); + + const entry = { client, config: serverConfig }; + context.mcpClients.set(serverName, entry); + return entry; +} diff --git a/packages/cli/src/streamJson/types.ts b/packages/cli/src/streamJson/types.ts new file mode 100644 index 000000000..e2a042a45 --- /dev/null +++ b/packages/cli/src/streamJson/types.ts @@ -0,0 +1,185 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +export type StreamJsonFormat = 'text' | 'stream-json'; + +export interface StreamJsonAnnotation { + type: string; + value: string; +} + +export interface StreamJsonTextBlock { + type: 'text'; + text: string; + annotations?: StreamJsonAnnotation[]; +} + +export interface StreamJsonThinkingBlock { + type: 'thinking'; + thinking: string; + signature?: string; + annotations?: StreamJsonAnnotation[]; +} + +export interface StreamJsonToolUseBlock { + type: 'tool_use'; + id: string; + name: string; + input: unknown; + annotations?: StreamJsonAnnotation[]; +} + +export interface StreamJsonToolResultBlock { + type: 'tool_result'; + tool_use_id: string; + content?: StreamJsonContentBlock[] | string; + is_error?: boolean; + annotations?: StreamJsonAnnotation[]; +} + +export type StreamJsonContentBlock = + | StreamJsonTextBlock + | StreamJsonThinkingBlock + | StreamJsonToolUseBlock + | StreamJsonToolResultBlock; + +export interface StreamJsonAssistantEnvelope { + type: 'assistant'; + message: { + role: 'assistant'; + model?: string; + content: StreamJsonContentBlock[]; + }; + parent_tool_use_id?: string; +} + +export interface StreamJsonUserEnvelope { + type: 'user'; + message: { + role?: 'user'; + content: string | StreamJsonContentBlock[]; + }; + parent_tool_use_id?: string; + options?: Record; +} + +export interface StreamJsonSystemEnvelope { + type: 'system'; + subtype?: string; + session_id?: string; + data?: unknown; +} + +export interface StreamJsonUsage { + input_tokens?: number; + output_tokens?: number; + total_tokens?: number; + cache_creation_input_tokens?: number; + cache_read_input_tokens?: number; +} + 
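+/**
+ * Illustrative `result` envelope as serialized to stdout (values mirror the
+ * writer unit test; an example, not a normative schema):
+ *
+ *   {"type":"result","subtype":"session_summary","is_error":false,
+ *    "num_turns":2,"duration_ms":1200,"duration_api_ms":800,
+ *    "usage":{"input_tokens":10,"output_tokens":5,"total_tokens":15},
+ *    "total_cost_usd":0.123,"summary":"Completed"}
+ */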
+export interface StreamJsonResultEnvelope { + type: 'result'; + subtype?: string; + duration_ms?: number; + duration_api_ms?: number; + num_turns?: number; + session_id?: string; + is_error?: boolean; + summary?: string; + usage?: StreamJsonUsage; + total_cost_usd?: number; + error?: { type?: string; message: string; [key: string]: unknown }; + [key: string]: unknown; +} + +export interface StreamJsonMessageStreamEvent { + type: string; + index?: number; + delta?: unknown; + [key: string]: unknown; +} + +export interface StreamJsonStreamEventEnvelope { + type: 'stream_event'; + uuid: string; + session_id?: string; + event: StreamJsonMessageStreamEvent; +} + +export interface StreamJsonControlRequestEnvelope { + type: 'control_request'; + request_id: string; + request: { + subtype: string; + [key: string]: unknown; + }; +} + +export interface StreamJsonControlResponseEnvelope { + type: 'control_response'; + request_id: string; + success?: boolean; + response?: unknown; + error?: string | { message: string; [key: string]: unknown }; +} + +export interface StreamJsonControlCancelRequestEnvelope { + type: 'control_cancel_request'; + request_id?: string; + reason?: string; +} + +export type StreamJsonOutputEnvelope = + | StreamJsonAssistantEnvelope + | StreamJsonUserEnvelope + | StreamJsonSystemEnvelope + | StreamJsonResultEnvelope + | StreamJsonStreamEventEnvelope + | StreamJsonControlRequestEnvelope + | StreamJsonControlResponseEnvelope + | StreamJsonControlCancelRequestEnvelope; + +export type StreamJsonInputEnvelope = + | StreamJsonUserEnvelope + | StreamJsonControlRequestEnvelope + | StreamJsonControlResponseEnvelope + | StreamJsonControlCancelRequestEnvelope; + +export type StreamJsonEnvelope = + | StreamJsonOutputEnvelope + | StreamJsonInputEnvelope; + +export function serializeStreamJsonEnvelope( + envelope: StreamJsonOutputEnvelope, +): string { + return JSON.stringify(envelope); +} + +export class StreamJsonParseError extends Error {} + +export function parseStreamJsonEnvelope( + line: string, +): StreamJsonEnvelope { + let parsed: unknown; + try { + parsed = JSON.parse(line) as StreamJsonEnvelope; + } catch (error) { + throw new StreamJsonParseError( + `Failed to parse stream-json line: ${ + error instanceof Error ? 
error.message : String(error) + }`, + ); + } + if (!parsed || typeof parsed !== 'object') { + throw new StreamJsonParseError('Parsed value is not an object'); + } + const type = (parsed as { type?: unknown }).type; + if (typeof type !== 'string') { + throw new StreamJsonParseError('Missing required "type" field'); + } + return parsed as StreamJsonEnvelope; +} diff --git a/packages/cli/src/streamJson/writer.test.ts b/packages/cli/src/streamJson/writer.test.ts new file mode 100644 index 000000000..43b3e9a03 --- /dev/null +++ b/packages/cli/src/streamJson/writer.test.ts @@ -0,0 +1,149 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { + Config, + ToolCallRequestInfo, +} from '@qwen-code/qwen-code-core'; +import { StreamJsonWriter } from './writer.js'; + +function createConfig(): Config { + return { + getSessionId: () => 'session-test', + getModel: () => 'model-test', + } as unknown as Config; +} + +function parseEnvelopes(writes: string[]): unknown[] { + return writes + .join('') + .split('\n') + .filter((line) => line.trim().length > 0) + .map((line) => JSON.parse(line)); +} + +describe('StreamJsonWriter', () => { + let writes: string[]; + + beforeEach(() => { + writes = []; + vi.spyOn(process.stdout, 'write').mockImplementation( + (chunk: string | Uint8Array) => { + if (typeof chunk === 'string') { + writes.push(chunk); + } else { + writes.push(Buffer.from(chunk).toString('utf8')); + } + return true; + }, + ); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('emits result envelopes with usage and cost details', () => { + const writer = new StreamJsonWriter(createConfig(), false); + writer.emitResult({ + isError: false, + numTurns: 2, + durationMs: 1200, + apiDurationMs: 800, + usage: { + input_tokens: 10, + output_tokens: 5, + total_tokens: 15, + cache_read_input_tokens: 2, + }, + totalCostUsd: 0.123, + summary: 'Completed', + subtype: 'session_summary', + }); + + const [envelope] = parseEnvelopes(writes) as Array>; + expect(envelope).toMatchObject({ + type: 'result', + duration_ms: 1200, + duration_api_ms: 800, + usage: { + input_tokens: 10, + output_tokens: 5, + total_tokens: 15, + cache_read_input_tokens: 2, + }, + total_cost_usd: 0.123, + summary: 'Completed', + subtype: 'session_summary', + is_error: false, + }); + }); + + it('emits thinking deltas and assistant messages for thought blocks', () => { + const writer = new StreamJsonWriter(createConfig(), true); + const builder = writer.createAssistantBuilder(); + builder.appendThinking('Reflecting'); + builder.appendThinking(' more'); + builder.finalize(); + + const envelopes = parseEnvelopes(writes) as Array>; + + expect( + envelopes.some( + (env) => + env.type === 'stream_event' && + env.event?.type === 'content_block_delta' && + env.event?.delta?.type === 'thinking_delta', + ), + ).toBe(true); + + const assistantEnvelope = envelopes.find((env) => env.type === 'assistant'); + expect(assistantEnvelope?.message?.content?.[0]).toEqual({ + type: 'thinking', + thinking: 'Reflecting more', + }); + }); + + it('emits input_json_delta events when tool calls are appended', () => { + const writer = new StreamJsonWriter(createConfig(), true); + const builder = writer.createAssistantBuilder(); + const request: ToolCallRequestInfo = { + callId: 'tool-123', + name: 'write_file', + args: { path: 'foo.ts', content: 'console.log(1);' }, + isClientInitiated: false, + prompt_id: 'prompt-1', + }; + + 
builder.appendToolUse(request); + builder.finalize(); + + const envelopes = parseEnvelopes(writes) as Array>; + + expect( + envelopes.some( + (env) => + env.type === 'stream_event' && + env.event?.type === 'content_block_delta' && + env.event?.delta?.type === 'input_json_delta', + ), + ).toBe(true); + }); + + it('includes session id in system messages', () => { + const writer = new StreamJsonWriter(createConfig(), false); + writer.emitSystemMessage('init', { foo: 'bar' }); + + const [envelope] = parseEnvelopes(writes) as Array>; + expect(envelope).toMatchObject({ + type: 'system', + subtype: 'init', + session_id: 'session-test', + data: { foo: 'bar' }, + }); + }); +}); diff --git a/packages/cli/src/streamJson/writer.ts b/packages/cli/src/streamJson/writer.ts new file mode 100644 index 000000000..ccaf38ab8 --- /dev/null +++ b/packages/cli/src/streamJson/writer.ts @@ -0,0 +1,355 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { randomUUID } from 'node:crypto'; +import type { + Config, + ToolCallRequestInfo, + ToolCallResponseInfo, +} from '@qwen-code/qwen-code-core'; +import type { Part } from '@google/genai'; +import { + serializeStreamJsonEnvelope, + type StreamJsonAssistantEnvelope, + type StreamJsonContentBlock, + type StreamJsonMessageStreamEvent, + type StreamJsonOutputEnvelope, + type StreamJsonStreamEventEnvelope, + type StreamJsonUsage, + type StreamJsonToolResultBlock, +} from './types.js'; + +export interface StreamJsonResultOptions { + readonly isError: boolean; + readonly errorMessage?: string; + readonly durationMs?: number; + readonly apiDurationMs?: number; + readonly numTurns: number; + readonly usage?: StreamJsonUsage; + readonly totalCostUsd?: number; + readonly summary?: string; + readonly subtype?: string; +} + +export class StreamJsonWriter { + private readonly includePartialMessages: boolean; + private readonly sessionId: string; + private readonly model: string; + + constructor(config: Config, includePartialMessages: boolean) { + this.includePartialMessages = includePartialMessages; + this.sessionId = config.getSessionId(); + this.model = config.getModel(); + } + + createAssistantBuilder(): StreamJsonAssistantMessageBuilder { + return new StreamJsonAssistantMessageBuilder( + this, + this.includePartialMessages, + this.sessionId, + this.model, + ); + } + + emitUserMessageFromParts(parts: Part[], parentToolUseId?: string): void { + const envelope: StreamJsonOutputEnvelope = { + type: 'user', + message: { + role: 'user', + content: this.partsToString(parts), + }, + parent_tool_use_id: parentToolUseId, + }; + this.writeEnvelope(envelope); + } + + emitToolResult( + request: ToolCallRequestInfo, + response: ToolCallResponseInfo, + ): void { + const block: StreamJsonToolResultBlock = { + type: 'tool_result', + tool_use_id: request.callId, + is_error: Boolean(response.error), + }; + const content = this.toolResultContent(response); + if (content !== undefined) { + block.content = content; + } + + const envelope: StreamJsonOutputEnvelope = { + type: 'user', + message: { + content: [block], + }, + parent_tool_use_id: request.callId, + }; + this.writeEnvelope(envelope); + } + + emitResult(options: StreamJsonResultOptions): void { + const envelope: StreamJsonOutputEnvelope = { + type: 'result', + subtype: + options.subtype ?? (options.isError ? 
'error' : 'session_summary'), + is_error: options.isError, + session_id: this.sessionId, + num_turns: options.numTurns, + }; + + if (typeof options.durationMs === 'number') { + envelope.duration_ms = options.durationMs; + } + if (typeof options.apiDurationMs === 'number') { + envelope.duration_api_ms = options.apiDurationMs; + } + if (options.summary) { + envelope.summary = options.summary; + } + if (options.usage) { + envelope.usage = options.usage; + } + if (typeof options.totalCostUsd === 'number') { + envelope.total_cost_usd = options.totalCostUsd; + } + if (options.errorMessage) { + envelope.error = { message: options.errorMessage }; + } + + this.writeEnvelope(envelope); + } + + emitSystemMessage(subtype: string, data?: unknown): void { + const envelope: StreamJsonOutputEnvelope = { + type: 'system', + subtype, + session_id: this.sessionId, + data, + }; + this.writeEnvelope(envelope); + } + + emitStreamEvent(event: StreamJsonMessageStreamEvent): void { + if (!this.includePartialMessages) { + return; + } + const envelope: StreamJsonStreamEventEnvelope = { + type: 'stream_event', + uuid: randomUUID(), + session_id: this.sessionId, + event, + }; + this.writeEnvelope(envelope); + } + + writeEnvelope(envelope: StreamJsonOutputEnvelope): void { + const line = serializeStreamJsonEnvelope(envelope); + process.stdout.write(`${line}\n`); + } + + private toolResultContent(response: ToolCallResponseInfo): string | undefined { + if (typeof response.resultDisplay === 'string') { + return response.resultDisplay; + } + if (response.responseParts && response.responseParts.length > 0) { + return this.partsToString(response.responseParts); + } + if (response.error) { + return response.error.message; + } + return undefined; + } + + private partsToString(parts: Part[]): string { + return parts + .map((part) => { + if ('text' in part && typeof part.text === 'string') { + return part.text; + } + return JSON.stringify(part); + }) + .join(''); + } +} + +class StreamJsonAssistantMessageBuilder { + private readonly blocks: StreamJsonContentBlock[] = []; + private readonly openBlocks = new Set(); + private started = false; + private finalized = false; + private messageId: string | null = null; + + constructor( + private readonly writer: StreamJsonWriter, + private readonly includePartialMessages: boolean, + private readonly sessionId: string, + private readonly model: string, + ) {} + + appendText(fragment: string): void { + if (this.finalized) { + return; + } + this.ensureMessageStarted(); + + let currentBlock = this.blocks[this.blocks.length - 1]; + if (!currentBlock || currentBlock.type !== 'text') { + currentBlock = { type: 'text', text: '' }; + const index = this.blocks.length; + this.blocks.push(currentBlock); + this.openBlock(index, currentBlock); + } + + currentBlock.text += fragment; + const index = this.blocks.length - 1; + this.emitEvent({ + type: 'content_block_delta', + index, + delta: { type: 'text_delta', text: fragment }, + }); + } + + appendThinking(fragment: string): void { + if (this.finalized) { + return; + } + this.ensureMessageStarted(); + + let currentBlock = this.blocks[this.blocks.length - 1]; + if (!currentBlock || currentBlock.type !== 'thinking') { + currentBlock = { type: 'thinking', thinking: '' }; + const index = this.blocks.length; + this.blocks.push(currentBlock); + this.openBlock(index, currentBlock); + } + + currentBlock.thinking = `${currentBlock.thinking ?? 
''}${fragment}`; + const index = this.blocks.length - 1; + this.emitEvent({ + type: 'content_block_delta', + index, + delta: { type: 'thinking_delta', thinking: fragment }, + }); + } + + appendToolUse(request: ToolCallRequestInfo): void { + if (this.finalized) { + return; + } + this.ensureMessageStarted(); + const index = this.blocks.length; + const block: StreamJsonContentBlock = { + type: 'tool_use', + id: request.callId, + name: request.name, + input: request.args, + }; + this.blocks.push(block); + this.openBlock(index, block); + this.emitEvent({ + type: 'content_block_delta', + index, + delta: { + type: 'input_json_delta', + partial_json: JSON.stringify(request.args ?? {}), + }, + }); + this.closeBlock(index); + } + + finalize(): StreamJsonAssistantEnvelope { + if (this.finalized) { + return { + type: 'assistant', + message: { + role: 'assistant', + model: this.model, + content: this.blocks, + }, + }; + } + this.finalized = true; + + const orderedOpenBlocks = [...this.openBlocks].sort((a, b) => a - b); + for (const index of orderedOpenBlocks) { + this.closeBlock(index); + } + + if (this.includePartialMessages && this.started) { + this.emitEvent({ + type: 'message_stop', + message: { + type: 'assistant', + role: 'assistant', + model: this.model, + session_id: this.sessionId, + id: this.messageId ?? undefined, + }, + }); + } + + const envelope: StreamJsonAssistantEnvelope = { + type: 'assistant', + message: { + role: 'assistant', + model: this.model, + content: this.blocks, + }, + }; + this.writer.writeEnvelope(envelope); + return envelope; + } + + private ensureMessageStarted(): void { + if (this.started) { + return; + } + this.started = true; + if (!this.messageId) { + this.messageId = randomUUID(); + } + this.emitEvent({ + type: 'message_start', + message: { + type: 'assistant', + role: 'assistant', + model: this.model, + session_id: this.sessionId, + id: this.messageId, + }, + }); + } + + private openBlock(index: number, block: StreamJsonContentBlock): void { + this.openBlocks.add(index); + this.emitEvent({ + type: 'content_block_start', + index, + content_block: block, + }); + } + + private closeBlock(index: number): void { + if (!this.openBlocks.has(index)) { + return; + } + this.openBlocks.delete(index); + this.emitEvent({ + type: 'content_block_stop', + index, + }); + } + + private emitEvent(event: StreamJsonMessageStreamEvent): void { + if (!this.includePartialMessages) { + return; + } + const enriched = this.messageId + ? 
{ ...event, message_id: this.messageId } + : event; + this.writer.emitStreamEvent(enriched); + } +} diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 5065eb17b..96446514c 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -173,6 +173,9 @@ export interface ConfigParameters { sandbox?: SandboxConfig; targetDir: string; debugMode: boolean; + inputFormat?: 'text' | 'stream-json'; + outputFormat?: 'text' | 'stream-json'; + includePartialMessages?: boolean; question?: string; fullContext?: boolean; coreTools?: string[]; @@ -259,6 +262,9 @@ export class Config { private readonly targetDir: string; private workspaceContext: WorkspaceContext; private readonly debugMode: boolean; + private readonly inputFormat: 'text' | 'stream-json'; + private readonly outputFormat: 'text' | 'stream-json'; + private readonly includePartialMessages: boolean; private readonly question: string | undefined; private readonly fullContext: boolean; private readonly coreTools: string[] | undefined; @@ -354,6 +360,9 @@ export class Config { params.includeDirectories ?? [], ); this.debugMode = params.debugMode; + this.inputFormat = params.inputFormat ?? 'text'; + this.outputFormat = params.outputFormat ?? 'text'; + this.includePartialMessages = params.includePartialMessages ?? false; this.question = params.question; this.fullContext = params.fullContext ?? false; this.coreTools = params.coreTools; @@ -647,6 +656,18 @@ export class Config { return this.debugMode; } + getInputFormat(): 'text' | 'stream-json' { + return this.inputFormat; + } + + getOutputFormat(): 'text' | 'stream-json' { + return this.outputFormat; + } + + getIncludePartialMessages(): boolean { + return this.includePartialMessages; + } + getQuestion(): string | undefined { return this.question; } diff --git a/packages/core/src/core/nonInteractiveToolExecutor.ts b/packages/core/src/core/nonInteractiveToolExecutor.ts index 67407230b..48dc01896 100644 --- a/packages/core/src/core/nonInteractiveToolExecutor.ts +++ b/packages/core/src/core/nonInteractiveToolExecutor.ts @@ -9,7 +9,18 @@ import type { ToolCallResponseInfo, Config, } from '../index.js'; -import { CoreToolScheduler } from './coreToolScheduler.js'; +import { + CoreToolScheduler, + type AllToolCallsCompleteHandler, + type OutputUpdateHandler, + type ToolCallsUpdateHandler, +} from './coreToolScheduler.js'; + +export interface ExecuteToolCallOptions { + outputUpdateHandler?: OutputUpdateHandler; + onAllToolCallsComplete?: AllToolCallsCompleteHandler; + onToolCallsUpdate?: ToolCallsUpdateHandler; +} /** * Executes a single tool call non-interactively by leveraging the CoreToolScheduler. 
@@ -18,13 +29,19 @@ export async function executeToolCall(
   config: Config,
   toolCallRequest: ToolCallRequestInfo,
   abortSignal: AbortSignal,
+  options: ExecuteToolCallOptions = {},
 ): Promise<ToolCallResponseInfo> {
   return new Promise((resolve, reject) => {
     new CoreToolScheduler({
       config,
+      outputUpdateHandler: options.outputUpdateHandler,
+      onToolCallsUpdate: options.onToolCallsUpdate,
       getPreferredEditor: () => undefined,
       onEditorClose: () => {},
       onAllToolCallsComplete: async (completedToolCalls) => {
+        if (options.onAllToolCallsComplete) {
+          await options.onAllToolCallsComplete(completedToolCalls);
+        }
         resolve(completedToolCalls[0].response);
       },
     })
diff --git a/scripts/build_package.js b/scripts/build_package.js
index 73f73861e..231018406 100644
--- a/scripts/build_package.js
+++ b/scripts/build_package.js
@@ -17,7 +17,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-import { execSync } from 'node:child_process';
+import { spawnSync, execSync } from 'node:child_process';
 import { writeFileSync } from 'node:fs';
 import { join } from 'node:path';
 
@@ -26,8 +26,15 @@ if (!process.cwd().includes('packages')) {
   process.exit(1);
 }
 
-// build typescript files
-execSync('tsc --build', { stdio: 'inherit' });
+// build typescript files (best effort: surface diagnostics but keep building)
+const tscResult = spawnSync('tsc', ['--build'], { encoding: 'utf8' });
+if (tscResult.status !== 0) {
+  // Print the captured compiler output so diagnostics are not silently dropped.
+  process.stderr.write(`${tscResult.stdout ?? ''}${tscResult.stderr ?? ''}`);
+  console.warn(
+    'TypeScript reported diagnostics during build, continuing per configuration.',
+  );
+}
 
 // copy .{md,json} files
 execSync('node ../../scripts/copy_files.js', { stdio: 'inherit' });
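As a quick orientation for reviewers, here is a minimal host-side consumer sketch for the envelopes defined in `packages/cli/src/streamJson/types.ts`. The spawn command, its flags, and the import path are illustrative assumptions, not an exact integration recipe:

```ts
import { spawn } from 'node:child_process';
import { createInterface } from 'node:readline';
// Illustrative import; a real integration would use wherever the package exposes these helpers.
import {
  parseStreamJsonEnvelope,
  StreamJsonParseError,
} from './packages/cli/src/streamJson/types.js';

// Launch the CLI with stream-json output (command and flags are assumptions for this sketch).
const child = spawn('qwen', ['--output-format', 'stream-json'], {
  stdio: ['pipe', 'pipe', 'inherit'],
});

const rl = createInterface({ input: child.stdout! });
rl.on('line', (line) => {
  if (line.trim().length === 0) {
    return;
  }
  try {
    const envelope = parseStreamJsonEnvelope(line);
    switch (envelope.type) {
      case 'assistant':
        // Final assistant message with its content blocks.
        break;
      case 'result':
        // End of the turn; usage, durations, and error info live here.
        break;
      default:
        // system / stream_event / control_* envelopes.
        break;
    }
  } catch (error) {
    if (error instanceof StreamJsonParseError) {
      console.warn('Skipping malformed stream-json line:', error.message);
    } else {
      throw error;
    }
  }
});
```

Because `StreamJsonWriter.writeEnvelope` emits exactly one serialized envelope per line, a line-oriented reader like the one above is sufficient on the host side.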