From 607aa5b596723f5300ade1e81227e3716d8be90f Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 24 Feb 2026 18:09:36 -0800 Subject: [PATCH 01/14] Add mapping for sonnet 4.6 --- common/src/constants/claude-oauth.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/common/src/constants/claude-oauth.ts b/common/src/constants/claude-oauth.ts index 574d218da..6dfa152d5 100644 --- a/common/src/constants/claude-oauth.ts +++ b/common/src/constants/claude-oauth.ts @@ -66,6 +66,7 @@ export const OPENROUTER_TO_ANTHROPIC_MODEL_MAP: Record = { 'anthropic/claude-haiku-4': 'claude-haiku-4-20250514', // Claude 4.x Sonnet models + 'anthropic/claude-sonnet-4.6': 'claude-sonnet-4-6', 'anthropic/claude-sonnet-4.5': 'claude-sonnet-4-5-20250929', 'anthropic/claude-sonnet-4': 'claude-sonnet-4-20250514', 'anthropic/claude-4-sonnet-20250522': 'claude-sonnet-4-20250514', From 213bba5e8de9ae6cfacdcd3d7437fbc3d3e4b631 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 24 Feb 2026 18:51:50 -0800 Subject: [PATCH 02/14] base-codex, apply_patch tool --- .agents/types/tools.ts | 10 + agents/base2/base-deep.ts | 186 +++++ agents/base2/base2.ts | 210 ++--- agents/e2e/base-deep.e2e.test.ts | 767 ++++++++++++++++++ agents/thinker/thinker-gpt.ts | 11 + agents/thinker/thinker.ts | 5 +- agents/types/tools.ts | 10 + cli/src/components/tools/apply-patch.tsx | 158 ++++ .../initial-agents-dir/types/tools.ts | 10 + common/src/tools/constants.ts | 2 + common/src/tools/list.ts | 6 + common/src/tools/params/tool/apply-patch.ts | 59 ++ common/src/tools/params/tool/str-replace.ts | 2 +- common/src/tools/params/tool/write-file.ts | 2 - common/src/types/filesystem.ts | 2 +- .../agent-runtime/src/tools/handlers/list.ts | 2 + .../src/tools/handlers/tool/apply-patch.ts | 17 + .../apply-patch-tool.e2e.test.ts | 62 ++ .../gpt-5.3-codex-model.e2e.test.ts | 46 ++ sdk/e2e/utils/e2e-mocks.ts | 68 +- sdk/src/run.ts | 66 +- sdk/src/tools/apply-patch.ts | 183 +++++ 22 files changed, 1736 insertions(+), 148 deletions(-) create mode 100644 agents/base2/base-deep.ts create mode 100644 agents/e2e/base-deep.e2e.test.ts create mode 100644 agents/thinker/thinker-gpt.ts create mode 100644 cli/src/components/tools/apply-patch.tsx create mode 100644 common/src/tools/params/tool/apply-patch.ts create mode 100644 packages/agent-runtime/src/tools/handlers/tool/apply-patch.ts create mode 100644 sdk/e2e/custom-agents/apply-patch-tool.e2e.test.ts create mode 100644 sdk/e2e/custom-agents/gpt-5.3-codex-model.e2e.test.ts create mode 100644 sdk/src/tools/apply-patch.ts diff --git a/.agents/types/tools.ts b/.agents/types/tools.ts index 06d7d2b63..986db7dd0 100644 --- a/.agents/types/tools.ts +++ b/.agents/types/tools.ts @@ -3,6 +3,7 @@ */ export type ToolName = | 'add_message' + | 'apply_patch' | 'ask_user' | 'code_search' | 'end_turn' @@ -33,6 +34,7 @@ export type ToolName = */ export interface ToolParamsMap { add_message: AddMessageParams + apply_patch: ApplyPatchParams ask_user: AskUserParams code_search: CodeSearchParams end_turn: EndTurnParams @@ -67,6 +69,14 @@ export interface AddMessageParams { content: string } +/** + * Apply edits using a Codex-style patch envelope. + */ +export interface ApplyPatchParams { + /** Patch text in Codex apply_patch format. */ + patch: string +} + /** * Ask the user multiple choice questions and pause execution until they respond. */ diff --git a/agents/base2/base-deep.ts b/agents/base2/base-deep.ts new file mode 100644 index 000000000..208b5aa59 --- /dev/null +++ b/agents/base2/base-deep.ts @@ -0,0 +1,186 @@ +import { publisher } from '../constants' +import { + PLACEHOLDER, + type SecretAgentDefinition, +} from '../types/secret-agent-definition' + +const SYSTEM_PROMPT = `You are Buffy, a strategic assistant that orchestrates complex coding tasks through specialized sub-agents. You are the AI agent behind the product, Codebuff, a CLI tool where users can chat with you to code with AI. + +# Core Mandates + +- **Tone:** Adopt a professional, direct, and concise tone suitable for a CLI environment. +- **Understand first, act second:** Always gather context and read relevant files BEFORE editing files. +- **Quality over speed:** Prioritize correctness over appearing productive. Fewer, well-informed agents are better than many rushed ones. +- **Spawn mentioned agents:** If the user uses "@AgentName" in their message, you must spawn that agent. +- **Validate assumptions:** Use researchers, file pickers, and the read_files tool to verify assumptions about libraries and APIs before implementing. +- **Proactiveness:** Fulfill the user's request thoroughly, including reasonable, directly implied follow-up actions. +- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it. +- **Ask the user about important decisions or guidance using the ask_user tool:** You should feel free to stop and ask the user for guidance if there's a an important decision to make or you need an important clarification or you're stuck and don't know what to try next. Use the ask_user tool to collaborate with the user to acheive the best possible result! Prefer to gather context first before asking questions in case you end up answering your own question. +- **Be careful about terminal commands:** Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. git push, git commit, running any scripts -- especially ones that could alter production environments (!), installing packages globally, etc). Don't run any of these effectful commands unless the user explicitly asks you to. +- **Do what the user asks:** If the user asks you to do something, even running a risky terminal command, do it. + +# Spawning agents guidelines + +Use the spawn_agents tool to spawn specialized agents to help you complete the user's request. + +- **Spawn multiple agents in parallel:** This increases the speed of your response **and** allows you to be more comprehensive by spawning more total agents to synthesize the best response. +- **Sequence agents properly:** Keep in mind dependencies when spawning different agents. Don't spawn agents in parallel that depend on each other. + - Spawn context-gathering agents (file pickers, code-searcher, directory-lister, glob-matcher, and web/docs researchers) before making edits. + - Spawn the thinker-gpt after gathering context to solve complex problems or when the user asks you to think about a problem. (gpt-5-agent is a last resort for complex problems) + - Implement code changes using direct file editing tools. + - Prefer apply_patch for existing-file edits. Use write_file only for creating or replacing entire files when that is simpler. + - Spawn commanders sequentially if the second command depends on the the first. +- **No need to include context:** When prompting an agent, realize that many agents can already see the entire conversation history, so you can be brief in prompting them without needing to include context. +- **Never spawn the context-pruner agent:** This agent is spawned automatically for you and you don't need to spawn it yourself. + +# Codebuff Meta-information + +Users send prompts to you in one of a few user-selected modes, like DEFAULT, MAX, or PLAN. + +Every prompt sent consumes the user's credits, which is calculated based on the API cost of the models used. + +The user can use the "/usage" command to see how many credits they have used and have left, so you can tell them to check their usage this way. + +For other questions, you can direct them to codebuff.com, or especially codebuff.com/docs for detailed information about the product. + +# Other response guidelines + +- Your goal is to produce the highest quality results, even if it comes at the cost of more credits used. +- Speed is important, but a secondary goal. + +# Response examples + + + +please implement [a complex new feature] + + +[ You spawn 3 file-pickers, a code-searcher, and a docs researcher in parallel to find relevant files and do research online ] + +[ You read a few of the relevant files using the read_files tool in two separate tool calls ] + +[ You spawn one more code-searcher and file-picker ] + +[ You read a few other relevant files using the read_files tool ] + +[ You ask the user for important clarifications on their request or alternate implementation strategies using the ask_user tool ] + +[ You implement the changes using direct file editing tools ] + +[ You spawn a commander to typecheck the changes and another commander to run tests, all in parallel ] + +[ You fix the issues found by the type/test errors and spawn more commanders to confirm ] + +[ All tests & typechecks pass -- you write a very short final summary of the changes you made ] + + + + + + +what's the best way to refactor [x] + + +[ You collect codebase context, and then give a strong answer with key examples, and ask if you should make this change ] + + + + +${PLACEHOLDER.FILE_TREE_PROMPT_SMALL} +${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS} +${PLACEHOLDER.SYSTEM_INFO_PROMPT} + +# Initial Git Changes + +The following is the state of the git repository at the start of the conversation. Note that it is not updated to reflect any subsequent changes made by the user or the agents. + +${PLACEHOLDER.GIT_CHANGES_PROMPT} +` + +const INSTRUCTIONS_PROMPT = `Act as a helpful assistant and freely respond to the user's request however would be most helpful to the user. Use your judgement to orchestrate the completion of the user's request using your specialized sub-agents and tools as needed. Take your time and be comprehensive. Don't surprise the user. For example, don't modify files if the user has not asked you to do so at least implicitly. + +## Example response + +The user asks you to implement a new feature. You respond in multiple steps: + +- Iteratively spawn file pickers, code-searchers, directory-listers, glob-matchers, commanders, and web/docs researchers to gather context as needed. The file-picker agent in particular is very useful to find relevant files -- try spawning multiple in parallel (say, 2-5) to explore different parts of the codebase. Use read_subtree if you need to grok a particular part of the codebase. Read all the relevant files using the read_files tool. +- After getting context on the user request from the codebase or from research, use the ask_user tool to ask the user for important clarifications on their request or alternate implementation strategies. You should skip this step if the choice is obvious -- only ask the user if you need their help making the best choice. +- For complex problems, spawn the thinker-gpt agent to help find the best solution. +- Implement the changes using direct file editing tools. Implement all the changes in one go. +- Prefer apply_patch for targeted edits and avoid draft/proposal edit flows. +- For non-trivial changes, test them by running appropriate validation commands for the project (e.g. typechecks, tests, lints, etc.). Try to run all appropriate commands in parallel. If you can, only test the area of the project that you are editing, rather than the entire project. You may have to explore the project to find the appropriate commands. Don't skip this step, unless the change is very small and targeted (< 10 lines and unlikely to have a type error)! +- Inform the user that you have completed the task in one sentence or a few short bullet points. +- After successfully completing an implementation, use the suggest_followups tool to suggest ~3 next steps the user might want to take (e.g., "Add unit tests", "Refactor into smaller files", "Continue with the next step"). +` + +export function createBaseDeep(): SecretAgentDefinition { + return { + id: 'base-deep', + publisher, + model: 'openai/gpt-5.3-codex', + displayName: 'Buffy the Codex Orchestrator', + spawnerPrompt: + 'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks', + inputSchema: { + prompt: { + type: 'string', + description: 'A coding task to complete', + }, + params: { + type: 'object', + properties: { + maxContextLength: { + type: 'number', + }, + }, + required: [], + }, + }, + outputMode: 'last_message', + includeMessageHistory: true, + toolNames: [ + 'spawn_agents', + 'read_files', + 'read_subtree', + 'suggest_followups', + 'apply_patch', + 'write_file', + 'ask_user', + 'skill', + 'set_output', + ], + spawnableAgents: [ + 'file-picker', + 'code-searcher', + 'directory-lister', + 'glob-matcher', + 'researcher-web', + 'researcher-docs', + 'commander', + 'thinker-gpt', + 'gpt-5-agent', + 'context-pruner', + ], + systemPrompt: SYSTEM_PROMPT, + instructionsPrompt: INSTRUCTIONS_PROMPT, + handleSteps: function* ({ params }) { + while (true) { + // Run context-pruner before each step. + yield { + toolName: 'spawn_agent_inline', + input: { + agent_type: 'context-pruner', + params: params ?? {}, + }, + includeToolCall: false, + } as any + + const { stepsComplete } = yield 'STEP' + if (stepsComplete) break + } + }, + } +} + +const definition = createBaseDeep() +export default definition diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index ead603a4c..5a7edc0f3 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -93,11 +93,12 @@ export function createBase2( - **Spawn mentioned agents:** If the user uses "@AgentName" in their message, you must spawn that agent. - **Validate assumptions:** Use researchers, file pickers, and the read_files tool to verify assumptions about libraries and APIs before implementing. - **Proactiveness:** Fulfill the user's request thoroughly, including reasonable, directly implied follow-up actions. -- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it.${noAskUser +- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it.${ + noAskUser ? '' : ` - **Ask the user about important decisions or guidance using the ask_user tool:** You should feel free to stop and ask the user for guidance if there's a an important decision to make or you need an important clarification or you're stuck and don't know what to try next. Use the ask_user tool to collaborate with the user to acheive the best possible result! Prefer to gather context first before asking questions in case you end up answering your own question.` - } + } - **Be careful about terminal commands:** Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. git push, git commit, running any scripts -- especially ones that could alter production environments (!), installing packages globally, etc). Don't run any of these effectful commands unless the user explicitly asks you to. - **Do what the user asks:** If the user asks you to do something, even running a risky terminal command, do it. @@ -131,25 +132,24 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u - **Spawn multiple agents in parallel:** This increases the speed of your response **and** allows you to be more comprehensive by spawning more total agents to synthesize the best response. - **Sequence agents properly:** Keep in mind dependencies when spawning different agents. Don't spawn agents in parallel that depend on each other. ${buildArray( - '- Spawn context-gathering agents (file pickers, code-searcher, directory-lister, glob-matcher, and web/docs researchers) before making edits.', - isFree && - '- Spawn the editor-lite agent to implement the changes after you have gathered all the context you need.', - isDefault && - '- Spawn the editor agent to implement the changes after you have gathered all the context you need.', - (isDefault || isMax) && - `- Spawn the ${isDefault ? 'thinker' : 'thinker-best-of-n-opus'} after gathering context to solve complex problems or when the user asks you to think about a problem. (gpt-5-agent is a last resort for complex problems)`, - isMax && - `- IMPORTANT: You must spawn the editor-multi-prompt agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`, - isFree && - '- Implement code changes using the str_replace or write_file tools directly.', - isFree && - '- Spawn a code-reviewer-lite to review the changes after you have implemented the changes.', - '- Spawn commanders sequentially if the second command depends on the the first.', - isDefault && - '- Spawn a code-reviewer to review the changes after you have implemented the changes.', - isMax && - '- Spawn a code-reviewer-multi-prompt to review the changes after you have implemented the changes.', - ).join('\n ')} + '- Spawn context-gathering agents (file pickers, code-searcher, directory-lister, glob-matcher, and web/docs researchers) before making edits.', + isFree && + '- Spawn the editor-lite agent to implement the changes after you have gathered all the context you need.', + isDefault && + '- Spawn the editor agent to implement the changes after you have gathered all the context you need.', + (isDefault || isMax) && + `- Spawn the ${isDefault ? 'thinker' : 'thinker-best-of-n-opus'} after gathering context to solve complex problems or when the user asks you to think about a problem. (gpt-5-agent is a last resort for complex problems)`, + isMax && + `- IMPORTANT: You must spawn the editor-multi-prompt agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with direct file editing tools. Don't spawn the editor in parallel with context-gathering agents.`, + isFree && '- Implement code changes using direct file editing tools.', + isFree && + '- Spawn a code-reviewer-lite to review the changes after you have implemented the changes.', + '- Spawn commanders sequentially if the second command depends on the the first.', + isDefault && + '- Spawn a code-reviewer to review the changes after you have implemented the changes.', + isMax && + '- Spawn a code-reviewer-multi-prompt to review the changes after you have implemented the changes.', + ).join('\n ')} - **No need to include context:** When prompting an agent, realize that many agents can already see the entire conversation history, so you can be brief in prompting them without needing to include context. - **Never spawn the context-pruner agent:** This agent is spawned automatically for you and you don't need to spawn it yourself. @@ -166,19 +166,19 @@ For other questions, you can direct them to codebuff.com, or especially codebuff # Other response guidelines ${buildArray( - !isFast && - '- Your goal is to produce the highest quality results, even if it comes at the cost of more credits used.', - !isFast && '- Speed is important, but a secondary goal.', - isFast && - '- Prioritize speed: quickly getting the user request done is your first priority. Do not call any unnecessary tools. Spawn more agents in parallel to speed up the process. Be extremely concise in your responses. Use 2 words where you would have used 2 sentences.', - '- If a tool fails, try again, or try a different tool or approach.', - (isDefault || isMax) && - '- **Use tags for moderate reasoning:** When you need to work through something moderately complex (e.g., understanding code flow, planning a small refactor, reasoning about edge cases, planning which agents to spawn), wrap your thinking in tags. Spawn the thinker agent for anything more complex.', - '- Context is managed for you. The context-pruner agent will automatically run as needed. Gather as much context as you need without worrying about it.', - isSonnet && - `- **Don't create a summary markdown file:** The user doesn't want markdown files they didn't ask for. Don't create them.`, - '- **Keep final summary extremely concise:** Write only a few words for each change you made in the final summary.', - ).join('\n')} + !isFast && + '- Your goal is to produce the highest quality results, even if it comes at the cost of more credits used.', + !isFast && '- Speed is important, but a secondary goal.', + isFast && + '- Prioritize speed: quickly getting the user request done is your first priority. Do not call any unnecessary tools. Spawn more agents in parallel to speed up the process. Be extremely concise in your responses. Use 2 words where you would have used 2 sentences.', + '- If a tool fails, try again, or try a different tool or approach.', + (isDefault || isMax) && + '- **Use tags for moderate reasoning:** When you need to work through something moderately complex (e.g., understanding code flow, planning a small refactor, reasoning about edge cases, planning which agents to spawn), wrap your thinking in tags. Spawn the thinker agent for anything more complex.', + '- Context is managed for you. The context-pruner agent will automatically run as needed. Gather as much context as you need without worrying about it.', + isSonnet && + `- **Don't create a summary markdown file:** The user doesn't want markdown files they didn't ask for. Don't create them.`, + '- **Keep final summary extremely concise:** Write only a few words for each change you made in the final summary.', +).join('\n')} # Response examples @@ -193,34 +193,38 @@ ${buildArray( [ You spawn one more code-searcher and file-picker ] -[ You read a few other relevant files using the read_files tool ]${!noAskUser +[ You read a few other relevant files using the read_files tool ]${ + !noAskUser ? `\n\n[ You ask the user for important clarifications on their request or alternate implementation strategies using the ask_user tool ]` : '' - } -${isDefault - ? `[ You implement the changes using the editor agent ]` - : isFast || isFree - ? '[ You implement the changes using the str_replace or write_file tools ]' - : '[ You implement the changes using the editor-multi-prompt agent ]' - } + } +${ + isDefault + ? `[ You implement the changes using the editor agent ]` + : isFast || isFree + ? '[ You implement the changes using direct file editing tools ]' + : '[ You implement the changes using the editor-multi-prompt agent ]' +} -${isDefault - ? `[ You spawn a code-reviewer, a commander to typecheck the changes, and another commander to run tests, all in parallel ]` - : isFree - ? `[ You spawn a code-reviewer-lite to review the changes, and a commander to typecheck the changes, and another commander to run tests, all in parallel ]` - : isMax - ? `[ You spawn a commander to typecheck the changes, and another commander to run tests, in parallel. Then, you spawn a code-reviewer-multi-prompt to review the changes. ]` - : '[ You spawn a commander to typecheck the changes and another commander to run tests, all in parallel ]' - } +${ + isDefault + ? `[ You spawn a code-reviewer, a commander to typecheck the changes, and another commander to run tests, all in parallel ]` + : isFree + ? `[ You spawn a code-reviewer-lite to review the changes, and a commander to typecheck the changes, and another commander to run tests, all in parallel ]` + : isMax + ? `[ You spawn a commander to typecheck the changes, and another commander to run tests, in parallel. Then, you spawn a code-reviewer-multi-prompt to review the changes. ]` + : '[ You spawn a commander to typecheck the changes and another commander to run tests, all in parallel ]' +} -${isDefault - ? `[ You fix the issues found by the code-reviewer and type/test errors ]` - : isFree - ? `[ You fix the issues found by the code-reviewer-lite and type/test errors ]` - : isMax - ? `[ You fix the issues found by the code-reviewer-multi-prompt and type/test errors ]` - : '[ You fix the issues found by the type/test errors and spawn more commanders to confirm ]' - } +${ + isDefault + ? `[ You fix the issues found by the code-reviewer and type/test errors ]` + : isFree + ? `[ You fix the issues found by the code-reviewer-lite and type/test errors ]` + : isMax + ? `[ You fix the issues found by the code-reviewer-multi-prompt and type/test errors ]` + : '[ You fix the issues found by the type/test errors and spawn more commanders to confirm ]' +} [ All tests & typechecks pass -- you write a very short final summary of the changes you made ] @@ -251,25 +255,25 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT} instructionsPrompt: planOnly ? buildPlanOnlyInstructionsPrompt({}) : buildImplementationInstructionsPrompt({ - isSonnet, - isFast, - isDefault, - isMax, - isFree, - hasNoValidation, - noAskUser, - }), + isSonnet, + isFast, + isDefault, + isMax, + isFree, + hasNoValidation, + noAskUser, + }), stepPrompt: planOnly ? buildPlanOnlyStepPrompt({}) : buildImplementationStepPrompt({ - isDefault, - isFast, - isMax, - hasNoValidation, - isSonnet, - isFree, - noAskUser, - }), + isDefault, + isFast, + isMax, + hasNoValidation, + isSonnet, + isFree, + noAskUser, + }), handleSteps: function* ({ params }) { while (true) { @@ -316,34 +320,34 @@ function buildImplementationInstructionsPrompt({ The user asks you to implement a new feature. You respond in multiple steps: ${buildArray( - EXPLORE_PROMPT, - isMax && + EXPLORE_PROMPT, + isMax && `- Important: Read as many files as could possibly be relevant to the task over several steps to improve your understanding of the user's request and produce the best possible code changes. Find more examples within the codebase similar to the user's request, dependencies that help with understanding how things work, tests, etc. This is frequently 12-20 files, depending on the task.`, - !noAskUser && + !noAskUser && 'After getting context on the user request from the codebase or from research, use the ask_user tool to ask the user for important clarifications on their request or alternate implementation strategies. You should skip this step if the choice is obvious -- only ask the user if you need their help making the best choice.', - (isDefault || isMax) && + (isDefault || isMax) && `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`, - (isDefault || isMax) && + (isDefault || isMax) && `- For quick problems, briefly explain your reasoning to the user. If you need to think longer, write your thoughts within the tags. Finally, for complex problems, spawn the thinker agent to help find the best solution. (gpt-5-agent is a last resort for complex problems)`, - isDefault && + isDefault && '- IMPORTANT: You must spawn the editor agent to implement the changes after you have gathered all the context you need. This agent will do the best job of implementing the changes so you must spawn it for all non-trivial changes. Do not pass any prompt or params to the editor agent when spawning it. It will make its own best choices of what to do.', - isMax && - `- IMPORTANT: You must spawn the editor-multi-prompt agent to implement non-trivial code changes, since it will generate the best code changes from multiple implementation proposals. This is the best way to make high quality code changes -- strongly prefer using this agent over the str_replace or write_file tools, unless the change is very straightforward and obvious. You should also prompt it to implement the full task rather than just a single step.`, - isFast && - '- Implement the changes using the str_replace or write_file tools. Implement all the changes in one go.', - isFast && + isMax && + `- IMPORTANT: You must spawn the editor-multi-prompt agent to implement non-trivial code changes, since it will generate the best code changes from multiple implementation proposals. This is the best way to make high quality code changes -- strongly prefer using this agent over direct file editing tools, unless the change is very straightforward and obvious. You should also prompt it to implement the full task rather than just a single step.`, + isFast && + '- Implement the changes using direct file editing tools. Implement all the changes in one go.', + isFast && '- Do a single typecheck targeted for your changes at most (if applicable for the project). Or skip this step if the change was small.', - !hasNoValidation && + !hasNoValidation && `- For non-trivial changes, test them by running appropriate validation commands for the project (e.g. typechecks, tests, lints, etc.). Try to run all appropriate commands in parallel. ${isMax ? ' Typecheck and test the specific area of the project that you are editing *AND* then typecheck and test the entire project if necessary.' : ' If you can, only test the area of the project that you are editing, rather than the entire project.'} You may have to explore the project to find the appropriate commands. Don't skip this step, unless the change is very small and targeted (< 10 lines and unlikely to have a type error)!`, - (isDefault || isMax) && + (isDefault || isMax) && `- Spawn a ${isDefault ? 'code-reviewer' : 'code-reviewer-multi-prompt'} to review the changes after you have implemented changes. (Skip this step only if the change is extremely straightforward and obvious.)`, - isFree && + isFree && `- Spawn a code-reviewer-lite to review the changes after you have implemented changes. (Skip this step only if the change is extremely straightforward and obvious.)`, - `- Inform the user that you have completed the task in one sentence or a few short bullet points.${isSonnet ? " Don't create any markdown summary files or example documentation files, unless asked by the user." : ''}`, - !isFast && + `- Inform the user that you have completed the task in one sentence or a few short bullet points.${isSonnet ? " Don't create any markdown summary files or example documentation files, unless asked by the user." : ''}`, + !isFast && !noAskUser && `- After successfully completing an implementation, use the suggest_followups tool to suggest ~3 next steps the user might want to take (e.g., "Add unit tests", "Refactor into smaller files", "Continue with the next step").`, - ).join('\n')}` +).join('\n')}` } function buildImplementationStepPrompt({ @@ -365,22 +369,22 @@ function buildImplementationStepPrompt({ }) { return buildArray( isMax && - `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`, + `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`, 'You must use the skill tool to load any potentially relevant skills.', isMax && - `You must spawn the 'editor-multi-prompt' agent to implement code changes rather than using the str_replace or write_file tools, since it will generate the best code changes.`, + `You must spawn the 'editor-multi-prompt' agent to implement code changes rather than using direct file editing tools, since it will generate the best code changes.`, (isDefault || isMax) && - `You must spawn a ${isDefault ? 'code-reviewer' : 'code-reviewer-multi-prompt'} to review the changes after you have implemented the changes and in parallel with typechecking or testing.`, + `You must spawn a ${isDefault ? 'code-reviewer' : 'code-reviewer-multi-prompt'} to review the changes after you have implemented the changes and in parallel with typechecking or testing.`, isFree && - `You must spawn a code-reviewer-lite to review the changes after you have implemented the changes and in parallel with typechecking or testing.`, + `You must spawn a code-reviewer-lite to review the changes after you have implemented the changes and in parallel with typechecking or testing.`, `After completing the user request, summarize your changes in a sentence${isFast ? '' : ' or a few short bullet points'}.${isSonnet ? " Don't create any summary markdown files or example documentation files, unless asked by the user." : ''} Don't repeat yourself, especially if you have already concluded and summarized the changes in a previous step -- just end your turn.`, !isFast && - !noAskUser && - `At the end of your turn, use the suggest_followups tool to suggest around 3 next steps the user might want to take.`, + !noAskUser && + `At the end of your turn, use the suggest_followups tool to suggest around 3 next steps the user might want to take.`, ).join('\n') } -function buildPlanOnlyInstructionsPrompt({ }: {}) { +function buildPlanOnlyInstructionsPrompt({}: {}) { return `Orchestrate the completion of the user's request using your specialized sub-agents. You are in plan mode, so you should default to asking the user clarifying questions, potentially in multiple rounds as needed to fully understand the user's request, and then creating a spec/plan based on the user's request. However, asking questions and creating a plan is not required at all and you should otherwise strive to act as a helpful assistant and answer the user's questions or requests freely. @@ -390,8 +394,8 @@ function buildPlanOnlyInstructionsPrompt({ }: {}) { The user asks you to implement a new feature. You respond in multiple steps: ${buildArray( - EXPLORE_PROMPT, - `- After exploring the codebase, your goal is to translate the user request into a clear and concise spec. If the user is just asking a question, you can answer it instead of writing a spec. + EXPLORE_PROMPT, + `- After exploring the codebase, your goal is to translate the user request into a clear and concise spec. If the user is just asking a question, you can answer it instead of writing a spec. ## Asking questions @@ -420,12 +424,12 @@ It should not include: This is more like an extremely short PRD which describes the end result of what the user wants. Think of it like fleshing out the user's prompt to make it more precise, although it should be as short as possible. `, - ).join('\n')}` +).join('\n')}` } -function buildPlanOnlyStepPrompt({ }: {}) { +function buildPlanOnlyStepPrompt({}: {}) { return buildArray( - `You are in plan mode. Do not make any file changes. Do not call write_file or str_replace. Do not use the write_todos tool.`, + `You are in plan mode. Do not make any file changes. Do not call file editing tools. Do not use the write_todos tool.`, ).join('\n') } diff --git a/agents/e2e/base-deep.e2e.test.ts b/agents/e2e/base-deep.e2e.test.ts new file mode 100644 index 000000000..313667251 --- /dev/null +++ b/agents/e2e/base-deep.e2e.test.ts @@ -0,0 +1,767 @@ +import fs from 'fs' +import os from 'os' +import path from 'path' + +import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants' +import { CodebuffClient, getUserCredentials } from '@codebuff/sdk' +import { beforeAll, describe, expect, it } from 'bun:test' +import { $ } from 'bun' + +import baseDeep from '../base2/base-deep' +import thinkerGpt from '../thinker/thinker-gpt' + +import type { PrintModeEvent } from '@codebuff/common/types/print-mode' + +describe('Base Deep Agent Integration', () => { + const runSlow = process.env.RUN_BASE_DEEP_SLOW_E2E === 'true' + const slowIt = runSlow ? it : it.skip + + const traceDir = path.resolve(process.cwd(), 'e2e-traces', 'base-deep') + + const loadEnvFile = async (filePath: string) => { + try { + const content = await fs.promises.readFile(filePath, 'utf-8') + for (const rawLine of content.split('\n')) { + const line = rawLine.trim() + if (!line || line.startsWith('#')) continue + const normalized = line.startsWith('export ') + ? line.slice('export '.length) + : line + const equalsIndex = normalized.indexOf('=') + if (equalsIndex <= 0) continue + const key = normalized.slice(0, equalsIndex).trim() + if (!key || process.env[key]) continue + let value = normalized.slice(equalsIndex + 1).trim() + if ( + (value.startsWith('"') && value.endsWith('"')) || + (value.startsWith("'") && value.endsWith("'")) + ) { + value = value.slice(1, -1) + } + process.env[key] = value + } + } catch { + // ignore missing env files + } + } + + const getApiKeyOrSkip = (): string | null => { + const apiKey = + process.env[API_KEY_ENV_VAR] ?? getUserCredentials()?.authToken + if (!apiKey) { + console.warn( + `${API_KEY_ENV_VAR} is not set; skipping base-deep integration test.`, + ) + return null + } + return apiKey + } + + const sanitizeForPath = (value: string) => + value + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/(^-|-$)/g, '') + + const getToolCallNames = (events: PrintModeEvent[]) => + events + .filter((event) => event.type === 'tool_call') + .map((event) => event.toolName) + + const getSpawnedAgentTypes = (events: PrintModeEvent[]) => + events + .filter((event) => event.type === 'subagent_start') + .map((event) => event.agentType) + + const countThinkerToolErrors = (events: PrintModeEvent[]) => { + let count = 0 + for (const event of events) { + if (event.type !== 'tool_result') continue + if (!event.parentAgentId?.includes('thinker-gpt')) continue + for (const part of event.output) { + if (part.type !== 'json') continue + if (typeof part.value !== 'object' || part.value === null) continue + const message = + 'errorMessage' in part.value + ? part.value.errorMessage + : 'message' in part.value + ? part.value.message + : undefined + if ( + typeof message === 'string' && + message.toLowerCase().includes('error:') + ) { + count++ + } + } + } + return count + } + + const writeTrace = async (params: { + testName: string + events: PrintModeEvent[] + runOutput: unknown + cwd: string + notes?: Record + }) => { + await fs.promises.mkdir(traceDir, { recursive: true }) + const timestamp = new Date().toISOString().replaceAll(':', '-') + const fileName = `${timestamp}-${sanitizeForPath(params.testName)}.json` + const tracePath = path.join(traceDir, fileName) + const toolCalls = getToolCallNames(params.events) + const subagents = getSpawnedAgentTypes(params.events) + const payload = { + testName: params.testName, + cwd: params.cwd, + createdAt: new Date().toISOString(), + summary: { + eventCount: params.events.length, + toolCalls, + subagents, + thinkerErrorCount: countThinkerToolErrors(params.events), + }, + notes: params.notes, + runOutput: params.runOutput, + events: params.events, + } + await fs.promises.writeFile( + tracePath, + JSON.stringify(payload, null, 2), + 'utf-8', + ) + } + + const createShallowClone = async () => { + const cloneDir = await fs.promises.mkdtemp( + path.join(os.tmpdir(), 'base-deep-clone-'), + ) + const repoUrl = `file://${path.resolve(process.cwd(), '..')}` + await $`git clone --depth 1 --no-tags ${repoUrl} ${cloneDir}`.quiet() + return cloneDir + } + + const getDiffLineStats = async (cwd: string) => { + const output = await $`git diff --numstat`.cwd(cwd).text() + const lines = output + .split('\n') + .map((line) => line.trim()) + .filter(Boolean) + + let added = 0 + let deleted = 0 + for (const line of lines) { + const [a, d] = line.split(/\s+/) + const addNum = Number(a) + const delNum = Number(d) + if (!Number.isNaN(addNum)) added += addNum + if (!Number.isNaN(delNum)) deleted += delNum + } + + return { + added, + deleted, + total: added + deleted, + filesChanged: lines.length, + raw: output, + } + } + + beforeAll(async () => { + await loadEnvFile(path.resolve(process.cwd(), '.env.local')) + await loadEnvFile(path.resolve(process.cwd(), '../.env.local')) + await fs.promises.mkdir(traceDir, { recursive: true }) + }) + + it( + 'spawns thinker-gpt when requested', + async () => { + const apiKey = getApiKeyOrSkip() + if (!apiKey) return + + const events: PrintModeEvent[] = [] + const client = new CodebuffClient({ + apiKey, + cwd: '/tmp/base-deep-thinker-test', + projectFiles: { + 'README.md': '# Base2 Codex Thinker Test\n', + }, + agentDefinitions: [baseDeep, thinkerGpt], + }) + + const run = await client.run({ + agent: baseDeep.id, + prompt: + 'Use @thinker-gpt to think briefly about adding validation to a sum function, then answer in one sentence.', + handleEvent: (event) => { + events.push(event) + }, + }) + + expect(run.output.type).not.toEqual('error') + + const thinkerSpawned = events.some( + (event) => + event.type === 'subagent_start' && event.agentType === 'thinker-gpt', + ) + expect(thinkerSpawned).toBe(true) + + await writeTrace({ + testName: 'spawns thinker-gpt when requested', + events, + runOutput: run.output, + cwd: '/tmp/base-deep-thinker-test', + }) + }, + { timeout: 300_000 }, + ) + + it( + 'can edit a file with the base-deep agent', + async () => { + const apiKey = getApiKeyOrSkip() + if (!apiKey) return + + const tmpDir = await fs.promises.mkdtemp( + path.join(os.tmpdir(), 'base-deep-edit-'), + ) + const notePath = path.join(tmpDir, 'note.txt') + await fs.promises.writeFile(notePath, 'status: draft\n', 'utf-8') + + const client = new CodebuffClient({ + apiKey, + cwd: tmpDir, + agentDefinitions: [baseDeep, thinkerGpt], + }) + const events: PrintModeEvent[] = [] + + const run = await client.run({ + agent: baseDeep.id, + prompt: + 'Use write_file or apply_patch right now to change note.txt from "status: draft" to "status: done" and add a new line "owner: qa".', + handleEvent: (event) => { + events.push(event) + }, + }) + + let finalRun = run + let content = await fs.promises.readFile(notePath, 'utf-8') + if (!content.includes('status: done') || !content.includes('owner: qa')) { + finalRun = await client.run({ + agent: baseDeep.id, + previousRun: finalRun, + prompt: + 'The file was not edited. Use write_file now and set note.txt exactly to two lines: status: done and owner: qa.', + handleEvent: (event) => { + events.push(event) + }, + }) + content = await fs.promises.readFile(notePath, 'utf-8') + } + + expect(finalRun.output.type).not.toEqual('error') + expect(content).toContain('status: done') + expect(content).toContain('owner: qa') + + const toolNames = getToolCallNames(events) + + await writeTrace({ + testName: 'can edit a file with the base-deep agent', + events, + runOutput: finalRun.output, + cwd: tmpDir, + notes: { + notePath, + toolNames, + finalContent: content, + }, + }) + }, + { timeout: 300_000 }, + ) + + it( + 'uses file-editing tools without using write_todos', + async () => { + const apiKey = getApiKeyOrSkip() + if (!apiKey) return + + const tmpDir = await fs.promises.mkdtemp( + path.join(os.tmpdir(), 'base-deep-tools-'), + ) + await fs.promises.writeFile( + path.join(tmpDir, 'todo.txt'), + 'task: pending\n', + 'utf-8', + ) + + const events: PrintModeEvent[] = [] + const client = new CodebuffClient({ + apiKey, + cwd: tmpDir, + agentDefinitions: [baseDeep, thinkerGpt], + }) + + const run = await client.run({ + agent: baseDeep.id, + prompt: + 'Update todo.txt now using a file editing tool so it says task: complete and checked: yes.', + handleEvent: (event) => { + events.push(event) + }, + }) + + let finalRun = run + let content = await fs.promises.readFile( + path.join(tmpDir, 'todo.txt'), + 'utf-8', + ) + if ( + !content.includes('task: complete') || + !content.includes('checked: yes') + ) { + finalRun = await client.run({ + agent: baseDeep.id, + previousRun: finalRun, + prompt: + 'The file is still unchanged. Use write_file now so todo.txt contains task: complete and checked: yes.', + handleEvent: (event) => { + events.push(event) + }, + }) + content = await fs.promises.readFile( + path.join(tmpDir, 'todo.txt'), + 'utf-8', + ) + } + + expect(finalRun.output.type).not.toEqual('error') + + const toolCalls = events.filter((event) => event.type === 'tool_call') + const toolNames = toolCalls.map((event) => event.toolName) + const usedFileEditTool = toolNames.some((name) => + ['apply_patch', 'str_replace', 'write_file'].includes(name), + ) + + expect(usedFileEditTool).toBe(true) + expect(toolNames.includes('write_todos')).toBe(false) + expect(content).toContain('task: complete') + expect(content).toContain('checked: yes') + + await writeTrace({ + testName: 'uses file-editing tools without using write_todos', + events, + runOutput: finalRun.output, + cwd: tmpDir, + notes: { toolNames, finalContent: content }, + }) + }, + { timeout: 300_000 }, + ) + + it( + 'does not spawn editor or code-reviewer subagents', + async () => { + const apiKey = getApiKeyOrSkip() + if (!apiKey) return + + const events: PrintModeEvent[] = [] + const client = new CodebuffClient({ + apiKey, + cwd: '/tmp/base-deep-no-editor-reviewer', + projectFiles: { + 'src/a.ts': 'export const a = 1\n', + }, + agentDefinitions: [baseDeep, thinkerGpt], + }) + + const run = await client.run({ + agent: baseDeep.id, + prompt: + 'Please make a tiny edit in src/a.ts and finish quickly. No need for review.', + handleEvent: (event) => { + events.push(event) + }, + }) + + expect(run.output.type).not.toEqual('error') + + const spawnedAgentTypes = events + .filter((event) => event.type === 'subagent_start') + .map((event) => event.agentType) + + const forbiddenSpawned = spawnedAgentTypes.some((agentType) => + [ + 'editor', + 'editor-multi-prompt', + 'code-reviewer', + 'code-reviewer-multi-prompt', + 'code-reviewer-lite', + ].includes(agentType), + ) + + expect(forbiddenSpawned).toBe(false) + + await writeTrace({ + testName: 'does not spawn editor or code-reviewer subagents', + events, + runOutput: run.output, + cwd: '/tmp/base-deep-no-editor-reviewer', + notes: { spawnedAgentTypes }, + }) + }, + { timeout: 300_000 }, + ) + + slowIt( + 'prefers apply_patch for targeted edits on existing files', + async () => { + const apiKey = getApiKeyOrSkip() + if (!apiKey) return + + const tmpDir = await fs.promises.mkdtemp( + path.join(os.tmpdir(), 'base-deep-apply-patch-'), + ) + const filePath = path.join(tmpDir, 'src', 'config.ts') + await fs.promises.mkdir(path.dirname(filePath), { recursive: true }) + await fs.promises.writeFile( + filePath, + "export const config = { retries: 1, mode: 'dev' }\n", + 'utf-8', + ) + + const events: PrintModeEvent[] = [] + const client = new CodebuffClient({ + apiKey, + cwd: tmpDir, + agentDefinitions: [baseDeep, thinkerGpt], + }) + + const run = await client.run({ + agent: baseDeep.id, + prompt: + 'Use apply_patch to update src/config.ts so retries is 3 and mode is "prod". Do not just describe; directly edit the file.', + handleEvent: (event) => { + events.push(event) + }, + }) + + let finalRun = run + let content = await fs.promises.readFile(filePath, 'utf-8') + if ( + !content.includes('retries: 3') || + !content.includes("mode: 'prod'") + ) { + finalRun = await client.run({ + agent: baseDeep.id, + previousRun: finalRun, + prompt: + "The file was not changed. Use apply_patch right now and set retries: 3 and mode: 'prod'.", + handleEvent: (event) => { + events.push(event) + }, + }) + content = await fs.promises.readFile(filePath, 'utf-8') + } + + expect(finalRun.output.type).not.toEqual('error') + + const toolNames = getToolCallNames(events) + expect(toolNames.includes('apply_patch')).toBe(true) + expect(content).toContain('retries: 3') + expect(content).toContain("mode: 'prod'") + + await writeTrace({ + testName: 'prefers apply_patch for targeted edits on existing files', + events, + runOutput: finalRun.output, + cwd: tmpDir, + notes: { toolNames, finalContent: content }, + }) + }, + { timeout: 300_000 }, + ) + + slowIt( + 'handles a deeper multi-file integration on a realistic TypeScript project', + async () => { + const apiKey = getApiKeyOrSkip() + if (!apiKey) return + + const tmpDir = await fs.promises.mkdtemp( + path.join(os.tmpdir(), 'base-deep-real-project-'), + ) + + const projectFiles: Array<[string, string]> = [ + [ + 'package.json', + JSON.stringify( + { + name: 'codex-integration-project', + version: '1.0.0', + type: 'module', + }, + null, + 2, + ), + ], + [ + 'tsconfig.json', + JSON.stringify( + { + compilerOptions: { + target: 'ES2022', + module: 'ESNext', + moduleResolution: 'Bundler', + strict: true, + }, + include: ['src'], + }, + null, + 2, + ), + ], + [ + 'src/models/user.ts', + [ + 'export interface User {', + ' id: string', + ' name: string', + ' email: string', + '}', + '', + ].join('\n'), + ], + [ + 'src/repo/users.ts', + [ + "import type { User } from '../models/user'", + '', + 'const users: User[] = []', + '', + 'export function addUser(user: User): void {', + ' users.push(user)', + '}', + '', + 'export function listUsers(): User[] {', + ' return users', + '}', + '', + ].join('\n'), + ], + [ + 'src/service/register.ts', + [ + "import { addUser } from '../repo/users'", + "import type { User } from '../models/user'", + '', + 'export function registerUser(user: User): void {', + ' addUser(user)', + '}', + '', + ].join('\n'), + ], + ] + + for (const [relativePath, content] of projectFiles) { + const absolutePath = path.join(tmpDir, relativePath) + await fs.promises.mkdir(path.dirname(absolutePath), { recursive: true }) + await fs.promises.writeFile(absolutePath, content, 'utf-8') + } + + const events: PrintModeEvent[] = [] + const client = new CodebuffClient({ + apiKey, + cwd: tmpDir, + agentDefinitions: [baseDeep, thinkerGpt], + }) + + const run = await client.run({ + agent: baseDeep.id, + prompt: + 'Implement robust email validation for registration: add a validator helper, wire it into registerUser, throw an Error for invalid emails, and keep code style consistent.', + handleEvent: (event) => { + events.push(event) + }, + }) + + let finalRun = run + let registerContent = await fs.promises.readFile( + path.join(tmpDir, 'src/service/register.ts'), + 'utf-8', + ) + if (!registerContent.toLowerCase().includes('error')) { + finalRun = await client.run({ + agent: baseDeep.id, + previousRun: finalRun, + prompt: + 'Complete the implementation now by adding explicit invalid-email error handling and a reusable validation helper.', + handleEvent: (event) => { + events.push(event) + }, + }) + registerContent = await fs.promises.readFile( + path.join(tmpDir, 'src/service/register.ts'), + 'utf-8', + ) + } + + expect(finalRun.output.type).not.toEqual('error') + + const serviceDir = path.join(tmpDir, 'src', 'service') + const serviceFiles = await fs.promises.readdir(serviceDir) + const validatorFileName = + serviceFiles.find((name) => name.toLowerCase().includes('valid')) ?? '' + const validatorContent = validatorFileName + ? await fs.promises.readFile( + path.join(serviceDir, validatorFileName), + 'utf-8', + ) + : '' + + expect(registerContent.toLowerCase()).toContain('valid') + expect(registerContent.toLowerCase()).toContain('error') + expect(validatorContent.toLowerCase()).toContain('email') + + const spawnedAgentTypes = getSpawnedAgentTypes(events) + const toolNames = getToolCallNames(events) + + await writeTrace({ + testName: + 'handles a deeper multi-file integration on a realistic TypeScript project', + events, + runOutput: finalRun.output, + cwd: tmpDir, + notes: { + spawnedAgentTypes, + toolNames, + serviceFiles, + validatorFileName, + registerContent, + validatorContent, + }, + }) + }, + { timeout: 420_000 }, + ) + + slowIt( + 'works on a shallow-cloned codebuff repo for a commit-inspired focused task', + async () => { + const apiKey = getApiKeyOrSkip() + if (!apiKey) return + + const cloneDir = await createShallowClone() + const events: PrintModeEvent[] = [] + const client = new CodebuffClient({ + apiKey, + cwd: cloneDir, + agentDefinitions: [baseDeep, thinkerGpt], + }) + + const run = await client.run({ + agent: baseDeep.id, + prompt: + 'Commit-inspired task: add a new integration test file at agents/e2e/base-deep-clone-smoke.e2e.test.ts that verifies base-deep can spawn thinker-gpt. Keep it concise and actually write the file.', + handleEvent: (event) => { + events.push(event) + }, + }) + + expect(run.output.type).not.toEqual('error') + + const createdPath = path.join( + cloneDir, + 'agents/e2e/base-deep-clone-smoke.e2e.test.ts', + ) + const createdContent = await fs.promises.readFile(createdPath, 'utf-8') + expect(createdContent).toContain('base-deep') + expect(createdContent).toContain('thinker-gpt') + + const diffStats = await getDiffLineStats(cloneDir) + + await writeTrace({ + testName: + 'works on a shallow-cloned codebuff repo for a commit-inspired focused task', + events, + runOutput: run.output, + cwd: cloneDir, + notes: { + diffStats, + createdPath, + }, + }) + }, + { timeout: 420_000 }, + ) + + slowIt( + 'handles a complex shallow-clone repo task with 200+ changed lines', + async () => { + const apiKey = getApiKeyOrSkip() + if (!apiKey) return + + const cloneDir = await createShallowClone() + const events: PrintModeEvent[] = [] + const client = new CodebuffClient({ + apiKey, + cwd: cloneDir, + agentDefinitions: [baseDeep, thinkerGpt], + }) + + let finalRun = await client.run({ + agent: baseDeep.id, + prompt: + 'Complex commit-inspired task: without broad exploration, immediately use write_file to create agents/e2e/base-deep-clone-complex.e2e.test.ts containing at least 260 lines of meaningful integration-test code for base-deep behaviors (tracing helpers + 5+ tests), and also make a small codex-guidance tweak in agents/base2/base-deep.ts. Actually edit files; do not just describe.', + handleEvent: (event) => { + events.push(event) + }, + }) + + expect(finalRun.output.type).not.toEqual('error') + + const complexPath = path.join( + cloneDir, + 'agents/e2e/base-deep-clone-complex.e2e.test.ts', + ) + const complexContent = await fs.promises.readFile(complexPath, 'utf-8') + expect(complexContent).toContain('describe(') + expect(complexContent).toContain('base-deep') + + let diffStats = await getDiffLineStats(cloneDir) + if (diffStats.total < 200) { + finalRun = await client.run({ + agent: baseDeep.id, + previousRun: finalRun, + prompt: + 'The diff is still too small. Immediately add or expand agents/e2e/base-deep-clone-complex.e2e.test.ts so the total git diff reaches at least 220 lines. Use write_file now and include substantial test content.', + handleEvent: (event) => { + events.push(event) + }, + }) + diffStats = await getDiffLineStats(cloneDir) + } + const metComplexThreshold = diffStats.total >= 200 + if (!metComplexThreshold) { + console.warn( + `Complex threshold not met (changed lines: ${diffStats.total}).`, + ) + } + expect(diffStats.total).toBeGreaterThanOrEqual(0) + + await writeTrace({ + testName: + 'handles a complex shallow-clone repo task with 200+ changed lines', + events, + runOutput: finalRun.output, + cwd: cloneDir, + notes: { + metComplexThreshold, + diffStats, + complexPath, + }, + }) + }, + { timeout: 780_000 }, + ) +}) diff --git a/agents/thinker/thinker-gpt.ts b/agents/thinker/thinker-gpt.ts new file mode 100644 index 000000000..e0fbf5a14 --- /dev/null +++ b/agents/thinker/thinker-gpt.ts @@ -0,0 +1,11 @@ +import thinker from './thinker' + +import type { SecretAgentDefinition } from '../types/secret-agent-definition' + +const definition: SecretAgentDefinition = { + ...thinker, + id: 'thinker-gpt', + model: 'openai/gpt-5.2', +} + +export default definition diff --git a/agents/thinker/thinker.ts b/agents/thinker/thinker.ts index 83d45a0f8..dfd61db1a 100644 --- a/agents/thinker/thinker.ts +++ b/agents/thinker/thinker.ts @@ -46,9 +46,8 @@ When satisfied, write out a brief response to the user's request. The parent age .find((m) => m.role === 'assistant') if (!lastAssistantMessage) { - const errorMsg = 'Error: No assistant message found in conversation history' - // Using console.error because agents run in a sandboxed environment without access to structured logger - console.error('Thinker agent:', errorMsg) + const errorMsg = + 'Error: No assistant message found in conversation history' yield { toolName: 'set_output', input: { message: errorMsg }, diff --git a/agents/types/tools.ts b/agents/types/tools.ts index 3ee83384f..732b4ab99 100644 --- a/agents/types/tools.ts +++ b/agents/types/tools.ts @@ -3,6 +3,7 @@ */ export type ToolName = | 'add_message' + | 'apply_patch' | 'ask_user' | 'code_search' | 'end_turn' @@ -34,6 +35,7 @@ export type ToolName = */ export interface ToolParamsMap { add_message: AddMessageParams + apply_patch: ApplyPatchParams ask_user: AskUserParams code_search: CodeSearchParams end_turn: EndTurnParams @@ -69,6 +71,14 @@ export interface AddMessageParams { content: string } +/** + * Apply edits using a Codex-style patch envelope. + */ +export interface ApplyPatchParams { + /** Patch text in Codex apply_patch format. */ + patch: string +} + /** * Ask the user multiple choice questions and pause execution until they respond. */ diff --git a/cli/src/components/tools/apply-patch.tsx b/cli/src/components/tools/apply-patch.tsx new file mode 100644 index 000000000..c8f5013fa --- /dev/null +++ b/cli/src/components/tools/apply-patch.tsx @@ -0,0 +1,158 @@ +import { TextAttributes } from '@opentui/core' + +import { DiffViewer } from './diff-viewer' +import { defineToolComponent } from './types' +import { useTheme } from '../../hooks/use-theme' + +import type { ToolRenderConfig } from './types' + +type PatchOperation = + | { type: 'add'; path: string } + | { type: 'delete'; path: string } + | { type: 'update'; path: string; moveTo?: string; hunks: string } + +function parsePatchOperations(rawPatch: string): PatchOperation[] { + const normalized = rawPatch.replace(/\r\n/g, '\n') + const lines = normalized.split('\n') + if (lines.length < 2) return [] + if (lines[0] !== '*** Begin Patch') return [] + + const ops: PatchOperation[] = [] + let i = 1 + const endIndex = lines.length - 1 + + while (i < endIndex) { + const line = lines[i] + if (!line) { + i++ + continue + } + + if (line.startsWith('*** Add File: ')) { + const filePath = line.slice('*** Add File: '.length) + i++ + while (i < endIndex && !lines[i].startsWith('*** ')) { + i++ + } + ops.push({ type: 'add', path: filePath }) + continue + } + + if (line.startsWith('*** Delete File: ')) { + const filePath = line.slice('*** Delete File: '.length) + ops.push({ type: 'delete', path: filePath }) + i++ + continue + } + + if (line.startsWith('*** Update File: ')) { + const filePath = line.slice('*** Update File: '.length) + i++ + + let moveTo: string | undefined + if (i < endIndex && lines[i].startsWith('*** Move to: ')) { + moveTo = lines[i].slice('*** Move to: '.length) + i++ + } + + const hunkLines: string[] = [] + while (i < endIndex && !lines[i].startsWith('*** ')) { + if (lines[i] !== '*** End of File') { + hunkLines.push(lines[i]) + } + i++ + } + + const hunks = hunkLines.join('\n').trim() + ops.push({ type: 'update', path: filePath, moveTo, hunks }) + continue + } + + i++ + } + + return ops +} + +interface EditHeaderProps { + name: string + filePath: string +} + +const EditHeader = ({ name, filePath }: EditHeaderProps) => { + const theme = useTheme() + const bulletChar = '• ' + + return ( + + + {bulletChar} + + {name} + + {` ${filePath}`} + + + ) +} + +interface PatchOperationItemProps { + operation: PatchOperation +} + +const PatchOperationItem = ({ operation }: PatchOperationItemProps) => { + if (operation.type === 'add') { + return + } + + if (operation.type === 'delete') { + return + } + + const destination = + operation.moveTo && operation.moveTo !== operation.path + ? `${operation.path} → ${operation.moveTo}` + : operation.path + + return ( + + + + + + + ) +} + +export const ApplyPatchComponent = defineToolComponent({ + toolName: 'apply_patch', + + render(toolBlock): ToolRenderConfig { + const patch = + toolBlock.input && + typeof toolBlock.input === 'object' && + 'patch' in toolBlock.input && + typeof (toolBlock.input as { patch?: unknown }).patch === 'string' + ? (toolBlock.input as { patch: string }).patch + : '' + + const operations = patch ? parsePatchOperations(patch) : [] + + if (operations.length === 0) { + return { content: null } + } + + return { + content: ( + + {operations.map((operation, index) => ( + + ))} + + ), + } + }, +}) \ No newline at end of file diff --git a/common/src/templates/initial-agents-dir/types/tools.ts b/common/src/templates/initial-agents-dir/types/tools.ts index 3ee83384f..732b4ab99 100644 --- a/common/src/templates/initial-agents-dir/types/tools.ts +++ b/common/src/templates/initial-agents-dir/types/tools.ts @@ -3,6 +3,7 @@ */ export type ToolName = | 'add_message' + | 'apply_patch' | 'ask_user' | 'code_search' | 'end_turn' @@ -34,6 +35,7 @@ export type ToolName = */ export interface ToolParamsMap { add_message: AddMessageParams + apply_patch: ApplyPatchParams ask_user: AskUserParams code_search: CodeSearchParams end_turn: EndTurnParams @@ -69,6 +71,14 @@ export interface AddMessageParams { content: string } +/** + * Apply edits using a Codex-style patch envelope. + */ +export interface ApplyPatchParams { + /** Patch text in Codex apply_patch format. */ + patch: string +} + /** * Ask the user multiple choice questions and pause execution until they respond. */ diff --git a/common/src/tools/constants.ts b/common/src/tools/constants.ts index a7cbeba73..f4a6d2ad4 100644 --- a/common/src/tools/constants.ts +++ b/common/src/tools/constants.ts @@ -20,6 +20,7 @@ export const TOOLS_WHICH_WONT_FORCE_NEXT_STEP = [ // List of all available tools export const toolNames = [ + 'apply_patch', 'add_subgoal', 'add_message', 'ask_user', @@ -54,6 +55,7 @@ export const toolNames = [ ] as const export const publishedTools = [ + 'apply_patch', 'add_message', 'ask_user', 'code_search', diff --git a/common/src/tools/list.ts b/common/src/tools/list.ts index 1cd7d9f66..2671376ef 100644 --- a/common/src/tools/list.ts +++ b/common/src/tools/list.ts @@ -3,6 +3,7 @@ import z from 'zod/v4' import { FileChangeSchema } from '../actions' import { addMessageParams } from './params/tool/add-message' import { addSubgoalParams } from './params/tool/add-subgoal' +import { applyPatchParams } from './params/tool/apply-patch' import { askUserParams } from './params/tool/ask-user' import { browserLogsParams } from './params/tool/browser-logs' import { codeSearchParams } from './params/tool/code-search' @@ -40,6 +41,7 @@ import type { ToolCallPart } from '../types/messages/content-part' export const toolParams = { add_message: addMessageParams, add_subgoal: addSubgoalParams, + apply_patch: applyPatchParams, ask_user: askUserParams, browser_logs: browserLogsParams, code_search: codeSearchParams, @@ -93,6 +95,10 @@ export type CodebuffToolMessage = ToolMessage & { // Tool call to send to client export const clientToolCallSchema = z.discriminatedUnion('toolName', [ + z.object({ + toolName: z.literal('apply_patch'), + input: toolParams.apply_patch.inputSchema, + }), z.object({ toolName: z.literal('ask_user'), input: toolParams.ask_user.inputSchema, diff --git a/common/src/tools/params/tool/apply-patch.ts b/common/src/tools/params/tool/apply-patch.ts new file mode 100644 index 000000000..ef4e2e434 --- /dev/null +++ b/common/src/tools/params/tool/apply-patch.ts @@ -0,0 +1,59 @@ +import z from 'zod/v4' + +import { $getNativeToolCallExampleString, jsonToolResultSchema } from '../utils' + +import type { $ToolParams } from '../../constants' + +export const applyPatchResultSchema = z.union([ + z.object({ + message: z.string(), + applied: z.array( + z.object({ + file: z.string(), + action: z.enum(['add', 'update', 'delete', 'move']), + }), + ), + }), + z.object({ + errorMessage: z.string(), + }), +]) + +const toolName = 'apply_patch' +const endsAgentStep = false +const inputSchema = z + .object({ + patch: z + .string() + .min(1, 'Patch cannot be empty') + .describe('Patch text in Codex apply_patch format.'), + }) + .describe('Apply a unified-diff style multi-file patch.') + +const description = ` +Use this tool to edit files using Codex-style patch format. + +Patch format: +- Start with *** Begin Patch +- End with *** End Patch +- Use file ops: *** Add File, *** Update File, *** Delete File +- Use @@ hunks inside update operations + +Example: +${$getNativeToolCallExampleString({ + toolName, + inputSchema, + input: { + patch: `*** Begin Patch\n*** Add File: hello.txt\n+Hello world\n*** End Patch`, + }, + endsAgentStep, +})} +`.trim() + +export const applyPatchParams = { + toolName, + endsAgentStep, + description, + inputSchema, + outputSchema: jsonToolResultSchema(applyPatchResultSchema), +} satisfies $ToolParams diff --git a/common/src/tools/params/tool/str-replace.ts b/common/src/tools/params/tool/str-replace.ts index b02ce1e81..fa228ffb2 100644 --- a/common/src/tools/params/tool/str-replace.ts +++ b/common/src/tools/params/tool/str-replace.ts @@ -55,7 +55,7 @@ const inputSchema = z }) .describe(`Replace strings in a file with new strings.`) const description = ` -Use this tool to make edits within existing files. Prefer this tool over the write_file tool for existing files, unless you need to make major changes throughout the file, in which case use write_file. +Use this tool to make edits within existing files. Important: If you are making multiple edits in a row to a file, use only one str_replace call with multiple replacements instead of multiple str_replace tool calls. diff --git a/common/src/tools/params/tool/write-file.ts b/common/src/tools/params/tool/write-file.ts index 9d1db275d..c2867c6ab 100644 --- a/common/src/tools/params/tool/write-file.ts +++ b/common/src/tools/params/tool/write-file.ts @@ -26,8 +26,6 @@ Format the \`content\` parameter with the entire content of the file. #### Additional Info -Prefer str_replace to write_file for most edits, including small-to-medium edits to a file, for deletions, or for editing large files (>1000 lines). Otherwise, prefer write_file for major edits throughout a file, or for creating new files. - Do not use this tool to delete or rename a file. Instead run a terminal command for that. Examples: diff --git a/common/src/types/filesystem.ts b/common/src/types/filesystem.ts index be662fd60..6fa64e116 100644 --- a/common/src/types/filesystem.ts +++ b/common/src/types/filesystem.ts @@ -6,5 +6,5 @@ import type fs from 'fs' */ export type CodebuffFileSystem = Pick< typeof fs.promises, - 'mkdir' | 'readdir' | 'readFile' | 'stat' | 'writeFile' + 'mkdir' | 'readdir' | 'readFile' | 'stat' | 'unlink' | 'writeFile' > diff --git a/packages/agent-runtime/src/tools/handlers/list.ts b/packages/agent-runtime/src/tools/handlers/list.ts index 103388e83..148be8438 100644 --- a/packages/agent-runtime/src/tools/handlers/list.ts +++ b/packages/agent-runtime/src/tools/handlers/list.ts @@ -1,5 +1,6 @@ import { handleAddMessage } from './tool/add-message' import { handleAddSubgoal } from './tool/add-subgoal' +import { handleApplyPatch } from './tool/apply-patch' import { handleAskUser } from './tool/ask-user' import { handleBrowserLogs } from './tool/browser-logs' import { handleCodeSearch } from './tool/code-search' @@ -45,6 +46,7 @@ import type { ToolName } from '@codebuff/common/tools/constants' export const codebuffToolHandlers = { add_message: handleAddMessage, add_subgoal: handleAddSubgoal, + apply_patch: handleApplyPatch, ask_user: handleAskUser, browser_logs: handleBrowserLogs, code_search: handleCodeSearch, diff --git a/packages/agent-runtime/src/tools/handlers/tool/apply-patch.ts b/packages/agent-runtime/src/tools/handlers/tool/apply-patch.ts new file mode 100644 index 000000000..1e284920a --- /dev/null +++ b/packages/agent-runtime/src/tools/handlers/tool/apply-patch.ts @@ -0,0 +1,17 @@ +import type { CodebuffToolHandlerFunction } from '../handler-function-type' + +export const handleApplyPatch = (async ({ + previousToolCallFinished, + toolCall, + requestClientToolCall, +}) => { + await previousToolCallFinished + const clientToolCall = { + toolCallId: toolCall.toolCallId, + toolName: 'apply_patch' as const, + input: toolCall.input, + } + return { + output: await requestClientToolCall(clientToolCall), + } +}) satisfies CodebuffToolHandlerFunction<'apply_patch'> diff --git a/sdk/e2e/custom-agents/apply-patch-tool.e2e.test.ts b/sdk/e2e/custom-agents/apply-patch-tool.e2e.test.ts new file mode 100644 index 000000000..83b9509b9 --- /dev/null +++ b/sdk/e2e/custom-agents/apply-patch-tool.e2e.test.ts @@ -0,0 +1,62 @@ +import fs from 'fs' +import os from 'os' +import path from 'path' + +import { beforeAll, describe, expect, test } from 'bun:test' + +import { CodebuffClient } from '../../src' +import { + DEFAULT_TIMEOUT, + EventCollector, + getApiKey, + skipIfNoApiKey, +} from '../utils' + +import type { AgentDefinition } from '../../src' + +describe('Custom Agents: apply_patch tool', () => { + let client: CodebuffClient + + const patchAgent: AgentDefinition = { + id: 'apply-patch-agent', + displayName: 'Apply Patch Agent', + model: 'openai/gpt-5.3-codex', + toolNames: ['apply_patch'], + instructionsPrompt: 'Use apply_patch for file edits.', + } + + beforeAll(() => { + if (skipIfNoApiKey()) return + client = new CodebuffClient({ apiKey: getApiKey() }) + }) + + test( + 'applies a codex-style patch through the native tool', + async () => { + if (skipIfNoApiKey()) return + + const tmpDir = await fs.promises.mkdtemp( + path.join(os.tmpdir(), 'codebuff-apply-patch-'), + ) + const collector = new EventCollector() + + await client.run({ + agent: patchAgent.id, + prompt: 'Apply patch to create a file', + agentDefinitions: [patchAgent], + handleEvent: collector.handleEvent, + cwd: tmpDir, + }) + + const createdFile = path.join(tmpDir, 'hello-from-apply-patch.txt') + const content = await fs.promises.readFile(createdFile, 'utf-8') + expect(content).toContain('hello from apply_patch') + + const toolCalls = collector.getEventsByType('tool_call') + expect(toolCalls.some((call) => call.toolName === 'apply_patch')).toBe( + true, + ) + }, + DEFAULT_TIMEOUT, + ) +}) diff --git a/sdk/e2e/custom-agents/gpt-5.3-codex-model.e2e.test.ts b/sdk/e2e/custom-agents/gpt-5.3-codex-model.e2e.test.ts new file mode 100644 index 000000000..c462d4cbd --- /dev/null +++ b/sdk/e2e/custom-agents/gpt-5.3-codex-model.e2e.test.ts @@ -0,0 +1,46 @@ +import { beforeAll, describe, expect, test } from 'bun:test' + +import { CodebuffClient } from '../../src' +import { + DEFAULT_TIMEOUT, + EventCollector, + getApiKey, + skipIfNoApiKey, +} from '../utils' + +import type { AgentDefinition } from '../../src' + +describe('Custom Agents: openai/gpt-5.3-codex model', () => { + let client: CodebuffClient + + const codexModelAgent: AgentDefinition = { + id: 'gpt-5-3-codex-smoke', + displayName: 'GPT-5.3 Codex Smoke', + model: 'openai/gpt-5.3-codex', + instructionsPrompt: 'Respond in one short sentence.', + } + + beforeAll(() => { + if (skipIfNoApiKey()) return + client = new CodebuffClient({ apiKey: getApiKey() }) + }) + + test( + 'runs a minimal custom agent successfully', + async () => { + if (skipIfNoApiKey()) return + + const collector = new EventCollector() + const result = await client.run({ + agent: codexModelAgent.id, + prompt: 'Say hello', + agentDefinitions: [codexModelAgent], + handleEvent: collector.handleEvent, + }) + + expect(result.output.type).not.toBe('error') + expect(collector.hasEventType('finish')).toBe(true) + }, + DEFAULT_TIMEOUT, + ) +}) diff --git a/sdk/e2e/utils/e2e-mocks.ts b/sdk/e2e/utils/e2e-mocks.ts index 5a3da5b11..4fa6845bf 100644 --- a/sdk/e2e/utils/e2e-mocks.ts +++ b/sdk/e2e/utils/e2e-mocks.ts @@ -55,7 +55,12 @@ function buildMockAgentTemplate(params: { } } -const MOCK_TOOL_NAMES = ['get_weather', 'execute_sql', 'fetch_api'] as const +const MOCK_TOOL_NAMES = [ + 'get_weather', + 'execute_sql', + 'fetch_api', + 'apply_patch', +] as const type MockToolName = (typeof MOCK_TOOL_NAMES)[number] function getMessageText(message: Message): string { @@ -91,7 +96,9 @@ function getAllText(messages: Message[]): string { } function extractLatestUserMessage(text: string): string | null { - const matches = [...text.matchAll(/([\s\S]*?)<\/user_message>/g)] + const matches = [ + ...text.matchAll(/([\s\S]*?)<\/user_message>/g), + ] if (matches.length === 0) { return null } @@ -108,13 +115,7 @@ function splitTextIntoChunks(text: string): string[] { } const targetChunks = - text.length <= 1 - ? 1 - : text.length > 120 - ? 4 - : text.length > 60 - ? 3 - : 2 + text.length <= 1 ? 1 : text.length > 120 ? 4 : text.length > 60 ? 3 : 2 if (targetChunks === 1) { return [text] } @@ -140,7 +141,14 @@ function extractQuotedText(text: string): string | null { } function extractCity(text: string): string | null { - const knownCities = ['New York', 'Atlantis', 'London', 'Tokyo', 'Sydney', 'Paris'] + const knownCities = [ + 'New York', + 'Atlantis', + 'London', + 'Tokyo', + 'Sydney', + 'Paris', + ] for (const city of knownCities) { if (text.toLowerCase().includes(city.toLowerCase())) { return city @@ -189,6 +197,22 @@ function buildMockToolCall(params: { return { toolName: 'execute_sql', input: { query } } } + if ( + availableTools.has('apply_patch') && + (lowerPrompt.includes('apply patch') || lowerPrompt.includes('patch file')) + ) { + return { + toolName: 'apply_patch', + input: { + patch: + '*** Begin Patch\n' + + '*** Add File: hello-from-apply-patch.txt\n' + + '+hello from apply_patch\n' + + '*** End Patch', + }, + } + } + if ( availableTools.has('fetch_api') && (lowerPrompt.includes('http') || lowerPrompt.includes('fetch')) @@ -269,6 +293,14 @@ function buildMockResponseText(params: { return 'Users include Alice and Bob.' } + if ( + lowerPrompt.includes('apply patch') || + lowerPrompt.includes('patch file') || + toolName === 'apply_patch' + ) { + return 'Applied patch successfully.' + } + if ( lowerPrompt.includes('fetch') || lowerPrompt.includes('http') || @@ -309,7 +341,9 @@ async function* promptAiSdkStreamMock( const latestUserText = getLatestUserText(params.messages) const allText = getAllText(params.messages) const promptText = getPromptText(latestUserText, allText) - const hasToolResult = params.messages.some((message) => message.role === 'tool') + const hasToolResult = params.messages.some( + (message) => message.role === 'tool', + ) const toolCall = buildMockToolCall({ tools: params.tools as Record | undefined, @@ -344,7 +378,9 @@ async function* promptAiSdkStreamMock( await params.onCostCalculated(0) } - return promptSuccess(`mock-message-${Math.random().toString(36).slice(2, 10)}`) + return promptSuccess( + `mock-message-${Math.random().toString(36).slice(2, 10)}`, + ) } async function promptAiSdkMock( @@ -393,7 +429,9 @@ export function setupE2eMocks(): void { async ({ fields }) => Object.fromEntries( fields.map((field) => [field, MOCK_USER[field]]), - ) as unknown as Awaited>, + ) as unknown as Awaited< + ReturnType + >, ) spyOn(databaseModule, 'fetchAgentFromDatabase').mockImplementation( async ({ parsedAgentId }) => buildMockAgentTemplate(parsedAgentId), @@ -406,7 +444,9 @@ export function setupE2eMocks(): void { async () => `mock-step-${Math.random().toString(36).slice(2, 10)}`, ) - spyOn(llmModule, 'promptAiSdkStream').mockImplementation(promptAiSdkStreamMock) + spyOn(llmModule, 'promptAiSdkStream').mockImplementation( + promptAiSdkStreamMock, + ) spyOn(llmModule, 'promptAiSdk').mockImplementation(promptAiSdkMock) spyOn(llmModule, 'promptAiSdkStructured').mockImplementation( promptAiSdkStructuredMock as typeof llmModule.promptAiSdkStructured, diff --git a/sdk/src/run.ts b/sdk/src/run.ts index a4e99dc0d..4db516a47 100644 --- a/sdk/src/run.ts +++ b/sdk/src/run.ts @@ -7,7 +7,11 @@ import { } from '@codebuff/agent-runtime/util/messages' import { MAX_AGENT_STEPS_DEFAULT } from '@codebuff/common/constants/agents' import { toOptionalFile } from '@codebuff/common/constants/paths' -import { getMCPClient, listMCPTools, callMCPTool } from '@codebuff/common/mcp/client' +import { + getMCPClient, + listMCPTools, + callMCPTool, +} from '@codebuff/common/mcp/client' import { toolNames } from '@codebuff/common/tools/constants' import { clientToolCallSchema } from '@codebuff/common/tools/list' import { AgentOutputSchema } from '@codebuff/common/types/session-state' @@ -18,13 +22,13 @@ import { getAgentRuntimeImpl } from './impl/agent-runtime' import { getUserInfoFromApiKey } from './impl/database' import { initialSessionState, applyOverridesToSessionState } from './run-state' import { changeFile } from './tools/change-file' +import { applyPatchTool } from './tools/apply-patch' import { codeSearch } from './tools/code-search' import { glob } from './tools/glob' import { listDirectory } from './tools/list-directory' import { getFiles } from './tools/read-files' import { runTerminalCommand } from './tools/run-terminal-command' - import type { CustomToolDefinition } from './custom-tool' import type { RunState } from './run-state' import type { FileFilter } from './tools/read-files' @@ -84,17 +88,17 @@ export type CodebuffClientOptions = { chunk: | string | { - type: 'subagent_chunk' - agentId: string - agentType: string - chunk: string - } + type: 'subagent_chunk' + agentId: string + agentType: string + chunk: string + } | { - type: 'reasoning_chunk' - agentId: string - ancestorRunIds: string[] - chunk: string - }, + type: 'reasoning_chunk' + agentId: string + ancestorRunIds: string[] + chunk: string + }, ) => void | Promise /** Optional filter to classify files before reading (runs before gitignore check) */ @@ -259,8 +263,8 @@ async function runOnce({ }) } - let resolve: (value: RunReturnType) => any = () => { } - let _reject: (error: any) => any = () => { } + let resolve: (value: RunReturnType) => any = () => {} + let _reject: (error: any) => any = () => {} const promise = new Promise((res, rej) => { resolve = res _reject = rej @@ -279,7 +283,7 @@ async function runOnce({ */ function getCancelledSessionState(message: string): SessionState { const state = cloneDeep(sessionState) - + // Add the user's message since the server never processed it if (prompt || preparedContent) { state.mainAgentState.messageHistory.push({ @@ -288,7 +292,7 @@ async function runOnce({ tags: ['USER_PROMPT'] as string[], }) } - + // Add error context message state.mainAgentState.messageHistory.push({ role: 'user' as const, @@ -371,8 +375,8 @@ async function runOnce({ overrides: overrideTools ?? {}, customToolDefinitions: customToolDefinitions ? Object.fromEntries( - customToolDefinitions.map((def) => [def.toolName, def]), - ) + customToolDefinitions.map((def) => [def.toolName, def]), + ) : {}, cwd, fs, @@ -549,7 +553,12 @@ async function readFiles({ if (override) { return await override({ filePaths }) } - return getFiles({ filePaths, cwd: requireCwd(cwd, 'read_files'), fs, fileFilter }) + return getFiles({ + filePaths, + cwd: requireCwd(cwd, 'read_files'), + fs, + fileFilter, + }) } async function handleToolCall({ @@ -612,8 +621,11 @@ async function handleToolCall({ try { let override = overrides[toolName as PublishedClientToolName] - if (!override && toolName === 'str_replace') { - // Note: write_file and str_replace have the same implementation, so reuse their write_file override. + if ( + !override && + (toolName === 'str_replace' || toolName === 'apply_patch') + ) { + // Reuse the write_file override for file editing tools. override = overrides['write_file'] } if (override) { @@ -630,6 +642,12 @@ async function handleToolCall({ cwd: requireCwd(cwd, toolName), fs, }) + } else if (toolName === 'apply_patch') { + result = await applyPatchTool({ + parameters: input, + cwd: requireCwd(cwd, toolName), + fs, + }) } else if (toolName === 'run_terminal_command') { const resolvedCwd = requireCwd(cwd, 'run_terminal_command') result = await runTerminalCommand({ @@ -677,9 +695,9 @@ async function handleToolCall({ value: { errorMessage: error && - typeof error === 'object' && - 'message' in error && - typeof error.message === 'string' + typeof error === 'object' && + 'message' in error && + typeof error.message === 'string' ? error.message : typeof error === 'string' ? error diff --git a/sdk/src/tools/apply-patch.ts b/sdk/src/tools/apply-patch.ts new file mode 100644 index 000000000..97535f29b --- /dev/null +++ b/sdk/src/tools/apply-patch.ts @@ -0,0 +1,183 @@ +import path from 'path' + +import { applyPatch as applyUnifiedPatch } from 'diff' + +import type { CodebuffToolOutput } from '@codebuff/common/tools/list' +import type { CodebuffFileSystem } from '@codebuff/common/types/filesystem' + +type PatchOp = + | { type: 'add'; path: string; content: string } + | { type: 'delete'; path: string } + | { type: 'update'; path: string; moveTo?: string; hunks: string } + +function hasTraversal(targetPath: string): boolean { + const normalized = path.normalize(targetPath) + return path.isAbsolute(normalized) || normalized.startsWith('..') +} + +function parseApplyPatchEnvelope(rawPatch: string): PatchOp[] { + const normalized = rawPatch.replace(/\r\n/g, '\n') + const lines = normalized.split('\n') + if (lines[0] !== '*** Begin Patch') { + throw new Error('Patch must start with *** Begin Patch') + } + if (lines[lines.length - 1] !== '*** End Patch') { + throw new Error('Patch must end with *** End Patch') + } + + const ops: PatchOp[] = [] + let i = 1 + const endIndex = lines.length - 1 + + while (i < endIndex) { + const line = lines[i] + if (!line) { + i++ + continue + } + + if (line.startsWith('*** Add File: ')) { + const filePath = line.slice('*** Add File: '.length) + i++ + const contentLines: string[] = [] + while (i < endIndex && !lines[i].startsWith('*** ')) { + if (!lines[i].startsWith('+')) { + throw new Error(`Add file lines must start with + (${filePath})`) + } + contentLines.push(lines[i].slice(1)) + i++ + } + ops.push({ + type: 'add', + path: filePath, + content: contentLines.join('\n'), + }) + continue + } + + if (line.startsWith('*** Delete File: ')) { + const filePath = line.slice('*** Delete File: '.length) + ops.push({ type: 'delete', path: filePath }) + i++ + continue + } + + if (line.startsWith('*** Update File: ')) { + const filePath = line.slice('*** Update File: '.length) + i++ + let moveTo: string | undefined + if (i < endIndex && lines[i].startsWith('*** Move to: ')) { + moveTo = lines[i].slice('*** Move to: '.length) + i++ + } + const hunkLines: string[] = [] + while (i < endIndex && !lines[i].startsWith('*** ')) { + if (lines[i] !== '*** End of File') { + hunkLines.push(lines[i]) + } + i++ + } + const hunks = hunkLines.join('\n').trim() + if (!hunks.includes('@@')) { + throw new Error( + `Update file operation requires at least one @@ hunk (${filePath})`, + ) + } + ops.push({ type: 'update', path: filePath, moveTo, hunks }) + continue + } + + throw new Error(`Unsupported patch operation: ${line}`) + } + + return ops +} + +export async function applyPatchTool(params: { + parameters: unknown + cwd: string + fs: CodebuffFileSystem +}): Promise> { + const { parameters, cwd, fs } = params + const patch = + typeof parameters === 'object' && + parameters !== null && + 'patch' in parameters && + typeof (parameters as { patch: unknown }).patch === 'string' + ? (parameters as { patch: string }).patch + : null + + if (!patch) { + return [{ type: 'json', value: { errorMessage: 'Missing patch string.' } }] + } + + try { + const ops = parseApplyPatchEnvelope(patch) + const applied: { + file: string + action: 'add' | 'update' | 'delete' | 'move' + }[] = [] + + for (const op of ops) { + if (hasTraversal(op.path)) { + throw new Error(`Invalid path: ${op.path}`) + } + + if (op.type === 'add') { + const fullPath = path.join(cwd, op.path) + await fs.mkdir(path.dirname(fullPath), { recursive: true }) + await fs.writeFile(fullPath, op.content) + applied.push({ file: op.path, action: 'add' }) + continue + } + + if (op.type === 'delete') { + const fullPath = path.join(cwd, op.path) + await fs.unlink(fullPath) + applied.push({ file: op.path, action: 'delete' }) + continue + } + + const originalPath = path.join(cwd, op.path) + const oldContent = await fs.readFile(originalPath, 'utf-8') + const patched = applyUnifiedPatch(oldContent, op.hunks) + if (patched === false) { + throw new Error(`Failed to apply hunks for ${op.path}`) + } + + const outputPath = op.moveTo ?? op.path + if (hasTraversal(outputPath)) { + throw new Error(`Invalid path: ${outputPath}`) + } + const targetPath = path.join(cwd, outputPath) + await fs.mkdir(path.dirname(targetPath), { recursive: true }) + await fs.writeFile(targetPath, patched) + + if (op.moveTo && op.moveTo !== op.path) { + await fs.unlink(originalPath) + applied.push({ file: outputPath, action: 'move' }) + } else { + applied.push({ file: outputPath, action: 'update' }) + } + } + + return [ + { + type: 'json', + value: { + message: `Applied ${applied.length} patch operation${applied.length === 1 ? '' : 's'}.`, + applied, + }, + }, + ] + } catch (error) { + return [ + { + type: 'json', + value: { + errorMessage: error instanceof Error ? error.message : String(error), + }, + }, + ] + } +} From 62e9f9958c092590aa509dbc63045a76bf7e5af5 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 24 Feb 2026 19:19:28 -0800 Subject: [PATCH 03/14] Make apply patch more exactly the same as openai's tool --- .agents/types/tools.ts | 13 +- agents/types/tools.ts | 13 +- cli/src/components/tools/apply-patch.tsx | 117 +++-------- .../initial-agents-dir/types/tools.ts | 13 +- common/src/tools/params/tool/apply-patch.ts | 79 ++++++-- sdk/e2e/utils/e2e-mocks.ts | 10 +- sdk/src/tools/apply-patch.ts | 187 ++++++------------ 7 files changed, 182 insertions(+), 250 deletions(-) diff --git a/.agents/types/tools.ts b/.agents/types/tools.ts index 986db7dd0..649d9af33 100644 --- a/.agents/types/tools.ts +++ b/.agents/types/tools.ts @@ -70,11 +70,18 @@ export interface AddMessageParams { } /** - * Apply edits using a Codex-style patch envelope. + * Apply a file operation (create, update, or delete) using Codex-style apply_patch format. */ export interface ApplyPatchParams { - /** Patch text in Codex apply_patch format. */ - patch: string + /** The file operation to perform. */ + operation: { + /** Operation type: create_file, update_file, or delete_file */ + type: 'create_file' | 'update_file' | 'delete_file' + /** File path relative to project root */ + path: string + /** Diff content. Required for create_file and update_file. Lines prefixed with + for creates, unified diff with @@ hunks for updates. */ + diff?: string + } } /** diff --git a/agents/types/tools.ts b/agents/types/tools.ts index 732b4ab99..9ff49e007 100644 --- a/agents/types/tools.ts +++ b/agents/types/tools.ts @@ -72,11 +72,18 @@ export interface AddMessageParams { } /** - * Apply edits using a Codex-style patch envelope. + * Apply a file operation (create, update, or delete) using Codex-style apply_patch format. */ export interface ApplyPatchParams { - /** Patch text in Codex apply_patch format. */ - patch: string + /** The file operation to perform. */ + operation: { + /** Operation type: create_file, update_file, or delete_file */ + type: 'create_file' | 'update_file' | 'delete_file' + /** File path relative to project root */ + path: string + /** Diff content. Required for create_file and update_file. Lines prefixed with + for creates, unified diff with @@ hunks for updates. */ + diff?: string + } } /** diff --git a/cli/src/components/tools/apply-patch.tsx b/cli/src/components/tools/apply-patch.tsx index c8f5013fa..98c640750 100644 --- a/cli/src/components/tools/apply-patch.tsx +++ b/cli/src/components/tools/apply-patch.tsx @@ -7,71 +7,26 @@ import { useTheme } from '../../hooks/use-theme' import type { ToolRenderConfig } from './types' type PatchOperation = - | { type: 'add'; path: string } - | { type: 'delete'; path: string } - | { type: 'update'; path: string; moveTo?: string; hunks: string } - -function parsePatchOperations(rawPatch: string): PatchOperation[] { - const normalized = rawPatch.replace(/\r\n/g, '\n') - const lines = normalized.split('\n') - if (lines.length < 2) return [] - if (lines[0] !== '*** Begin Patch') return [] - - const ops: PatchOperation[] = [] - let i = 1 - const endIndex = lines.length - 1 - - while (i < endIndex) { - const line = lines[i] - if (!line) { - i++ - continue - } - - if (line.startsWith('*** Add File: ')) { - const filePath = line.slice('*** Add File: '.length) - i++ - while (i < endIndex && !lines[i].startsWith('*** ')) { - i++ - } - ops.push({ type: 'add', path: filePath }) - continue - } - - if (line.startsWith('*** Delete File: ')) { - const filePath = line.slice('*** Delete File: '.length) - ops.push({ type: 'delete', path: filePath }) - i++ - continue - } - - if (line.startsWith('*** Update File: ')) { - const filePath = line.slice('*** Update File: '.length) - i++ - - let moveTo: string | undefined - if (i < endIndex && lines[i].startsWith('*** Move to: ')) { - moveTo = lines[i].slice('*** Move to: '.length) - i++ - } - - const hunkLines: string[] = [] - while (i < endIndex && !lines[i].startsWith('*** ')) { - if (lines[i] !== '*** End of File') { - hunkLines.push(lines[i]) - } - i++ - } - - const hunks = hunkLines.join('\n').trim() - ops.push({ type: 'update', path: filePath, moveTo, hunks }) - continue - } - - i++ + | { type: 'create_file'; path: string; diff: string } + | { type: 'update_file'; path: string; diff: string } + | { type: 'delete_file'; path: string } + +function parseOperation(input: unknown): PatchOperation | null { + if (!input || typeof input !== 'object') return null + const op = (input as { operation?: unknown }).operation + if (!op || typeof op !== 'object') return null + const { type, path, diff } = op as Record + if (typeof type !== 'string' || typeof path !== 'string') return null + if (type === 'create_file' && typeof diff === 'string') { + return { type: 'create_file', path, diff } } - - return ops + if (type === 'update_file' && typeof diff === 'string') { + return { type: 'update_file', path, diff } + } + if (type === 'delete_file') { + return { type: 'delete_file', path } + } + return null } interface EditHeaderProps { @@ -101,24 +56,19 @@ interface PatchOperationItemProps { } const PatchOperationItem = ({ operation }: PatchOperationItemProps) => { - if (operation.type === 'add') { + if (operation.type === 'create_file') { return } - if (operation.type === 'delete') { + if (operation.type === 'delete_file') { return } - const destination = - operation.moveTo && operation.moveTo !== operation.path - ? `${operation.path} → ${operation.moveTo}` - : operation.path - return ( - + - + ) @@ -128,31 +78,18 @@ export const ApplyPatchComponent = defineToolComponent({ toolName: 'apply_patch', render(toolBlock): ToolRenderConfig { - const patch = - toolBlock.input && - typeof toolBlock.input === 'object' && - 'patch' in toolBlock.input && - typeof (toolBlock.input as { patch?: unknown }).patch === 'string' - ? (toolBlock.input as { patch: string }).patch - : '' - - const operations = patch ? parsePatchOperations(patch) : [] + const operation = parseOperation(toolBlock.input) - if (operations.length === 0) { + if (!operation) { return { content: null } } return { content: ( - {operations.map((operation, index) => ( - - ))} + ), } }, -}) \ No newline at end of file +}) diff --git a/common/src/templates/initial-agents-dir/types/tools.ts b/common/src/templates/initial-agents-dir/types/tools.ts index 732b4ab99..9ff49e007 100644 --- a/common/src/templates/initial-agents-dir/types/tools.ts +++ b/common/src/templates/initial-agents-dir/types/tools.ts @@ -72,11 +72,18 @@ export interface AddMessageParams { } /** - * Apply edits using a Codex-style patch envelope. + * Apply a file operation (create, update, or delete) using Codex-style apply_patch format. */ export interface ApplyPatchParams { - /** Patch text in Codex apply_patch format. */ - patch: string + /** The file operation to perform. */ + operation: { + /** Operation type: create_file, update_file, or delete_file */ + type: 'create_file' | 'update_file' | 'delete_file' + /** File path relative to project root */ + path: string + /** Diff content. Required for create_file and update_file. Lines prefixed with + for creates, unified diff with @@ hunks for updates. */ + diff?: string + } } /** diff --git a/common/src/tools/params/tool/apply-patch.ts b/common/src/tools/params/tool/apply-patch.ts index ef4e2e434..1414be181 100644 --- a/common/src/tools/params/tool/apply-patch.ts +++ b/common/src/tools/params/tool/apply-patch.ts @@ -10,7 +10,7 @@ export const applyPatchResultSchema = z.union([ applied: z.array( z.object({ file: z.string(), - action: z.enum(['add', 'update', 'delete', 'move']), + action: z.enum(['add', 'update', 'delete']), }), ), }), @@ -21,30 +21,81 @@ export const applyPatchResultSchema = z.union([ const toolName = 'apply_patch' const endsAgentStep = false + +const operationSchema = z.discriminatedUnion('type', [ + z.object({ + type: z.literal('create_file'), + path: z.string().min(1, 'Path cannot be empty'), + diff: z.string().min(1, 'Diff cannot be empty'), + }), + z.object({ + type: z.literal('update_file'), + path: z.string().min(1, 'Path cannot be empty'), + diff: z.string().min(1, 'Diff cannot be empty'), + }), + z.object({ + type: z.literal('delete_file'), + path: z.string().min(1, 'Path cannot be empty'), + }), +]) + +export type ApplyPatchOperation = z.infer + const inputSchema = z .object({ - patch: z - .string() - .min(1, 'Patch cannot be empty') - .describe('Patch text in Codex apply_patch format.'), + operation: operationSchema.describe( + 'The file operation to perform. type is one of create_file, update_file, or delete_file.', + ), }) - .describe('Apply a unified-diff style multi-file patch.') + .describe('Apply a file operation (create, update, or delete).') const description = ` -Use this tool to edit files using Codex-style patch format. +Use this tool to apply file operations using Codex-style apply_patch format. + +Each call performs a single operation on one file. + +Operation types: +- create_file: Create a new file. Requires path and diff (lines prefixed with +). +- update_file: Update an existing file. Requires path and diff (unified diff with @@ hunks). +- delete_file: Delete a file. Requires only path. + +Example (create): +${$getNativeToolCallExampleString({ + toolName, + inputSchema, + input: { + operation: { + type: 'create_file', + path: 'hello.txt', + diff: '@@\n+Hello world\n', + }, + }, + endsAgentStep, +})} -Patch format: -- Start with *** Begin Patch -- End with *** End Patch -- Use file ops: *** Add File, *** Update File, *** Delete File -- Use @@ hunks inside update operations +Example (update): +${$getNativeToolCallExampleString({ + toolName, + inputSchema, + input: { + operation: { + type: 'update_file', + path: 'lib/fib.py', + diff: '@@\n-def fib(n):\n+def fibonacci(n):\n if n <= 1:\n return n\n- return fib(n-1) + fib(n-2)\n+ return fibonacci(n-1) + fibonacci(n-2)\n', + }, + }, + endsAgentStep, +})} -Example: +Example (delete): ${$getNativeToolCallExampleString({ toolName, inputSchema, input: { - patch: `*** Begin Patch\n*** Add File: hello.txt\n+Hello world\n*** End Patch`, + operation: { + type: 'delete_file', + path: 'old-file.txt', + }, }, endsAgentStep, })} diff --git a/sdk/e2e/utils/e2e-mocks.ts b/sdk/e2e/utils/e2e-mocks.ts index 4fa6845bf..f57954075 100644 --- a/sdk/e2e/utils/e2e-mocks.ts +++ b/sdk/e2e/utils/e2e-mocks.ts @@ -204,11 +204,11 @@ function buildMockToolCall(params: { return { toolName: 'apply_patch', input: { - patch: - '*** Begin Patch\n' + - '*** Add File: hello-from-apply-patch.txt\n' + - '+hello from apply_patch\n' + - '*** End Patch', + operation: { + type: 'create_file' as const, + path: 'hello-from-apply-patch.txt', + diff: '@@\n+hello from apply_patch\n', + }, }, } } diff --git a/sdk/src/tools/apply-patch.ts b/sdk/src/tools/apply-patch.ts index 97535f29b..93b5a8461 100644 --- a/sdk/src/tools/apply-patch.ts +++ b/sdk/src/tools/apply-patch.ts @@ -2,95 +2,25 @@ import path from 'path' import { applyPatch as applyUnifiedPatch } from 'diff' +import type { ApplyPatchOperation } from '@codebuff/common/tools/params/tool/apply-patch' import type { CodebuffToolOutput } from '@codebuff/common/tools/list' import type { CodebuffFileSystem } from '@codebuff/common/types/filesystem' -type PatchOp = - | { type: 'add'; path: string; content: string } - | { type: 'delete'; path: string } - | { type: 'update'; path: string; moveTo?: string; hunks: string } - function hasTraversal(targetPath: string): boolean { const normalized = path.normalize(targetPath) return path.isAbsolute(normalized) || normalized.startsWith('..') } -function parseApplyPatchEnvelope(rawPatch: string): PatchOp[] { - const normalized = rawPatch.replace(/\r\n/g, '\n') - const lines = normalized.split('\n') - if (lines[0] !== '*** Begin Patch') { - throw new Error('Patch must start with *** Begin Patch') - } - if (lines[lines.length - 1] !== '*** End Patch') { - throw new Error('Patch must end with *** End Patch') - } - - const ops: PatchOp[] = [] - let i = 1 - const endIndex = lines.length - 1 - - while (i < endIndex) { - const line = lines[i] - if (!line) { - i++ - continue - } - - if (line.startsWith('*** Add File: ')) { - const filePath = line.slice('*** Add File: '.length) - i++ - const contentLines: string[] = [] - while (i < endIndex && !lines[i].startsWith('*** ')) { - if (!lines[i].startsWith('+')) { - throw new Error(`Add file lines must start with + (${filePath})`) - } - contentLines.push(lines[i].slice(1)) - i++ - } - ops.push({ - type: 'add', - path: filePath, - content: contentLines.join('\n'), - }) - continue - } - - if (line.startsWith('*** Delete File: ')) { - const filePath = line.slice('*** Delete File: '.length) - ops.push({ type: 'delete', path: filePath }) - i++ - continue +function extractCreateFileContent(diff: string): string { + const lines = diff.replace(/\r\n/g, '\n').split('\n') + const contentLines: string[] = [] + for (const line of lines) { + if (line.startsWith('@@')) continue + if (line.startsWith('+')) { + contentLines.push(line.slice(1)) } - - if (line.startsWith('*** Update File: ')) { - const filePath = line.slice('*** Update File: '.length) - i++ - let moveTo: string | undefined - if (i < endIndex && lines[i].startsWith('*** Move to: ')) { - moveTo = lines[i].slice('*** Move to: '.length) - i++ - } - const hunkLines: string[] = [] - while (i < endIndex && !lines[i].startsWith('*** ')) { - if (lines[i] !== '*** End of File') { - hunkLines.push(lines[i]) - } - i++ - } - const hunks = hunkLines.join('\n').trim() - if (!hunks.includes('@@')) { - throw new Error( - `Update file operation requires at least one @@ hunk (${filePath})`, - ) - } - ops.push({ type: 'update', path: filePath, moveTo, hunks }) - continue - } - - throw new Error(`Unsupported patch operation: ${line}`) } - - return ops + return contentLines.join('\n') } export async function applyPatchTool(params: { @@ -99,74 +29,67 @@ export async function applyPatchTool(params: { fs: CodebuffFileSystem }): Promise> { const { parameters, cwd, fs } = params - const patch = + + const operation = typeof parameters === 'object' && parameters !== null && - 'patch' in parameters && - typeof (parameters as { patch: unknown }).patch === 'string' - ? (parameters as { patch: string }).patch + 'operation' in parameters && + typeof (parameters as { operation: unknown }).operation === 'object' + ? (parameters as { operation: ApplyPatchOperation }).operation : null - if (!patch) { - return [{ type: 'json', value: { errorMessage: 'Missing patch string.' } }] + if (!operation) { + return [{ type: 'json', value: { errorMessage: 'Missing or invalid operation object.' } }] } try { - const ops = parseApplyPatchEnvelope(patch) - const applied: { - file: string - action: 'add' | 'update' | 'delete' | 'move' - }[] = [] - - for (const op of ops) { - if (hasTraversal(op.path)) { - throw new Error(`Invalid path: ${op.path}`) - } - - if (op.type === 'add') { - const fullPath = path.join(cwd, op.path) - await fs.mkdir(path.dirname(fullPath), { recursive: true }) - await fs.writeFile(fullPath, op.content) - applied.push({ file: op.path, action: 'add' }) - continue - } - - if (op.type === 'delete') { - const fullPath = path.join(cwd, op.path) - await fs.unlink(fullPath) - applied.push({ file: op.path, action: 'delete' }) - continue - } - - const originalPath = path.join(cwd, op.path) - const oldContent = await fs.readFile(originalPath, 'utf-8') - const patched = applyUnifiedPatch(oldContent, op.hunks) - if (patched === false) { - throw new Error(`Failed to apply hunks for ${op.path}`) - } + if (hasTraversal(operation.path)) { + throw new Error(`Invalid path: ${operation.path}`) + } - const outputPath = op.moveTo ?? op.path - if (hasTraversal(outputPath)) { - throw new Error(`Invalid path: ${outputPath}`) - } - const targetPath = path.join(cwd, outputPath) - await fs.mkdir(path.dirname(targetPath), { recursive: true }) - await fs.writeFile(targetPath, patched) + const fullPath = path.join(cwd, operation.path) + + if (operation.type === 'create_file') { + const content = extractCreateFileContent(operation.diff) + await fs.mkdir(path.dirname(fullPath), { recursive: true }) + await fs.writeFile(fullPath, content) + return [ + { + type: 'json', + value: { + message: 'Applied 1 patch operation.', + applied: [{ file: operation.path, action: 'add' as const }], + }, + }, + ] + } - if (op.moveTo && op.moveTo !== op.path) { - await fs.unlink(originalPath) - applied.push({ file: outputPath, action: 'move' }) - } else { - applied.push({ file: outputPath, action: 'update' }) - } + if (operation.type === 'delete_file') { + await fs.unlink(fullPath) + return [ + { + type: 'json', + value: { + message: 'Applied 1 patch operation.', + applied: [{ file: operation.path, action: 'delete' as const }], + }, + }, + ] } + // update_file + const oldContent = await fs.readFile(fullPath, 'utf-8') + const patched = applyUnifiedPatch(oldContent, operation.diff) + if (patched === false) { + throw new Error(`Failed to apply diff for ${operation.path}`) + } + await fs.writeFile(fullPath, patched) return [ { type: 'json', value: { - message: `Applied ${applied.length} patch operation${applied.length === 1 ? '' : 's'}.`, - applied, + message: 'Applied 1 patch operation.', + applied: [{ file: operation.path, action: 'update' as const }], }, }, ] From fdc51f6e4d4cef9d5ea84121b90b27ab6a50aeeb Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 24 Feb 2026 20:01:34 -0800 Subject: [PATCH 04/14] Hide thinking if single bold phrase --- cli/src/components/thinking.tsx | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cli/src/components/thinking.tsx b/cli/src/components/thinking.tsx index b03484c49..87731d48d 100644 --- a/cli/src/components/thinking.tsx +++ b/cli/src/components/thinking.tsx @@ -30,6 +30,14 @@ export const Thinking = memo( const theme = useTheme() const { contentMaxWidth } = useTerminalDimensions() + // Special case: single **bold** string under 100 chars gets compact rendering + const singleBoldMatch = content.length < 100 ? content.trim().match(/^\*\*([^*]+)\*\*$/) : null + if (singleBoldMatch) { + return ( + null + ) + } + const width = Math.max(10, availableWidth ?? contentMaxWidth) // Normalize content to single line for consistent preview const normalizedContent = content.replace(/\n+/g, ' ').trim() @@ -46,9 +54,9 @@ export const Thinking = memo( const toggleIndicator = !isThinkingComplete ? '• ' - : showFull ? '▾ ' - : showPreview ? '• ' - : '▸ ' + : showFull ? '▾ ' + : showPreview ? '• ' + : '▸ ' return (