From 640f7f26a54780241e917b366c526dd56a9384f4 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Sat, 28 Mar 2026 00:09:03 +0000 Subject: [PATCH 01/21] feat: add /strands test command for TUI testing via MCP harness - Add tester mode to process-inputs.cjs (routes /strands test) - Add task-tester.sop.md with TUI testing instructions - Add tui-test-flows.md with 5 test flows - Add Node.js setup + build steps for tester mode in workflow - Wire TUI harness MCP server (stdio) into the Strands agent --- .github/agent-sops/task-tester.sop.md | 88 +++++++++++++++++++ .github/agent-sops/tui-test-flows.md | 50 +++++++++++ .github/scripts/javascript/process-inputs.cjs | 14 +-- .github/workflows/strands-command.yml | 18 ++++ 4 files changed, 165 insertions(+), 5 deletions(-) create mode 100644 .github/agent-sops/task-tester.sop.md create mode 100644 .github/agent-sops/tui-test-flows.md diff --git a/.github/agent-sops/task-tester.sop.md b/.github/agent-sops/task-tester.sop.md new file mode 100644 index 000000000..f72ef3e33 --- /dev/null +++ b/.github/agent-sops/task-tester.sop.md @@ -0,0 +1,88 @@ +# Task Tester SOP + +## Role + +You are a TUI Tester. Your goal is to verify the AgentCore CLI's interactive TUI behavior by driving it through +predefined test flows using the TUI harness MCP tools. You post results as PR comments. + +You MUST NOT modify any code, create branches, or push commits. Your only output is test result comments. + +## Tools Available + +You have TUI harness MCP tools: `tui_launch`, `tui_send_keys`, `tui_action`, `tui_wait_for`, `tui_screenshot`, +`tui_read_screen`, `tui_close`, `tui_list_sessions`. + +You also have `shell` for setup commands and GitHub tools for posting comments. + +## Steps + +### 1. Setup + +- Read the test spec file at `.github/agent-sops/tui-test-flows.md` +- The CLI is installed globally as `agentcore`. Launch TUI sessions using `tui_launch` with `command: "agentcore"` and + the appropriate `args`. 
+- For non-interactive commands (e.g., `--json` output), prefer `shell` over `tui_launch`. + +### 2. Run Test Flows + +For each flow in the test spec: + +1. Create any required setup (e.g., temp directories, minimal projects) using `shell` +2. Use `tui_launch` to start the CLI with the specified arguments and `cwd` +3. Follow the flow steps: use `tui_action` (preferred — combines send + wait + read in one call) or `tui_wait_for` + + `tui_send_keys` for multi-step interactions +4. Verify each expectation against the screen content +5. On **pass**: record the flow name as passed +6. On **failure**: use `tui_screenshot` to capture the terminal state, record the flow name, expected behavior, actual + behavior, and the screenshot text +7. Always `tui_close` the session when done, even on failure + +**Constraints:** + +- Use `timeoutMs: 10000` (10 seconds) minimum for all `tui_wait_for` and `tui_action` pattern waits +- Use small terminal dimensions: `cols: 100, rows: 24` +- If a wait times out, retry once before declaring failure +- Use text format screenshots only (not SVG) +- Keep terminal dimensions consistent across all flows + +### 3. Post Results + +Post a single summary comment on the PR with this format: + +```markdown +## 🧪 TUI Test Results + +**X/Y flows passed** + +### ✅ Passed + +- Flow name 1 +- Flow name 2 + +### ❌ Failed + +#### Flow name 3 + +**Expected:** description of what should have happened **Actual:** description of what happened + +<details>
+<summary>Screenshot</summary> +``` + +(terminal screenshot here) + +``` + +</details>
+``` + +If all flows pass, omit the Failed section. + +## Forbidden Actions + +- You MUST NOT modify, create, or delete any source files +- You MUST NOT run git add, git commit, or git push +- You MUST NOT create or update branches +- You MUST NOT approve or merge the pull request +- You MUST NOT run deploy, invoke, or any command that creates AWS resources +- Your ONLY output is test result comments on the pull request diff --git a/.github/agent-sops/tui-test-flows.md b/.github/agent-sops/tui-test-flows.md new file mode 100644 index 000000000..e772235d6 --- /dev/null +++ b/.github/agent-sops/tui-test-flows.md @@ -0,0 +1,50 @@ +# TUI Test Flows + +Each flow describes a user interaction to verify. The tester agent drives these using the TUI harness MCP tools. + +--- + +## Flow: Help text lists all subcommands + +1. Launch: `agentcore --help` (use `tui_launch` with `command: "agentcore"`, `args: ["--help"]`) +2. Wait for: "Usage:" on screen +3. Expect all of these subcommands visible: `create`, `deploy`, `invoke`, `status`, `logs`, `add`, `remove` +4. Close session + +--- + +## Flow: Create wizard prompts for project name + +1. Launch: `agentcore create` (no flags, in a temp directory) +2. Wait for: a prompt asking for the project name (look for "name" or "project") +3. Expect: an input field or prompt is visible +4. Close session (Ctrl+C) + +--- + +## Flow: Create with --json produces valid JSON + +1. In a temp directory, run via shell: + `agentcore create --name TestProj --language Python --framework Strands --model-provider Bedrock --memory none --json` +2. Expect: stdout contains valid JSON with `"success": true` and `"projectPath"` +3. Verify the project directory was created + +--- + +## Flow: Add agent shows framework selection + +1. First create a project via shell: `agentcore create --name AgentTest --no-agent --json` (in a temp directory) +2. Launch: `agentcore add agent` in the created project directory +3. Wait for: agent name prompt +4. 
Type a name, press Enter +5. Wait for: framework or language selection to appear +6. Expect: at least "Strands" and "LangChain_LangGraph" visible as options +7. Close session (Ctrl+C) + +--- + +## Flow: Invalid project name shows error + +1. In a temp directory, run via shell: + `agentcore create --name "123invalid" --language Python --framework Strands --model-provider Bedrock --memory none --json` +2. Expect: exit code is non-zero OR output contains an error about the project name (must start with a letter) diff --git a/.github/scripts/javascript/process-inputs.cjs b/.github/scripts/javascript/process-inputs.cjs index 27beb25d6..40c2b0697 100644 --- a/.github/scripts/javascript/process-inputs.cjs +++ b/.github/scripts/javascript/process-inputs.cjs @@ -78,6 +78,7 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) implementer: '.github/agent-sops/task-implementer.sop.md', reviewer: '.github/agent-sops/task-reviewer.sop.md', refiner: '.github/agent-sops/task-refiner.sop.md', + tester: '.github/agent-sops/task-tester.sop.md', }; const scriptFile = sopFiles[mode] || sopFiles.refiner; @@ -94,11 +95,13 @@ module.exports = async (context, github, core, inputs) => { const { issueId, command, issue } = await getIssueInfo(github, context, inputs); const isPullRequest = !!issue.data.pull_request; - const mode = command.startsWith('review') - ? 'reviewer' - : isPullRequest || command.startsWith('implement') - ? 'implementer' - : 'refiner'; + const mode = command.startsWith('test') + ? 'tester' + : command.startsWith('review') + ? 'reviewer' + : isPullRequest || command.startsWith('implement') + ? 
'implementer' + : 'refiner'; console.log(`Is PR: ${isPullRequest}, Mode: ${mode}`); const branchName = await determineBranch(github, context, issueId, mode, isPullRequest); @@ -113,6 +116,7 @@ module.exports = async (context, github, core, inputs) => { core.setOutput('session_id', sessionId); core.setOutput('system_prompt', systemPrompt); core.setOutput('prompt', prompt); + core.setOutput('mode', mode); } catch (error) { const errorMsg = `Failed: ${error.message}`; console.error(errorMsg); diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index bfba8c80d..0c5f06d47 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -94,6 +94,21 @@ jobs: }; await processInputs(context, github, core, inputs); + - name: Setup Node.js (tester mode) + if: steps.process-inputs.outputs.mode == 'tester' + uses: actions/setup-node@v6 + with: + node-version: 20.x + cache: 'npm' + + - name: Build CLI and TUI harness (tester mode) + if: steps.process-inputs.outputs.mode == 'tester' + run: | + npm ci + npm run build + npm run build:harness + npm install -g "$(npm pack | tail -1)" + - name: Run Strands Agent uses: ./.github/actions/strands-action with: @@ -102,6 +117,9 @@ jobs: provider: 'bedrock' model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0' tools: 'strands_tools:shell,retrieve' + mcp_servers: + ${{ steps.process-inputs.outputs.mode == 'tester' && + '{"mcpServers":{"tui-harness":{"command":"node","args":["dist/mcp-harness/index.mjs"]}}}' || '' }} aws_role_arn: ${{ secrets.AWS_ROLE_ARN }} aws_region: 'us-west-2' pat_token: ${{ secrets.GITHUB_TOKEN }} From a64e65655d11538f25a8da7f8902d4f950d4e4c1 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 21:57:28 +0000 Subject: [PATCH 02/21] fix: handle fork workflow_dispatch repo context for cross-repo issue lookups --- .github/workflows/strands-command.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index 0c5f06d47..aee29850d 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -70,6 +70,7 @@ jobs: fetch-depth: 0 - name: Add strands-running label + continue-on-error: true uses: actions/github-script@v8 with: script: | @@ -86,6 +87,10 @@ jobs: uses: actions/github-script@v8 with: script: | + // When dispatched from a fork, point API calls at the upstream repo + if (context.eventName === 'workflow_dispatch' && context.repo.owner !== 'aws') { + context.repo = { owner: 'aws', repo: 'agentcore-cli' }; + } const processInputs = require('./.github/scripts/javascript/process-inputs.cjs'); const inputs = { issue_id: '${{ inputs.issue_id }}', From 3be5a74e463cdc25ec148c8f69345616c7a017fa Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 22:12:20 +0000 Subject: [PATCH 03/21] fix: use Object.defineProperty to override context.repo getter --- .github/workflows/strands-command.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index aee29850d..bd6252160 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -87,9 +87,11 @@ jobs: uses: actions/github-script@v8 with: script: | - // When dispatched from a fork, point API calls at the upstream repo + // When dispatched from a fork, override the repo getter to point at upstream if (context.eventName === 'workflow_dispatch' && context.repo.owner !== 'aws') { - context.repo = { owner: 'aws', repo: 'agentcore-cli' }; + Object.defineProperty(context, 'repo', { + get: () => ({ owner: 'aws', repo: 'agentcore-cli' }) + }); } const processInputs = require('./.github/scripts/javascript/process-inputs.cjs'); const inputs = { From 870e1adc3703edef57eda1202304937d908606e2 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 22:18:52 
+0000 Subject: [PATCH 04/21] fix: include upstream repo in agent prompt and SOP --- .github/agent-sops/task-tester.sop.md | 3 +++ .github/scripts/javascript/process-inputs.cjs | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/agent-sops/task-tester.sop.md b/.github/agent-sops/task-tester.sop.md index f72ef3e33..382be5c7a 100644 --- a/.github/agent-sops/task-tester.sop.md +++ b/.github/agent-sops/task-tester.sop.md @@ -14,6 +14,9 @@ You have TUI harness MCP tools: `tui_launch`, `tui_send_keys`, `tui_action`, `tu You also have `shell` for setup commands and GitHub tools for posting comments. +**Important:** Always use `aws/agentcore-cli` as the repository for all GitHub API calls (get PR, post comments, etc.), +not the fork repository. + ## Steps ### 1. Setup diff --git a/.github/scripts/javascript/process-inputs.cjs b/.github/scripts/javascript/process-inputs.cjs index 40c2b0697..f1c915c68 100644 --- a/.github/scripts/javascript/process-inputs.cjs +++ b/.github/scripts/javascript/process-inputs.cjs @@ -85,7 +85,9 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) const systemPrompt = fs.readFileSync(scriptFile, 'utf8'); let prompt = isPullRequest ? 
'The pull request id is:' : 'The issue id is:'; - prompt += `${issueId}\n${command}\nreview and continue`; + prompt += `${issueId}\n`; + prompt += `The repository is: aws/agentcore-cli\n`; + prompt += `${command}\nreview and continue`; return { sessionId, systemPrompt, prompt }; } From 344abd22c49a2ae88277fc78dba2cb18ce83aedd Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 22:23:38 +0000 Subject: [PATCH 05/21] fix: use absolute path for MCP harness in workflow --- .github/workflows/strands-command.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index bd6252160..8237bbb30 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -126,7 +126,7 @@ jobs: tools: 'strands_tools:shell,retrieve' mcp_servers: ${{ steps.process-inputs.outputs.mode == 'tester' && - '{"mcpServers":{"tui-harness":{"command":"node","args":["dist/mcp-harness/index.mjs"]}}}' || '' }} + format('{"mcpServers":{"tui-harness":{"command":"node","args":["{0}/dist/mcp-harness/index.mjs"]}}}', github.workspace) || '' }} aws_role_arn: ${{ secrets.AWS_ROLE_ARN }} aws_region: 'us-west-2' pat_token: ${{ secrets.GITHUB_TOKEN }} From 6ac69d38300f1eb96a33533fed761a1b5e2c7763 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 22:26:08 +0000 Subject: [PATCH 06/21] fix: avoid format() with JSON braces, use separate step for MCP config --- .github/workflows/strands-command.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index 8237bbb30..e642a5963 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -116,6 +116,11 @@ jobs: npm run build:harness npm install -g "$(npm pack | tail -1)" + - name: Set MCP harness path + if: steps.process-inputs.outputs.mode == 'tester' + id: mcp-config + 
run: echo "mcp_servers={\"mcpServers\":{\"tui-harness\":{\"command\":\"node\",\"args\":[\"${{ github.workspace }}/dist/mcp-harness/index.mjs\"]}}}" >> "$GITHUB_OUTPUT" + - name: Run Strands Agent uses: ./.github/actions/strands-action with: @@ -124,9 +129,7 @@ jobs: provider: 'bedrock' model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0' tools: 'strands_tools:shell,retrieve' - mcp_servers: - ${{ steps.process-inputs.outputs.mode == 'tester' && - format('{"mcpServers":{"tui-harness":{"command":"node","args":["{0}/dist/mcp-harness/index.mjs"]}}}', github.workspace) || '' }} + mcp_servers: ${{ steps.mcp-config.outputs.mcp_servers || '' }} aws_role_arn: ${{ secrets.AWS_ROLE_ARN }} aws_region: 'us-west-2' pat_token: ${{ secrets.GITHUB_TOKEN }} From 7c1e252f87ddfc5da975fd8a1cadef322a92d1a3 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 22:29:26 +0000 Subject: [PATCH 07/21] debug: verify harness file exists after build --- .github/workflows/strands-command.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index e642a5963..7e0fa80e7 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -115,6 +115,9 @@ jobs: npm run build npm run build:harness npm install -g "$(npm pack | tail -1)" + echo "--- Verifying harness exists ---" + ls -la dist/mcp-harness/index.mjs + node -e "require('fs').accessSync('${{ github.workspace }}/dist/mcp-harness/index.mjs'); console.log('File accessible')" - name: Set MCP harness path if: steps.process-inputs.outputs.mode == 'tester' From 3c840b4b476d2bdc11478214a53664137b76d1e2 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 22:32:29 +0000 Subject: [PATCH 08/21] fix: copy harness to /tmp before strands action checkout wipes dist/ --- .github/workflows/strands-command.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git 
a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index 7e0fa80e7..326ee1e3f 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -115,14 +115,12 @@ jobs: npm run build npm run build:harness npm install -g "$(npm pack | tail -1)" - echo "--- Verifying harness exists ---" - ls -la dist/mcp-harness/index.mjs - node -e "require('fs').accessSync('${{ github.workspace }}/dist/mcp-harness/index.mjs'); console.log('File accessible')" + cp dist/mcp-harness/index.mjs /tmp/mcp-harness.mjs - name: Set MCP harness path if: steps.process-inputs.outputs.mode == 'tester' id: mcp-config - run: echo "mcp_servers={\"mcpServers\":{\"tui-harness\":{\"command\":\"node\",\"args\":[\"${{ github.workspace }}/dist/mcp-harness/index.mjs\"]}}}" >> "$GITHUB_OUTPUT" + run: echo "mcp_servers={\"mcpServers\":{\"tui-harness\":{\"command\":\"node\",\"args\":[\"/tmp/mcp-harness.mjs\"]}}}" >> "$GITHUB_OUTPUT" - name: Run Strands Agent uses: ./.github/actions/strands-action From 9c73cee6be8a89082c3c7a1bd1b718bf23cdf6d9 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 22:35:15 +0000 Subject: [PATCH 09/21] fix: install harness external deps to /tmp/mcp-harness alongside the bundle --- .github/workflows/strands-command.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index 326ee1e3f..cd4b27ce7 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -115,12 +115,14 @@ jobs: npm run build npm run build:harness npm install -g "$(npm pack | tail -1)" - cp dist/mcp-harness/index.mjs /tmp/mcp-harness.mjs + mkdir -p /tmp/mcp-harness + cp dist/mcp-harness/index.mjs /tmp/mcp-harness/index.mjs + cd /tmp/mcp-harness && npm init -y && npm install node-pty @xterm/headless express - name: Set MCP harness path if: steps.process-inputs.outputs.mode == 'tester' id: mcp-config - 
run: echo "mcp_servers={\"mcpServers\":{\"tui-harness\":{\"command\":\"node\",\"args\":[\"/tmp/mcp-harness/index.mjs\"]}}}" >> "$GITHUB_OUTPUT" - name: Run Strands Agent uses: ./.github/actions/strands-action From cfe4454652fe0c0d14c2df0be4cb6da085e73948 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 23:20:10 +0000 Subject: [PATCH 10/21] feat: save SVG screenshots as artifacts on every TUI test flow --- .github/agent-sops/task-tester.sop.md | 9 ++++++--- .github/workflows/strands-command.yml | 8 ++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/.github/agent-sops/task-tester.sop.md b/.github/agent-sops/task-tester.sop.md index 382be5c7a..63c275e82 100644 --- a/.github/agent-sops/task-tester.sop.md +++ b/.github/agent-sops/task-tester.sop.md @@ -35,13 +35,16 @@ For each flow in the test spec: 3. Follow the flow steps: use `tui_action` (preferred — combines send + wait + read in one call) or `tui_wait_for` + `tui_send_keys` for multi-step interactions 4. Verify each expectation against the screen content -5. On **pass**: record the flow name as passed -6. On **failure**: use `tui_screenshot` to capture the terminal state, record the flow name, expected behavior, actual - behavior, and the screenshot text +5. On **pass**: use `tui_screenshot` with `format: "svg"` and `savePath: "/tmp/tui-screenshots/<flow-name>-pass.svg"` to + capture the final state, then record the flow name as passed +6. On **failure**: use `tui_screenshot` with `format: "svg"` and `savePath: "/tmp/tui-screenshots/<flow-name>-fail.svg"` + to capture the terminal state, also take a text screenshot for the PR comment, record the flow name, expected + behavior, actual behavior, and the screenshot text +7. 
Always `tui_close` the session when done, even on failure **Constraints:** +- Create `/tmp/tui-screenshots/` at the start before running any flows - Use `timeoutMs: 10000` (10 seconds) minimum for all `tui_wait_for` and `tui_action` pattern waits - Use small terminal dimensions: `cols: 100, rows: 24` - If a wait times out, retry once before declaring failure diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index cd4b27ce7..76182466e 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -141,6 +141,14 @@ jobs: S3_SESSION_BUCKET: ${{ secrets.AGENT_SESSIONS_BUCKET }} BRANCH_NAME: ${{ steps.process-inputs.outputs.branch_name }} + - name: Upload TUI screenshots + if: always() && steps.process-inputs.outputs.mode == 'tester' + uses: actions/upload-artifact@v4 + with: + name: tui-screenshots + path: /tmp/tui-screenshots/ + if-no-files-found: ignore + - name: Remove strands-running label if: always() uses: actions/github-script@v8 From 1801a184ff7864501c352a803dbf1baf72161e15 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 23:28:10 +0000 Subject: [PATCH 11/21] fix: remove contradictory text-only constraint, strengthen SVG screenshot requirement --- .github/agent-sops/task-tester.sop.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/agent-sops/task-tester.sop.md b/.github/agent-sops/task-tester.sop.md index 63c275e82..1ae8de368 100644 --- a/.github/agent-sops/task-tester.sop.md +++ b/.github/agent-sops/task-tester.sop.md @@ -35,21 +35,19 @@ For each flow in the test spec: 3. Follow the flow steps: use `tui_action` (preferred — combines send + wait + read in one call) or `tui_wait_for` + `tui_send_keys` for multi-step interactions 4. Verify each expectation against the screen content -5. 
On **pass**: use `tui_screenshot` with `format: "svg"` and `savePath: "/tmp/tui-screenshots/<flow-name>-pass.svg"` to - capture the final state, then record the flow name as passed -6. On **failure**: use `tui_screenshot` with `format: "svg"` and `savePath: "/tmp/tui-screenshots/<flow-name>-fail.svg"` - to capture the terminal state, also take a text screenshot for the PR comment, record the flow name, expected - behavior, actual behavior, and the screenshot text +5. **MUST** take a screenshot before closing every session: call `tui_screenshot` with `sessionId`, `format: "svg"`, and + `savePath: "/tmp/tui-screenshots/<flow-name>.svg"` (use kebab-case for flow names, e.g. `help-text.svg`, + `create-wizard.svg`). This is required for both pass and fail. +6. On **failure**: also take a text-format screenshot for the PR comment body. Record the flow name, expected behavior, + actual behavior, and the text screenshot. 7. Always `tui_close` the session when done, even on failure **Constraints:** -- Create `/tmp/tui-screenshots/` at the start before running any flows +- Run `mkdir -p /tmp/tui-screenshots` via `shell` as your very first action +- Every flow MUST produce an SVG file in `/tmp/tui-screenshots/` — if a flow has no screenshot, it is considered + incomplete - Use `timeoutMs: 10000` (10 seconds) minimum for all `tui_wait_for` and `tui_action` pattern waits -- Use small terminal dimensions: `cols: 100, rows: 24` -- If a wait times out, retry once before declaring failure -- Use text format screenshots only (not SVG) -- Keep terminal dimensions consistent across all flows ### 3. 
Post Results From 73028e0daa973be9cfda93843723b12d5844f327 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 23:30:13 +0000 Subject: [PATCH 12/21] feat: simplify to 2 focused TUI test flows (help + add agent/status) --- .github/agent-sops/tui-test-flows.md | 47 +++++++--------------------- 1 file changed, 11 insertions(+), 36 deletions(-) diff --git a/.github/agent-sops/tui-test-flows.md b/.github/agent-sops/tui-test-flows.md index e772235d6..8f04777a0 100644 --- a/.github/agent-sops/tui-test-flows.md +++ b/.github/agent-sops/tui-test-flows.md @@ -4,47 +4,22 @@ Each flow describes a user interaction to verify. The tester agent drives these --- -## Flow: Help text lists all subcommands +## Flow: Help text lists all commands 1. Launch: `agentcore --help` (use `tui_launch` with `command: "agentcore"`, `args: ["--help"]`) 2. Wait for: "Usage:" on screen -3. Expect all of these subcommands visible: `create`, `deploy`, `invoke`, `status`, `logs`, `add`, `remove` +3. Expect all of these commands visible: `create`, `deploy`, `invoke`, `status`, `add`, `remove`, `dev`, `logs` 4. Close session --- -## Flow: Create wizard prompts for project name +## Flow: Add agent then verify status shows local-only -1. Launch: `agentcore create` (no flags, in a temp directory) -2. Wait for: a prompt asking for the project name (look for "name" or "project") -3. Expect: an input field or prompt is visible -4. Close session (Ctrl+C) - ---- - -## Flow: Create with --json produces valid JSON - -1. In a temp directory, run via shell: - `agentcore create --name TestProj --language Python --framework Strands --model-provider Bedrock --memory none --json` -2. Expect: stdout contains valid JSON with `"success": true` and `"projectPath"` -3. Verify the project directory was created - ---- - -## Flow: Add agent shows framework selection - -1. First create a project via shell: `agentcore create --name AgentTest --no-agent --json` (in a temp directory) -2. 
Launch: `agentcore add agent` in the created project directory -3. Wait for: agent name prompt -4. Type a name, press Enter -5. Wait for: framework or language selection to appear -6. Expect: at least "Strands" and "LangChain_LangGraph" visible as options -7. Close session (Ctrl+C) - ---- - -## Flow: Invalid project name shows error - -1. In a temp directory, run via shell: - `agentcore create --name "123invalid" --language Python --framework Strands --model-provider Bedrock --memory none --json` -2. Expect: exit code is non-zero OR output contains an error about the project name (must start with a letter) +1. Create a project via shell: `agentcore create --name TestStatus --no-agent --json` (in a temp directory) +2. Add an agent via shell: + `agentcore add agent --name MyAgent --language Python --framework Strands --model-provider Bedrock --json` (in the + project directory) +3. Launch: `agentcore status` in the project directory (use `tui_launch`) +4. Wait for: the status table to render (look for "MyAgent" or "agent" on screen) +5. Expect: "MyAgent" appears with a "local-only" state +6. Close session From 4300ce5823fccca9f8559bdc1ce73ce7aa8d5663 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Thu, 2 Apr 2026 23:48:45 +0000 Subject: [PATCH 13/21] feat: rewrite flows to drive interactive TUI wizard instead of CLI flags --- .github/agent-sops/tui-test-flows.md | 34 ++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/.github/agent-sops/tui-test-flows.md b/.github/agent-sops/tui-test-flows.md index 8f04777a0..bc07e17c8 100644 --- a/.github/agent-sops/tui-test-flows.md +++ b/.github/agent-sops/tui-test-flows.md @@ -2,24 +2,38 @@ Each flow describes a user interaction to verify. The tester agent drives these using the TUI harness MCP tools. +All flows use `tui_launch` with `command: "agentcore"` and the appropriate `args`. Use `cols: 100, rows: 24`. + --- ## Flow: Help text lists all commands -1. 
Launch: `agentcore --help` (use `tui_launch` with `command: "agentcore"`, `args: ["--help"]`) +1. Launch: `agentcore --help` 2. Wait for: "Usage:" on screen 3. Expect all of these commands visible: `create`, `deploy`, `invoke`, `status`, `add`, `remove`, `dev`, `logs` 4. Close session --- -## Flow: Add agent then verify status shows local-only +## Flow: Create project with agent via TUI wizard + +This flow drives the full interactive create wizard — no `--json` flags. -1. Create a project via shell: `agentcore create --name TestStatus --no-agent --json` (in a temp directory) -2. Add an agent via shell: - `agentcore add agent --name MyAgent --language Python --framework Strands --model-provider Bedrock --json` (in the - project directory) -3. Launch: `agentcore status` in the project directory (use `tui_launch`) -4. Wait for: the status table to render (look for "MyAgent" or "agent" on screen) -5. Expect: "MyAgent" appears with a "local-only" state -6. Close session +1. Create a temp directory via `shell`: `mktemp -d` +2. Launch: `agentcore create` with `cwd` set to the temp directory +3. Wait for: "Project name" prompt +4. Type a project name (e.g. `TuiTest`), press Enter +5. Wait for: "Would you like to add an agent" selection +6. Expect: "Yes, add an agent" is visible and selected (has `❯` marker) +7. Press Enter to select "Yes, add an agent" +8. Wait for: "Agent name" prompt inside the Add Agent wizard +9. Accept the default name or type one, press Enter +10. Wait for: "Select agent type" selection +11. Expect: "Create new agent" is visible +12. Press Enter to select it +13. Wait for: "Language" step with "Python" visible +14. Press Enter to select Python +15. Continue pressing Enter through remaining steps (Build, Protocol, Framework, Model) accepting defaults until you + reach a "Confirm" or completion screen +16. Expect: the wizard completes — look for a success message or the process exits +17. 
Close session From 568afe3e59c49a6f20607a2d48aa09a55e40c484 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Fri, 3 Apr 2026 00:14:25 +0000 Subject: [PATCH 14/21] fix: instruct agent to take screenshots before process exits --- .github/agent-sops/tui-test-flows.md | 35 +++++++++++++++++----------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/.github/agent-sops/tui-test-flows.md b/.github/agent-sops/tui-test-flows.md index bc07e17c8..bafcba3f2 100644 --- a/.github/agent-sops/tui-test-flows.md +++ b/.github/agent-sops/tui-test-flows.md @@ -4,14 +4,21 @@ Each flow describes a user interaction to verify. The tester agent drives these All flows use `tui_launch` with `command: "agentcore"` and the appropriate `args`. Use `cols: 100, rows: 24`. +**Important screenshot rule:** Take the SVG screenshot BEFORE the process exits. For commands that exit immediately +(like `--help`), use `tui_wait_for` to wait for expected content, then immediately take the screenshot while the session +is still alive. For interactive wizards, take the screenshot at the most interesting step (e.g. the final confirmation +screen) before pressing the last Enter. + --- ## Flow: Help text lists all commands 1. Launch: `agentcore --help` -2. Wait for: "Usage:" on screen -3. Expect all of these commands visible: `create`, `deploy`, `invoke`, `status`, `add`, `remove`, `dev`, `logs` -4. Close session +2. Use `tui_wait_for` to wait for "Usage:" on screen +3. Immediately take SVG screenshot (the session may still be alive briefly after output) +4. Read the screen content +5. Expect all of these commands visible: `create`, `deploy`, `invoke`, `status`, `add`, `remove`, `dev`, `logs` +6. Close session --- @@ -24,16 +31,16 @@ This flow drives the full interactive create wizard — no `--json` flags. 3. Wait for: "Project name" prompt 4. Type a project name (e.g. `TuiTest`), press Enter 5. Wait for: "Would you like to add an agent" selection -6. 
Expect: "Yes, add an agent" is visible and selected (has `❯` marker) +6. Expect: "Yes, add an agent" is visible 7. Press Enter to select "Yes, add an agent" 8. Wait for: "Agent name" prompt inside the Add Agent wizard -9. Accept the default name or type one, press Enter -10. Wait for: "Select agent type" selection -11. Expect: "Create new agent" is visible -12. Press Enter to select it -13. Wait for: "Language" step with "Python" visible -14. Press Enter to select Python -15. Continue pressing Enter through remaining steps (Build, Protocol, Framework, Model) accepting defaults until you - reach a "Confirm" or completion screen -16. Expect: the wizard completes — look for a success message or the process exits -17. Close session +9. Accept the default name, press Enter +10. Wait for: "Select agent type" — expect "Create new agent" visible +11. Press Enter to select it +12. Wait for: "Language" step with "Python" visible +13. Press Enter to select Python +14. Continue pressing Enter through remaining steps (Build, Protocol, Framework, Model) accepting defaults +15. When you reach the "Confirm" step, take the SVG screenshot BEFORE pressing the final Enter +16. Press Enter to confirm +17. Wait for the process to exit or a success message +18. Close session From f83d625a1802ce316deba49749a3a5791d9d262e Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Fri, 3 Apr 2026 12:31:47 +0000 Subject: [PATCH 15/21] feat: support ad-hoc test flows via /strands test --- .github/agent-sops/task-tester.sop.md | 13 +++++++++++-- .github/scripts/javascript/process-inputs.cjs | 13 ++++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/.github/agent-sops/task-tester.sop.md b/.github/agent-sops/task-tester.sop.md index 1ae8de368..e26042315 100644 --- a/.github/agent-sops/task-tester.sop.md +++ b/.github/agent-sops/task-tester.sop.md @@ -19,9 +19,18 @@ not the fork repository. ## Steps -### 1. Setup +### 1. 
Determine Mode + +Check the command text in the prompt: + +- If the command is just `test` (no additional text): run **all predefined flows** from + `.github/agent-sops/tui-test-flows.md` +- If the command is `test ` (has text after "test"): run **only the described ad-hoc flow**. The text after + "test" describes what to test. Design the flow yourself using the TUI harness tools, following the same patterns as + the predefined flows. + +### 2. Setup -- Read the test spec file at `.github/agent-sops/tui-test-flows.md` - The CLI is installed globally as `agentcore`. Launch TUI sessions using `tui_launch` with `command: "agentcore"` and the appropriate `args`. - For non-interactive commands (e.g., `--json` output), prefer `shell` over `tui_launch`. diff --git a/.github/scripts/javascript/process-inputs.cjs b/.github/scripts/javascript/process-inputs.cjs index f1c915c68..e3a505e73 100644 --- a/.github/scripts/javascript/process-inputs.cjs +++ b/.github/scripts/javascript/process-inputs.cjs @@ -87,7 +87,18 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) let prompt = isPullRequest ? 
'The pull request id is:' : 'The issue id is:'; prompt += `${issueId}\n`; prompt += `The repository is: aws/agentcore-cli\n`; - prompt += `${command}\nreview and continue`; + + if (mode === 'tester') { + const flowDescription = command.replace(/^test\s*/, '').trim(); + if (flowDescription) { + prompt += `Run this ad-hoc test flow: ${flowDescription}\n`; + } else { + prompt += `Run all predefined test flows from .github/agent-sops/tui-test-flows.md\n`; + } + } else { + prompt += `${command}\n`; + } + prompt += 'review and continue'; return { sessionId, systemPrompt, prompt }; } From 7350b24a10a97bc7ad67c5a9e96e58cabe4a28dd Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Fri, 3 Apr 2026 12:56:40 +0000 Subject: [PATCH 16/21] fix: make SVG screenshot requirement unambiguous with exact tool call pattern --- .github/agent-sops/task-tester.sop.md | 31 +++++++++++++++++++-------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/.github/agent-sops/task-tester.sop.md b/.github/agent-sops/task-tester.sop.md index e26042315..1ec379e60 100644 --- a/.github/agent-sops/task-tester.sop.md +++ b/.github/agent-sops/task-tester.sop.md @@ -35,27 +35,40 @@ Check the command text in the prompt: the appropriate `args`. - For non-interactive commands (e.g., `--json` output), prefer `shell` over `tui_launch`. -### 2. Run Test Flows +### 3. Run Test Flows -For each flow in the test spec: +For each flow: 1. Create any required setup (e.g., temp directories, minimal projects) using `shell` 2. Use `tui_launch` to start the CLI with the specified arguments and `cwd` 3. Follow the flow steps: use `tui_action` (preferred โ€” combines send + wait + read in one call) or `tui_wait_for` + `tui_send_keys` for multi-step interactions 4. Verify each expectation against the screen content -5. 
**MUST** take a screenshot before closing every session: call `tui_screenshot` with `sessionId`, `format: "svg"`, and - `savePath: "/tmp/tui-screenshots/.svg"` (use kebab-case for flow names, e.g. `help-text.svg`, - `create-wizard.svg`). This is required for both pass and fail. -6. On **failure**: also take a text-format screenshot for the PR comment body. Record the flow name, expected behavior, - actual behavior, and the text screenshot. +5. Take a screenshot — see Screenshot Rules below +6. On **failure**: also read the screen text for the PR comment body. Record the flow name, expected behavior, actual + behavior, and the screen text. 7. Always `tui_close` the session when done, even on failure +### Screenshot Rules + +**NEVER save .txt files. ONLY save .svg files.** + +Every flow MUST produce exactly one SVG screenshot saved to `/tmp/tui-screenshots/`. Use this exact tool call pattern: + +``` +tui_screenshot(sessionId=, format="svg", savePath="/tmp/tui-screenshots/.svg") +``` + +- File extension MUST be `.svg`, NEVER `.txt` or `.png` +- The `format` parameter MUST be `"svg"`, NEVER `"text"` +- Take the screenshot WHILE the TUI session is still alive (before the process exits) +- For commands that exit immediately (like `--help`): take the screenshot right after `tui_wait_for` succeeds +- For interactive wizards: take the screenshot at the most interesting step before pressing the final Enter +- If a session has already exited, that flow's screenshot is skipped — do NOT save a text file as a substitute + **Constraints:** - Run `mkdir -p /tmp/tui-screenshots` via `shell` as your very first action -- Every flow MUST produce an SVG file in `/tmp/tui-screenshots/` — if a flow has no screenshot, it is considered - incomplete - Use `timeoutMs: 10000` (10 seconds) minimum for all `tui_wait_for` and `tui_action` pattern waits ### 3. 
Post Results From 0950297ea7f94201664b52e958e67e3795164c7f Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Fri, 3 Apr 2026 14:03:36 +0000 Subject: [PATCH 17/21] style: format strands-command.yml --- .github/workflows/strands-command.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index 76182466e..fc8ef0bc2 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -122,7 +122,10 @@ jobs: - name: Set MCP harness path if: steps.process-inputs.outputs.mode == 'tester' id: mcp-config - run: echo "mcp_servers={\"mcpServers\":{\"tui-harness\":{\"command\":\"node\",\"args\":[\"/tmp/mcp-harness/index.mjs\"]}}}" >> "$GITHUB_OUTPUT" + run: + echo + "mcp_servers={\"mcpServers\":{\"tui-harness\":{\"command\":\"node\",\"args\":[\"/tmp/mcp-harness/index.mjs\"]}}}" + >> "$GITHUB_OUTPUT" - name: Run Strands Agent uses: ./.github/actions/strands-action From 3da9f3465dfc3469bdc76fe5cefa8518359f342e Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Fri, 3 Apr 2026 14:11:20 +0000 Subject: [PATCH 18/21] refactor: clean SOP/flows separation, rewrite SOP, use mapping for mode detection --- .github/agent-sops/task-tester.sop.md | 94 +++++++------------ .github/agent-sops/tui-test-flows.md | 45 +++------ .github/scripts/javascript/process-inputs.cjs | 12 +-- 3 files changed, 50 insertions(+), 101 deletions(-) diff --git a/.github/agent-sops/task-tester.sop.md b/.github/agent-sops/task-tester.sop.md index 1ec379e60..d5cf04ce1 100644 --- a/.github/agent-sops/task-tester.sop.md +++ b/.github/agent-sops/task-tester.sop.md @@ -2,78 +2,52 @@ ## Role -You are a TUI Tester. Your goal is to verify the AgentCore CLI's interactive TUI behavior by driving it through -predefined test flows using the TUI harness MCP tools. You post results as PR comments. +You are a CLI and TUI tester for the AgentCore CLI. 
You verify both interactive TUI behavior and non-interactive CLI +commands. You drive the CLI using TUI harness tools and shell commands, then post results as PR comments. You MUST NOT modify any code, create branches, or push commits. Your only output is test result comments. -## Tools Available +## Tools -You have TUI harness MCP tools: `tui_launch`, `tui_send_keys`, `tui_action`, `tui_wait_for`, `tui_screenshot`, -`tui_read_screen`, `tui_close`, `tui_list_sessions`. +- **TUI harness** (MCP tools): `tui_launch`, `tui_send_keys`, `tui_action`, `tui_wait_for`, `tui_screenshot`, + `tui_read_screen`, `tui_close`, `tui_list_sessions` — for interactive TUI testing +- **`shell`** — for non-interactive CLI commands, setup (temp dirs, project scaffolding), and verification +- **GitHub tools** — for posting PR comments. Always use `aws/agentcore-cli` as the repository, not the fork. -You also have `shell` for setup commands and GitHub tools for posting comments. - -**Important:** Always use `aws/agentcore-cli` as the repository for all GitHub API calls (get PR, post comments, etc.), -not the fork repository. - -## Steps - -### 1. Determine Mode +## What to Test Check the command text in the prompt: -- If the command is just `test` (no additional text): run **all predefined flows** from - `.github/agent-sops/tui-test-flows.md` -- If the command is `test ` (has text after "test"): run **only the described ad-hoc flow**. The text after - "test" describes what to test. Design the flow yourself using the TUI harness tools, following the same patterns as - the predefined flows. - -### 2. Setup +- `Run all predefined test flows` → read and execute every flow from `.github/agent-sops/tui-test-flows.md` +- `Run this ad-hoc test flow: ` → design and execute a single flow matching the description -- The CLI is installed globally as `agentcore`. Launch TUI sessions using `tui_launch` with `command: "agentcore"` and - the appropriate `args`. 
-- For non-interactive commands (e.g., `--json` output), prefer `shell` over `tui_launch`. +## General Rules -### 3. Run Test Flows - -For each flow: - -1. Create any required setup (e.g., temp directories, minimal projects) using `shell` -2. Use `tui_launch` to start the CLI with the specified arguments and `cwd` -3. Follow the flow steps: use `tui_action` (preferred — combines send + wait + read in one call) or `tui_wait_for` + - `tui_send_keys` for multi-step interactions -4. Verify each expectation against the screen content -5. Take a screenshot — see Screenshot Rules below -6. On **failure**: also read the screen text for the PR comment body. Record the flow name, expected behavior, actual - behavior, and the screen text. -7. Always `tui_close` the session when done, even on failure +- The CLI is installed globally as `agentcore` +- Use `tui_launch` with `command: "agentcore"` for interactive commands. Use `shell` for non-interactive ones. +- Terminal dimensions: `cols: 100, rows: 24` for all TUI sessions +- Use `timeoutMs: 10000` minimum for all `tui_wait_for` and `tui_action` calls +- If a wait times out, retry once before declaring failure +- Always `tui_close` sessions when done, even on failure +- Run `mkdir -p /tmp/tui-screenshots` via `shell` as your very first action -### Screenshot Rules +## Screenshot Rules **NEVER save .txt files. ONLY save .svg files.** -Every flow MUST produce exactly one SVG screenshot saved to `/tmp/tui-screenshots/`. 
Use this exact tool call pattern: +Use this exact tool call pattern for every flow: ``` tui_screenshot(sessionId=, format="svg", savePath="/tmp/tui-screenshots/.svg") ``` -- File extension MUST be `.svg`, NEVER `.txt` or `.png` -- The `format` parameter MUST be `"svg"`, NEVER `"text"` -- Take the screenshot WHILE the TUI session is still alive (before the process exits) -- For commands that exit immediately (like `--help`): take the screenshot right after `tui_wait_for` succeeds -- For interactive wizards: take the screenshot at the most interesting step before pressing the final Enter -- If a session has already exited, that flow's screenshot is skipped — do NOT save a text file as a substitute +- `format` MUST be `"svg"`, NEVER `"text"` +- Take the screenshot WHILE the session is still alive (before the process exits) +- If a session has already exited, skip the screenshot — do NOT save a text file as a substitute -**Constraints:** +## Post Results -- Run `mkdir -p /tmp/tui-screenshots` via `shell` as your very first action -- Use `timeoutMs: 10000` (10 seconds) minimum for all `tui_wait_for` and `tui_action` pattern waits - -### 3. Post Results - -Post a single summary comment on the PR with this format: +Post a single PR comment: ```markdown ## 🧪 TUI Test Results @@ -89,15 +63,12 @@ Post a single summary comment on the PR with this format: #### Flow name 3 -**Expected:** description of what should have happened **Actual:** description of what happened +**Expected:** what should have happened **Actual:** what happened
Screenshot -``` - -(terminal screenshot here) -``` +(paste screen text here)
``` @@ -106,9 +77,8 @@ If all flows pass, omit the Failed section. ## Forbidden Actions -- You MUST NOT modify, create, or delete any source files -- You MUST NOT run git add, git commit, or git push -- You MUST NOT create or update branches -- You MUST NOT approve or merge the pull request -- You MUST NOT run deploy, invoke, or any command that creates AWS resources -- Your ONLY output is test result comments on the pull request +- Do NOT modify, create, or delete source files +- Do NOT run git commands (add, commit, push) +- Do NOT create or update branches +- Do NOT approve or merge the pull request +- Do NOT deploy or create AWS resources diff --git a/.github/agent-sops/tui-test-flows.md b/.github/agent-sops/tui-test-flows.md index bafcba3f2..74c255787 100644 --- a/.github/agent-sops/tui-test-flows.md +++ b/.github/agent-sops/tui-test-flows.md @@ -1,46 +1,27 @@ # TUI Test Flows -Each flow describes a user interaction to verify. The tester agent drives these using the TUI harness MCP tools. - -All flows use `tui_launch` with `command: "agentcore"` and the appropriate `args`. Use `cols: 100, rows: 24`. - -**Important screenshot rule:** Take the SVG screenshot BEFORE the process exits. For commands that exit immediately -(like `--help`), use `tui_wait_for` to wait for expected content, then immediately take the screenshot while the session -is still alive. For interactive wizards, take the screenshot at the most interesting step (e.g. the final confirmation -screen) before pressing the last Enter. - --- ## Flow: Help text lists all commands 1. Launch: `agentcore --help` -2. Use `tui_wait_for` to wait for "Usage:" on screen -3. Immediately take SVG screenshot (the session may still be alive briefly after output) -4. Read the screen content -5. Expect all of these commands visible: `create`, `deploy`, `invoke`, `status`, `add`, `remove`, `dev`, `logs` -6. Close session +2. Wait for "Usage:" on screen +3. Take SVG screenshot immediately (before the process exits) +4. 
Verify these commands are visible: `create`, `deploy`, `invoke`, `status`, `add`, `remove`, `dev`, `logs` +5. Close session --- ## Flow: Create project with agent via TUI wizard -This flow drives the full interactive create wizard — no `--json` flags. - 1. Create a temp directory via `shell`: `mktemp -d` 2. Launch: `agentcore create` with `cwd` set to the temp directory -3. Wait for: "Project name" prompt -4. Type a project name (e.g. `TuiTest`), press Enter -5. Wait for: "Would you like to add an agent" selection -6. Expect: "Yes, add an agent" is visible -7. Press Enter to select "Yes, add an agent" -8. Wait for: "Agent name" prompt inside the Add Agent wizard -9. Accept the default name, press Enter -10. Wait for: "Select agent type" — expect "Create new agent" visible -11. Press Enter to select it -12. Wait for: "Language" step with "Python" visible -13. Press Enter to select Python -14. Continue pressing Enter through remaining steps (Build, Protocol, Framework, Model) accepting defaults -15. When you reach the "Confirm" step, take the SVG screenshot BEFORE pressing the final Enter -16. Press Enter to confirm -17. Wait for the process to exit or a success message -18. Close session +3. Wait for "Project name" prompt, type `TuiTest`, press Enter +4. Wait for "Would you like to add an agent" — expect "Yes, add an agent" visible, press Enter +5. Wait for "Agent name" prompt, accept the default, press Enter +6. Wait for "Select agent type" — expect "Create new agent" visible, press Enter +7. Wait for "Language" step — expect "Python" visible, press Enter +8. Continue pressing Enter through remaining steps (Build, Protocol, Framework, Model) accepting defaults +9. At the "Confirm" step, take SVG screenshot, then press Enter +10. Wait for the process to exit or a success message +11. 
Close session diff --git a/.github/scripts/javascript/process-inputs.cjs b/.github/scripts/javascript/process-inputs.cjs index e3a505e73..4d9c51a42 100644 --- a/.github/scripts/javascript/process-inputs.cjs +++ b/.github/scripts/javascript/process-inputs.cjs @@ -108,13 +108,11 @@ module.exports = async (context, github, core, inputs) => { const { issueId, command, issue } = await getIssueInfo(github, context, inputs); const isPullRequest = !!issue.data.pull_request; - const mode = command.startsWith('test') - ? 'tester' - : command.startsWith('review') - ? 'reviewer' - : isPullRequest || command.startsWith('implement') - ? 'implementer' - : 'refiner'; + + const COMMAND_MODES = { test: 'tester', review: 'reviewer', implement: 'implementer' }; + const mode = + Object.entries(COMMAND_MODES).find(([prefix]) => command.startsWith(prefix))?.[1] ?? + (isPullRequest ? 'implementer' : 'refiner'); console.log(`Is PR: ${isPullRequest}, Mode: ${mode}`); const branchName = await determineBranch(github, context, issueId, mode, isPullRequest); From f2419b2b172731383f7aa683e4b209b7befc4e8e Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Fri, 3 Apr 2026 15:22:20 +0000 Subject: [PATCH 19/21] fix: use tui_read_screen text in PR comments, add verification section to SOP --- .github/agent-sops/task-tester.sop.md | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/.github/agent-sops/task-tester.sop.md b/.github/agent-sops/task-tester.sop.md index d5cf04ce1..8b0da621b 100644 --- a/.github/agent-sops/task-tester.sop.md +++ b/.github/agent-sops/task-tester.sop.md @@ -66,15 +66,32 @@ Post a single PR comment: **Expected:** what should have happened **Actual:** what happened
-Screenshot +Terminal output +``` + +(paste tui_read_screen text output here) -(paste screen text here) +```
``` If all flows pass, omit the Failed section. +For failures, use `tui_read_screen` to capture the terminal text and paste it in the comment. SVG screenshots are +uploaded as workflow artifacts separately — do not try to embed them in the comment. + +## Verification + +After each flow completes, verify the side effects — not just the TUI output: + +- If a project was created: use `shell` to check the directory exists and contains expected files (e.g. + `agentcore.json`) +- If a resource was added: use `shell` to check the config file was updated +- If a command produced output: verify the output matches expectations + +Do not rely solely on what the TUI displays. Confirm the CLI actually did what it claimed. + ## Forbidden Actions - Do NOT modify, create, or delete source files From 9ea6d4a687ea8c991ee31ab6f62525eabb4cddf2 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Fri, 3 Apr 2026 15:37:16 +0000 Subject: [PATCH 20/21] fix: allow deploy in test flows when explicitly requested --- .github/agent-sops/task-tester.sop.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/agent-sops/task-tester.sop.md b/.github/agent-sops/task-tester.sop.md index 8b0da621b..41f005cab 100644 --- a/.github/agent-sops/task-tester.sop.md +++ b/.github/agent-sops/task-tester.sop.md @@ -98,4 +98,4 @@ Do not rely solely on what the TUI displays. 
Confirm the CLI actually did what i - Do NOT run git commands (add, commit, push) - Do NOT create or update branches - Do NOT approve or merge the pull request -- Do NOT deploy or create AWS resources +- Do NOT deploy or create AWS resources unless the test flow explicitly requires it From 726573b41c39945f8e5e547552070fef84388454 Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Fri, 3 Apr 2026 17:35:35 +0000 Subject: [PATCH 21/21] fix: pass target_repo as explicit param instead of overriding context.repo, add comment on continue-on-error --- .github/scripts/javascript/process-inputs.cjs | 50 +++++++++++-------- .github/workflows/strands-command.yml | 14 +++--- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/.github/scripts/javascript/process-inputs.cjs b/.github/scripts/javascript/process-inputs.cjs index 4d9c51a42..b97538dc7 100644 --- a/.github/scripts/javascript/process-inputs.cjs +++ b/.github/scripts/javascript/process-inputs.cjs @@ -4,48 +4,48 @@ const fs = require('fs'); -async function getIssueInfo(github, context, inputs) { +async function getIssueInfo(github, repo, inputs, eventName, payload) { let issueId; - if (context.eventName === 'workflow_dispatch') { + if (eventName === 'workflow_dispatch') { issueId = inputs.issue_id; } else { // Handle both issue comments and PR comments - issueId = (context.payload.issue?.number || context.payload.pull_request?.number)?.toString(); + issueId = (payload.issue?.number || payload.pull_request?.number)?.toString(); } const command = - context.eventName === 'workflow_dispatch' + eventName === 'workflow_dispatch' ? 
inputs.command - : context.payload.comment.body.match(/^\/strands\s*(.*)$/)?.[1]?.trim() || ''; + : payload.comment.body.match(/^\/strands\s*(.*)$/)?.[1]?.trim() || ''; - console.log(`Event: ${context.eventName}, Issue ID: ${issueId}, Command: "${command}"`); + console.log(`Event: ${eventName}, Issue ID: ${issueId}, Command: "${command}"`); const issue = await github.rest.issues.get({ - owner: context.repo.owner, - repo: context.repo.repo, + owner: repo.owner, + repo: repo.repo, issue_number: issueId, }); return { issueId, command, issue }; } -async function determineBranch(github, context, issueId, mode, isPullRequest) { +async function determineBranch(github, repo, issueId, mode, isPullRequest) { let branchName = 'main'; if (mode === 'implementer' && !isPullRequest) { branchName = `agent-tasks/${issueId}`; const mainRef = await github.rest.git.getRef({ - owner: context.repo.owner, - repo: context.repo.repo, + owner: repo.owner, + repo: repo.repo, ref: 'heads/main', }); try { await github.rest.git.createRef({ - owner: context.repo.owner, - repo: context.repo.repo, + owner: repo.owner, + repo: repo.repo, ref: `refs/heads/${branchName}`, sha: mainRef.data.object.sha, }); @@ -59,8 +59,8 @@ async function determineBranch(github, context, issueId, mode, isPullRequest) { } } else if (isPullRequest) { const pr = await github.rest.pulls.get({ - owner: context.repo.owner, - repo: context.repo.repo, + owner: repo.owner, + repo: repo.repo, pull_number: issueId, }); branchName = pr.data.head.ref; @@ -69,7 +69,7 @@ async function determineBranch(github, context, issueId, mode, isPullRequest) { return branchName; } -function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) { +function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs, repo) { const sessionId = inputs.session_id || (mode === 'implementer' ? 
`${mode}-${branchName}`.replace(/[\/\\]/g, '-') : `${mode}-${issueId}`); @@ -86,7 +86,7 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) let prompt = isPullRequest ? 'The pull request id is:' : 'The issue id is:'; prompt += `${issueId}\n`; - prompt += `The repository is: aws/agentcore-cli\n`; + prompt += `The repository is: ${repo.owner}/${repo.repo}\n`; if (mode === 'tester') { const flowDescription = command.replace(/^test\s*/, '').trim(); @@ -105,7 +105,9 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) module.exports = async (context, github, core, inputs) => { try { - const { issueId, command, issue } = await getIssueInfo(github, context, inputs); + const repo = inputs.target_repo || { owner: context.repo.owner, repo: context.repo.repo }; + + const { issueId, command, issue } = await getIssueInfo(github, repo, inputs, context.eventName, context.payload); const isPullRequest = !!issue.data.pull_request; @@ -115,10 +117,18 @@ module.exports = async (context, github, core, inputs) => { (isPullRequest ? 
'implementer' : 'refiner'); console.log(`Is PR: ${isPullRequest}, Mode: ${mode}`); - const branchName = await determineBranch(github, context, issueId, mode, isPullRequest); + const branchName = await determineBranch(github, repo, issueId, mode, isPullRequest); console.log(`Building prompts - mode: ${mode}, issue: ${issueId}, is PR: ${isPullRequest}`); - const { sessionId, systemPrompt, prompt } = buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs); + const { sessionId, systemPrompt, prompt } = buildPrompts( + mode, + issueId, + isPullRequest, + command, + branchName, + inputs, + repo + ); console.log(`Session ID: ${sessionId}`); console.log(`Task prompt: "${prompt}"`); diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index fc8ef0bc2..92482446d 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -70,6 +70,8 @@ jobs: fetch-depth: 0 - name: Add strands-running label + # continue-on-error: workflow_dispatch from a fork targets the fork repo + # where the upstream issue/PR doesn't exist, causing a 404. continue-on-error: true uses: actions/github-script@v8 with: @@ -87,17 +89,15 @@ jobs: uses: actions/github-script@v8 with: script: | - // When dispatched from a fork, override the repo getter to point at upstream - if (context.eventName === 'workflow_dispatch' && context.repo.owner !== 'aws') { - Object.defineProperty(context, 'repo', { - get: () => ({ owner: 'aws', repo: 'agentcore-cli' }) - }); - } const processInputs = require('./.github/scripts/javascript/process-inputs.cjs'); const inputs = { issue_id: '${{ inputs.issue_id }}', command: '${{ inputs.command }}', - session_id: '${{ inputs.session_id }}' + session_id: '${{ inputs.session_id }}', + // When dispatched from a fork, target the upstream repo for API calls + ...(context.eventName === 'workflow_dispatch' && context.repo.owner !== 'aws' + ? 
{ target_repo: { owner: 'aws', repo: 'agentcore-cli' } } + : {}), }; await processInputs(context, github, core, inputs);