diff --git a/.github/agent-sops/task-tester.sop.md b/.github/agent-sops/task-tester.sop.md new file mode 100644 index 00000000..41f005ca --- /dev/null +++ b/.github/agent-sops/task-tester.sop.md @@ -0,0 +1,101 @@ +# Task Tester SOP + +## Role + +You are a CLI and TUI tester for the AgentCore CLI. You verify both interactive TUI behavior and non-interactive CLI +commands. You drive the CLI using TUI harness tools and shell commands, then post results as PR comments. + +You MUST NOT modify any code, create branches, or push commits. Your only output is test result comments. + +## Tools + +- **TUI harness** (MCP tools): `tui_launch`, `tui_send_keys`, `tui_action`, `tui_wait_for`, `tui_screenshot`, + `tui_read_screen`, `tui_close`, `tui_list_sessions` — for interactive TUI testing +- **`shell`** — for non-interactive CLI commands, setup (temp dirs, project scaffolding), and verification +- **GitHub tools** — for posting PR comments. Always use `aws/agentcore-cli` as the repository, not the fork. + +## What to Test + +Check the command text in the prompt: + +- `Run all predefined test flows` → read and execute every flow from `.github/agent-sops/tui-test-flows.md` +- `Run this ad-hoc test flow: ` → design and execute a single flow matching the description + +## General Rules + +- The CLI is installed globally as `agentcore` +- Use `tui_launch` with `command: "agentcore"` for interactive commands. Use `shell` for non-interactive ones. +- Terminal dimensions: `cols: 100, rows: 24` for all TUI sessions +- Use `timeoutMs: 10000` minimum for all `tui_wait_for` and `tui_action` calls +- If a wait times out, retry once before declaring failure +- Always `tui_close` sessions when done, even on failure +- Run `mkdir -p /tmp/tui-screenshots` via `shell` as your very first action + +## Screenshot Rules + +**NEVER save .txt files. 
ONLY save .svg files.** + +Use this exact tool call pattern for every flow: + +``` +tui_screenshot(sessionId=<session-id>, format="svg", savePath="/tmp/tui-screenshots/<flow-name>.svg") +``` + +- `format` MUST be `"svg"`, NEVER `"text"` +- Take the screenshot WHILE the session is still alive (before the process exits) +- If a session has already exited, skip the screenshot — do NOT save a text file as a substitute + +## Post Results + +Post a single PR comment: + +```markdown +## 🧪 TUI Test Results + +**X/Y flows passed** + +### ✅ Passed + +- Flow name 1 +- Flow name 2 + +### ❌ Failed + +#### Flow name 3 + +**Expected:** what should have happened **Actual:** what happened + +
+<details><summary>Terminal output</summary> +``` + +(paste tui_read_screen text output here) + +``` + +</details>
+``` + +If all flows pass, omit the Failed section. + +For failures, use `tui_read_screen` to capture the terminal text and paste it in the comment. SVG screenshots are +uploaded as workflow artifacts separately — do not try to embed them in the comment. + +## Verification + +After each flow completes, verify the side effects — not just the TUI output: + +- If a project was created: use `shell` to check the directory exists and contains expected files (e.g. + `agentcore.json`) +- If a resource was added: use `shell` to check the config file was updated +- If a command produced output: verify the output matches expectations + +Do not rely solely on what the TUI displays. Confirm the CLI actually did what it claimed. + +## Forbidden Actions + +- Do NOT modify, create, or delete source files +- Do NOT run git commands (add, commit, push) +- Do NOT create or update branches +- Do NOT approve or merge the pull request +- Do NOT deploy or create AWS resources unless the test flow explicitly requires it diff --git a/.github/agent-sops/tui-test-flows.md b/.github/agent-sops/tui-test-flows.md new file mode 100644 index 00000000..74c25578 --- /dev/null +++ b/.github/agent-sops/tui-test-flows.md @@ -0,0 +1,27 @@ +# TUI Test Flows + +--- + +## Flow: Help text lists all commands + +1. Launch: `agentcore --help` +2. Wait for "Usage:" on screen +3. Take SVG screenshot immediately (before the process exits) +4. Verify these commands are visible: `create`, `deploy`, `invoke`, `status`, `add`, `remove`, `dev`, `logs` +5. Close session + +--- + +## Flow: Create project with agent via TUI wizard + +1. Create a temp directory via `shell`: `mktemp -d` +2. Launch: `agentcore create` with `cwd` set to the temp directory +3. Wait for "Project name" prompt, type `TuiTest`, press Enter +4. Wait for "Would you like to add an agent" — expect "Yes, add an agent" visible, press Enter +5. Wait for "Agent name" prompt, accept the default, press Enter +6. 
Wait for "Select agent type" — expect "Create new agent" visible, press Enter +7. Wait for "Language" step — expect "Python" visible, press Enter +8. Continue pressing Enter through remaining steps (Build, Protocol, Framework, Model) accepting defaults +9. At the "Confirm" step, take SVG screenshot, then press Enter +10. Wait for the process to exit or a success message +11. Close session diff --git a/.github/scripts/javascript/process-inputs.cjs b/.github/scripts/javascript/process-inputs.cjs index 27beb25d..b97538dc 100644 --- a/.github/scripts/javascript/process-inputs.cjs +++ b/.github/scripts/javascript/process-inputs.cjs @@ -4,48 +4,48 @@ const fs = require('fs'); -async function getIssueInfo(github, context, inputs) { +async function getIssueInfo(github, repo, inputs, eventName, payload) { let issueId; - if (context.eventName === 'workflow_dispatch') { + if (eventName === 'workflow_dispatch') { issueId = inputs.issue_id; } else { // Handle both issue comments and PR comments - issueId = (context.payload.issue?.number || context.payload.pull_request?.number)?.toString(); + issueId = (payload.issue?.number || payload.pull_request?.number)?.toString(); } const command = - context.eventName === 'workflow_dispatch' + eventName === 'workflow_dispatch' ? 
inputs.command - : context.payload.comment.body.match(/^\/strands\s*(.*)$/)?.[1]?.trim() || ''; + : payload.comment.body.match(/^\/strands\s*(.*)$/)?.[1]?.trim() || ''; - console.log(`Event: ${context.eventName}, Issue ID: ${issueId}, Command: "${command}"`); + console.log(`Event: ${eventName}, Issue ID: ${issueId}, Command: "${command}"`); const issue = await github.rest.issues.get({ - owner: context.repo.owner, - repo: context.repo.repo, + owner: repo.owner, + repo: repo.repo, issue_number: issueId, }); return { issueId, command, issue }; } -async function determineBranch(github, context, issueId, mode, isPullRequest) { +async function determineBranch(github, repo, issueId, mode, isPullRequest) { let branchName = 'main'; if (mode === 'implementer' && !isPullRequest) { branchName = `agent-tasks/${issueId}`; const mainRef = await github.rest.git.getRef({ - owner: context.repo.owner, - repo: context.repo.repo, + owner: repo.owner, + repo: repo.repo, ref: 'heads/main', }); try { await github.rest.git.createRef({ - owner: context.repo.owner, - repo: context.repo.repo, + owner: repo.owner, + repo: repo.repo, ref: `refs/heads/${branchName}`, sha: mainRef.data.object.sha, }); @@ -59,8 +59,8 @@ async function determineBranch(github, context, issueId, mode, isPullRequest) { } } else if (isPullRequest) { const pr = await github.rest.pulls.get({ - owner: context.repo.owner, - repo: context.repo.repo, + owner: repo.owner, + repo: repo.repo, pull_number: issueId, }); branchName = pr.data.head.ref; @@ -69,7 +69,7 @@ async function determineBranch(github, context, issueId, mode, isPullRequest) { return branchName; } -function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) { +function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs, repo) { const sessionId = inputs.session_id || (mode === 'implementer' ? 
`${mode}-${branchName}`.replace(/[\/\\]/g, '-') : `${mode}-${issueId}`); @@ -78,33 +78,57 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) implementer: '.github/agent-sops/task-implementer.sop.md', reviewer: '.github/agent-sops/task-reviewer.sop.md', refiner: '.github/agent-sops/task-refiner.sop.md', + tester: '.github/agent-sops/task-tester.sop.md', }; const scriptFile = sopFiles[mode] || sopFiles.refiner; const systemPrompt = fs.readFileSync(scriptFile, 'utf8'); let prompt = isPullRequest ? 'The pull request id is:' : 'The issue id is:'; - prompt += `${issueId}\n${command}\nreview and continue`; + prompt += `${issueId}\n`; + prompt += `The repository is: ${repo.owner}/${repo.repo}\n`; + + if (mode === 'tester') { + const flowDescription = command.replace(/^test\s*/, '').trim(); + if (flowDescription) { + prompt += `Run this ad-hoc test flow: ${flowDescription}\n`; + } else { + prompt += `Run all predefined test flows from .github/agent-sops/tui-test-flows.md\n`; + } + } else { + prompt += `${command}\n`; + } + prompt += 'review and continue'; return { sessionId, systemPrompt, prompt }; } module.exports = async (context, github, core, inputs) => { try { - const { issueId, command, issue } = await getIssueInfo(github, context, inputs); + const repo = inputs.target_repo || { owner: context.repo.owner, repo: context.repo.repo }; + + const { issueId, command, issue } = await getIssueInfo(github, repo, inputs, context.eventName, context.payload); const isPullRequest = !!issue.data.pull_request; - const mode = command.startsWith('review') - ? 'reviewer' - : isPullRequest || command.startsWith('implement') - ? 'implementer' - : 'refiner'; + + const COMMAND_MODES = { test: 'tester', review: 'reviewer', implement: 'implementer' }; + const mode = + Object.entries(COMMAND_MODES).find(([prefix]) => command.startsWith(prefix))?.[1] ?? + (isPullRequest ? 
'implementer' : 'refiner'); console.log(`Is PR: ${isPullRequest}, Mode: ${mode}`); - const branchName = await determineBranch(github, context, issueId, mode, isPullRequest); + const branchName = await determineBranch(github, repo, issueId, mode, isPullRequest); console.log(`Building prompts - mode: ${mode}, issue: ${issueId}, is PR: ${isPullRequest}`); - const { sessionId, systemPrompt, prompt } = buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs); + const { sessionId, systemPrompt, prompt } = buildPrompts( + mode, + issueId, + isPullRequest, + command, + branchName, + inputs, + repo + ); console.log(`Session ID: ${sessionId}`); console.log(`Task prompt: "${prompt}"`); @@ -113,6 +137,7 @@ module.exports = async (context, github, core, inputs) => { core.setOutput('session_id', sessionId); core.setOutput('system_prompt', systemPrompt); core.setOutput('prompt', prompt); + core.setOutput('mode', mode); } catch (error) { const errorMsg = `Failed: ${error.message}`; console.error(errorMsg); diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index bfba8c80..92482446 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -70,6 +70,9 @@ jobs: fetch-depth: 0 - name: Add strands-running label + # continue-on-error: workflow_dispatch from a fork targets the fork repo + # where the upstream issue/PR doesn't exist, causing a 404. + continue-on-error: true uses: actions/github-script@v8 with: script: | @@ -90,10 +93,40 @@ jobs: const inputs = { issue_id: '${{ inputs.issue_id }}', command: '${{ inputs.command }}', - session_id: '${{ inputs.session_id }}' + session_id: '${{ inputs.session_id }}', + // When dispatched from a fork, target the upstream repo for API calls + ...(context.eventName === 'workflow_dispatch' && context.repo.owner !== 'aws' + ? 
{ target_repo: { owner: 'aws', repo: 'agentcore-cli' } } + : {}), }; await processInputs(context, github, core, inputs); + - name: Setup Node.js (tester mode) + if: steps.process-inputs.outputs.mode == 'tester' + uses: actions/setup-node@v6 + with: + node-version: 20.x + cache: 'npm' + + - name: Build CLI and TUI harness (tester mode) + if: steps.process-inputs.outputs.mode == 'tester' + run: | + npm ci + npm run build + npm run build:harness + npm install -g "$(npm pack | tail -1)" + mkdir -p /tmp/mcp-harness + cp dist/mcp-harness/index.mjs /tmp/mcp-harness/index.mjs + cd /tmp/mcp-harness && npm init -y && npm install node-pty @xterm/headless express + + - name: Set MCP harness path + if: steps.process-inputs.outputs.mode == 'tester' + id: mcp-config + run: + echo + "mcp_servers={\"mcpServers\":{\"tui-harness\":{\"command\":\"node\",\"args\":[\"/tmp/mcp-harness/index.mjs\"]}}}" + >> "$GITHUB_OUTPUT" + - name: Run Strands Agent uses: ./.github/actions/strands-action with: @@ -102,6 +135,7 @@ jobs: provider: 'bedrock' model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0' tools: 'strands_tools:shell,retrieve' + mcp_servers: ${{ steps.mcp-config.outputs.mcp_servers || '' }} aws_role_arn: ${{ secrets.AWS_ROLE_ARN }} aws_region: 'us-west-2' pat_token: ${{ secrets.GITHUB_TOKEN }} @@ -110,6 +144,14 @@ jobs: S3_SESSION_BUCKET: ${{ secrets.AGENT_SESSIONS_BUCKET }} BRANCH_NAME: ${{ steps.process-inputs.outputs.branch_name }} + - name: Upload TUI screenshots + if: always() && steps.process-inputs.outputs.mode == 'tester' + uses: actions/upload-artifact@v4 + with: + name: tui-screenshots + path: /tmp/tui-screenshots/ + if-no-files-found: ignore + - name: Remove strands-running label if: always() uses: actions/github-script@v8