Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
1684078
feat: add CLI shell and exec commands for deployment pod terminal access
V2arK Mar 12, 2026
e26170c
fix: use urlparse for scheme replacement to satisfy CodeQL
V2arK Mar 12, 2026
996e25b
fix: apply black formatting and fix CodeQL url.startswith alert
V2arK Mar 12, 2026
3c380ed
style: condense multiline expressions for readability
V2arK Mar 12, 2026
1d4a977
fix: resolve pylint warnings in shell.py and test_shell.py
V2arK Mar 12, 2026
d84ce85
fix: skip PyTorch-dependent tests in sanity mode
V2arK Mar 12, 2026
cef57d7
fix: break out of exec loop after end marker to prevent hanging
V2arK Mar 12, 2026
c4b5757
fix: re-enable OPOST after setraw to fix terminal rendering
V2arK Mar 12, 2026
58a6005
fix: replace pytest-asyncio with asyncio.run in tests for CI compat
V2arK Mar 12, 2026
7f60fdc
fix: match Web UI protocol - remove rows/cols from stdin messages, re…
V2arK Mar 12, 2026
20cf81d
fix: send delayed resize to fix prompt rendering after shell startup
V2arK Mar 12, 2026
d3529f2
fix: await cancelled tasks for cleanup, reduce WS close_timeout to 2s
V2arK Mar 12, 2026
0ae4ba4
fix: toggle PTY width to force SIGWINCH and prompt redraw on connect
V2arK Mar 12, 2026
a661f3b
fix: include rows/cols in stdin messages and send Ctrl+L after resize…
V2arK Mar 12, 2026
90f8d9e
fix: use stty to set PTY dimensions from inside shell instead of resi…
V2arK Mar 12, 2026
69b8aad
fix: re-enable OPOST after setraw to convert bare \n to \r\n like xte…
V2arK Mar 12, 2026
dd62ac5
fix: convert \n to \r\n in output and use stty to fix PTY dimensions …
V2arK Mar 12, 2026
1ba56af
feat: use pyte terminal emulator for interactive shell rendering
V2arK Mar 12, 2026
7449bf0
fix: swap rows/cols unpacking from shutil.get_terminal_size
V2arK Mar 12, 2026
ec9230b
fix: use alternate screen buffer to prevent scrollback in Warp terminal
V2arK Mar 12, 2026
571059c
fix: handle WebSocket ConnectionClosed to prevent hang on shell exit
V2arK Mar 12, 2026
192a826
refactor: use pyte for exec ANSI stripping and add ConnectionClosed h…
V2arK Mar 12, 2026
68142c4
fix: treat ArgoCD Code message as reconnect signal, not shell exit code
V2arK Mar 12, 2026
18bb688
fix: stop reconnecting when shell has genuinely exited
V2arK Mar 12, 2026
289362a
chore: add debug file logging to shell and exec for exit hang diagnosis
V2arK Mar 13, 2026
94b7b64
fix: detect shell exit via idle timeout instead of Code message
V2arK Mar 13, 2026
b7e2441
fix: exit immediately on exit echo, ignore echo exit with trailing pr…
V2arK Mar 13, 2026
62176d6
fix: skip websocket close handshake wait after session ends
V2arK Mar 13, 2026
13d1d48
refactor: extract shell logic from CLI to SDK layer
V2arK Mar 13, 2026
4e619ec
refactor: extract shell logic to SDK layer, rely on server close frame
V2arK Mar 13, 2026
1101c51
ruff format
V2arK Mar 13, 2026
2a201ed
refactor: remove debug logging, fix unused imports and SDK/CLI bounda…
V2arK Mar 13, 2026
d3d2064
update redundancy
michaelshin Mar 17, 2026
f79fbf0
clean up pyte
michaelshin Mar 17, 2026
bb14ba2
clean up implementation
michaelshin Mar 17, 2026
ea6e0c8
address comments
michaelshin Mar 17, 2026
78cfb45
fix lint
michaelshin Mar 17, 2026
6f84d40
address comments
michaelshin Mar 31, 2026
1dca1d6
revert
michaelshin Mar 31, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions centml/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from centml.cli.login import login, logout
from centml.cli.cluster import ls, get, delete, pause, resume
from centml.cli.shell import shell, exec_cmd


@click.group()
Expand Down Expand Up @@ -47,6 +48,8 @@ def ccluster():
ccluster.add_command(delete)
ccluster.add_command(pause)
ccluster.add_command(resume)
# Pod terminal access: `shell` opens an interactive session, while `exec_cmd`
# runs a single command and is registered under the CLI name "exec".
ccluster.add_command(shell)
ccluster.add_command(exec_cmd, name="exec")


cli.add_command(ccluster, name="cluster")
84 changes: 84 additions & 0 deletions centml/cli/shell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""CLI commands for interactive shell and command execution in deployment pods."""

import asyncio
import shlex
import sys

import click

from centml.cli.cluster import handle_exception
from centml.sdk import auth
from centml.sdk.api import get_centml_client
from centml.sdk.config import settings
from centml.sdk.shell import build_ws_url, exec_session, get_running_pods, interactive_session


def _select_pod(running_pods, deployment_id):
    """Ask the user which of several running pods to use.

    Prints a 1-based numbered menu of ``running_pods`` and prompts for a
    number within that range.

    Args:
        running_pods: Sequence of pod names to choose from.
        deployment_id: Deployment identifier, used only in the menu header.

    Returns:
        The element of ``running_pods`` matching the number entered.
    """
    pod_count = len(running_pods)

    click.echo(f"Multiple running pods found for deployment {deployment_id}:")
    for position, pod_name in enumerate(running_pods, start=1):
        click.echo(f" [{position}] {pod_name}")

    selected = click.prompt(
        "Select a pod",
        type=click.IntRange(1, pod_count),
        prompt_suffix=f" [1-{pod_count}]: ",
    )
    # The menu is 1-based; convert the answer back to a sequence index.
    return running_pods[selected - 1]


def _connect_args(deployment_id, pod, shell_type, first_pod=False):
    """Resolve the target pod and return ``(ws_url, token)`` for a session.

    Pod resolution, in order:
      * ``pod`` given: it must be one of the running pods.
      * ``first_pod`` set, or exactly one running pod: the first running pod.
      * stdin is a TTY: the user picks from a numbered menu.
      * otherwise: an error asking for ``--pod`` or ``--first-pod``.

    Args:
        deployment_id: Deployment whose running pods are looked up.
        pod: Explicit pod name, or ``None`` to resolve one automatically.
        shell_type: Passed through unchanged to ``build_ws_url``.
        first_pod: When true, skip the interactive menu and take the first pod.

    Returns:
        A ``(ws_url, token)`` tuple.

    Raises:
        click.ClickException: If there are no running pods, ``pod`` is not
            among them, or the choice is ambiguous and stdin is not a TTY.
    """
    # The API client is only used for the running-pod lookup.
    with get_centml_client() as cclient:
        candidates = get_running_pods(cclient, deployment_id)

        if not candidates:
            raise click.ClickException(f"No running pods found for deployment {deployment_id}")

        if pod is not None:
            if pod not in candidates:
                available = ", ".join(candidates)
                raise click.ClickException(f"Pod '{pod}' not found. Available running pods: {available}")
            target = pod
        elif first_pod or len(candidates) == 1:
            target = candidates[0]
        elif sys.stdin.isatty():
            target = _select_pod(candidates, deployment_id)
        else:
            # Several pods and nobody to ask: refuse rather than guess.
            raise click.ClickException(
                "Multiple running pods found and stdin is not a TTY. "
                "Please specify a pod with --pod or use --first-pod."
            )

    ws_url = build_ws_url(settings.CENTML_PLATFORM_API_URL, deployment_id, target, shell_type)
    token = auth.get_centml_token()
    return ws_url, token


@click.command(help="Open an interactive shell to a deployment pod")
@click.argument("deployment_id", type=int)
@click.option("--pod", default=None, help="Specify a pod name")
@click.option("--shell", "shell_type", default=None, type=click.Choice(["bash", "sh", "zsh"]), help="Shell type")
@click.option(
"--first-pod", is_flag=True, default=False, help="Auto-select the first running pod (skip interactive selection)"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need this, right? If --pod is not provided, then we default to the first pod.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there are multiple replicas, then we interactively ask the user for a pod. --first-pod ensures that we apply the command to the first pod.

)
@handle_exception
def shell(deployment_id, pod, shell_type, first_pod):
    """Run an interactive shell session in a deployment pod, then exit.

    Refuses to start unless stdin is a TTY. Otherwise resolves the
    connection arguments, runs ``interactive_session`` to completion, and
    terminates the process with the value that session returns.

    Raises:
        click.ClickException: If stdin is not a TTY.
    """
    stdin_is_terminal = sys.stdin.isatty()
    if not stdin_is_terminal:
        raise click.ClickException("Interactive shell requires a terminal (TTY)")

    ws_url, token = _connect_args(deployment_id, pod, shell_type, first_pod)
    session = interactive_session(ws_url, token)
    # The session's return value becomes this process's exit status.
    sys.exit(asyncio.run(session))


@click.command(
    help="Execute a command in a deployment pod",
    context_settings={"ignore_unknown_options": True},
)
@click.argument("deployment_id", type=int)
@click.argument("command", nargs=-1, required=True, type=click.UNPROCESSED)
@click.option("--pod", default=None, help="Specific pod name")
@click.option(
    "--shell",
    "shell_type",
    default=None,
    type=click.Choice(["bash", "sh", "zsh"]),
    help="Shell type",
)
@click.option(
    "--first-pod",
    is_flag=True,
    default=False,
    help="Auto-select the first running pod (skip interactive selection)",
)
@handle_exception
def exec_cmd(deployment_id, command, pod, shell_type, first_pod):
    """Run one command in a deployment pod and exit with the session's result.

    ``command`` is the tuple of argument tokens collected by click. The
    tokens are combined into a single string with ``shlex.join`` and handed
    to ``exec_session``; the value that session returns is used as this
    process's exit status.
    """
    ws_url, token = _connect_args(deployment_id, pod, shell_type, first_pod)
    # shlex.join shell-quotes each token and joins them with spaces.
    command_line = shlex.join(command)
    status = asyncio.run(exec_session(ws_url, token, command_line))
    sys.exit(status)
3 changes: 3 additions & 0 deletions centml/sdk/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ def get(self, depl_type):
def get_status(self, id):
    """Return the status of the deployment identified by ``id``.

    Thin wrapper: forwards ``id`` to the deployment-status call on
    ``self._api`` and returns its result unchanged.
    """
    # NOTE(review): the parameter name shadows the builtin ``id``; it is kept
    # as-is because renaming it would break callers passing it by keyword.
    return self._api.get_deployment_status_deployments_status_deployment_id_get(id)

def get_status_v3(self, deployment_id):
    """Return the V3 status of the deployment identified by ``deployment_id``.

    Thin wrapper: forwards ``deployment_id`` to the V3 deployment-status call
    on ``self._api`` and returns its result unchanged.
    """
    return self._api.get_deployment_status_v3_deployments_status_v3_deployment_id_get(deployment_id)

def get_inference(self, id):
"""Get Inference deployment details - automatically handles both V2 and V3 deployments"""
# Try V3 first (recommended), fallback to V2 if deployment is V2
Expand Down
12 changes: 12 additions & 0 deletions centml/sdk/shell/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from centml.sdk.shell.exceptions import NoPodAvailableError, PodNotFoundError, ShellError
from centml.sdk.shell.session import build_ws_url, exec_session, get_running_pods, interactive_session

# Public names re-exported by this package: the exception types first,
# followed by the session helpers imported from ``session``.
__all__ = [
    "NoPodAvailableError",
    "PodNotFoundError",
    "ShellError",
    "build_ws_url",
    "exec_session",
    "get_running_pods",
    "interactive_session",
]
10 changes: 10 additions & 0 deletions centml/sdk/shell/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class ShellError(Exception):
    """Base exception for shell operations.

    ``NoPodAvailableError`` and ``PodNotFoundError`` derive from this class,
    so catching ``ShellError`` also catches both of them.
    """


class NoPodAvailableError(ShellError):
    """No running pods found for the deployment.

    Subclass of ``ShellError``; adds no attributes or behavior of its own.
    """


class PodNotFoundError(ShellError):
    """Specified pod not found among running pods.

    Subclass of ``ShellError``; adds no attributes or behavior of its own.
    """
Loading
Loading