Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions centml/cli/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,30 @@ def resume(id):
with get_centml_client() as cclient:
cclient.resume(id)
click.echo("Deployment has been resumed")


@click.command(help="Show GPU capacity across clusters")
@click.option("--cluster-id", type=int, default=None, help="Filter to a specific cluster")
@handle_exception
def capacity(cluster_id):
    """Print a table of GPU usage per cluster and GPU type.

    One row is emitted for every (cluster, GPU type) pair returned by
    ``get_capacity``. When there is nothing to show -- no result, an empty
    result, or clusters that report no GPU types -- a short message is
    printed instead of a header-only table.

    Args:
        cluster_id: Optional cluster ID forwarded to ``get_capacity``;
            ``None`` (the option default) requests capacity without a
            cluster filter.
    """
    with get_centml_client() as cclient:
        clusters = cclient.get_capacity(cluster_id)

        rows = []
        # `or ()` lets a missing (None) collection behave like an empty one
        # at both levels, so the loops simply produce no rows.
        for cluster in clusters or ():
            for gpu in cluster.gpu_types or ():
                # Guard the division: a GPU type with no capacity reports 0%.
                utilization = (gpu.used_gpus / gpu.total_gpus * 100) if gpu.total_gpus > 0 else 0
                rows.append(
                    [
                        cluster.cluster_name,
                        gpu.gpu_type,
                        gpu.used_gpus,
                        gpu.total_gpus,
                        f"{utilization:.1f}%",
                    ]
                )

        if not rows:
            click.echo("No accelerator capacity available")
            return

        click.echo(
            tabulate(
                rows,
                headers=["Cluster", "GPU Type", "Used", "Total", "Utilization"],
                tablefmt="rounded_outline",
                # Keep values exactly as given; do not let tabulate reformat numbers.
                disable_numparse=True,
            )
        )
3 changes: 2 additions & 1 deletion centml/cli/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import click

from centml.cli.login import login, logout
from centml.cli.cluster import ls, get, delete, pause, resume
from centml.cli.cluster import ls, get, delete, pause, resume, capacity


@click.group()
Expand Down Expand Up @@ -47,6 +47,7 @@ def ccluster():
ccluster.add_command(delete)
ccluster.add_command(pause)
ccluster.add_command(resume)
ccluster.add_command(capacity)


cli.add_command(ccluster, name="cluster")
3 changes: 3 additions & 0 deletions centml/sdk/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ def invite_user(self, email: str):
request = InviteUserRequest(email=email)
return self._api.invite_user_organizations_invite_post(request)

def get_capacity(self, cluster_id=None):
    """Return the ``results`` of the cluster-capacity listing call.

    Args:
        cluster_id: Optional cluster ID passed through to the API call as
            its ``cluster_id`` keyword argument; defaults to ``None``.

    Returns:
        The ``results`` attribute of the API response, unmodified.
    """
    response = self._api.list_cluster_capacity_capacity_get(cluster_id=cluster_id)
    return response.results


@contextmanager
def get_centml_client():
Expand Down
Loading