diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py
index 0fd8a09..5404962 100644
--- a/centml/cli/cluster.py
+++ b/centml/cli/cluster.py
@@ -232,3 +232,34 @@ def resume(id):
     with get_centml_client() as cclient:
         cclient.resume(id)
         click.echo("Deployment has been resumed")
+
+
+@click.command(help="Show GPU capacity across clusters")
+@click.option("--cluster-id", type=int, default=None, help="Filter to a specific cluster")
+@handle_exception
+def capacity(cluster_id):
+    """Print a table of used/total GPUs per cluster and GPU type."""
+    with get_centml_client() as cclient:
+        clusters = cclient.get_capacity(cluster_id)
+
+    # One row per (cluster, GPU type); a None or empty response yields no rows.
+    rows = []
+    for cluster in clusters or []:
+        for gpu in cluster.gpu_types or []:
+            # Guard the division against a zero total.
+            utilization = (gpu.used_gpus / gpu.total_gpus * 100) if gpu.total_gpus > 0 else 0
+            rows.append([cluster.cluster_name, gpu.gpu_type, gpu.used_gpus, gpu.total_gpus, f"{utilization:.1f}%"])
+
+    # Avoid printing a header-only table when there is nothing to show.
+    if not rows:
+        click.echo("No accelerator capacity available")
+        return
+
+    click.echo(
+        tabulate(
+            rows,
+            headers=["Cluster", "GPU Type", "Used", "Total", "Utilization"],
+            tablefmt="rounded_outline",
+            disable_numparse=True,
+        )
+    )
diff --git a/centml/cli/main.py b/centml/cli/main.py
index b1ecc73..e45e251 100644
--- a/centml/cli/main.py
+++ b/centml/cli/main.py
@@ -1,7 +1,7 @@
 import click
 
 from centml.cli.login import login, logout
-from centml.cli.cluster import ls, get, delete, pause, resume
+from centml.cli.cluster import ls, get, delete, pause, resume, capacity
 
 
 @click.group()
@@ -47,6 +47,7 @@ def ccluster():
 ccluster.add_command(delete)
 ccluster.add_command(pause)
 ccluster.add_command(resume)
+ccluster.add_command(capacity)
 
 
 cli.add_command(ccluster, name="cluster")
diff --git a/centml/sdk/api.py b/centml/sdk/api.py
index ea07c20..319c297 100644
--- a/centml/sdk/api.py
+++ b/centml/sdk/api.py
@@ -145,6 +145,10 @@ def invite_user(self, email: str):
         request = InviteUserRequest(email=email)
         return self._api.invite_user_organizations_invite_post(request)
 
+    def get_capacity(self, cluster_id=None):
+        """Return the capacity results, optionally filtered to ``cluster_id``."""
+        return self._api.list_cluster_capacity_capacity_get(cluster_id=cluster_id).results
+
 
 @contextmanager
 def get_centml_client():