diff --git a/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/cpu_util_table.rs b/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/cpu_util_table.rs index fc478cf6f..585964be9 100644 --- a/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/cpu_util_table.rs +++ b/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/cpu_util_table.rs @@ -1,14 +1,20 @@ use crate::common::format::human_size; +use crate::dashboard::data::DashboardData; +use itertools::Itertools; use ratatui::layout::{Constraint, Rect}; -use ratatui::style::Style; use ratatui::widgets::{Cell, Row, Table}; use std::cmp; use tako::hwstats::MemoryStats; +use tako::resources::{ + CPU_RESOURCE_NAME, ResourceDescriptorItem, ResourceDescriptorKind, ResourceIndex, +}; +use tako::worker::WorkerConfiguration; +use tako::{Set, WorkerId}; -use crate::dashboard::ui::styles; +use crate::dashboard::ui::styles::{self, table_style_deselected}; use crate::dashboard::ui::terminal::DashboardFrame; use crate::dashboard::ui::widgets::progressbar::{ - ProgressPrintStyle, get_progress_bar_color, render_progress_bar_at, + ProgressPrintStyle, get_cpu_progress_bar_color, render_progress_bar_at, }; use crate::dashboard::utils::calculate_average; @@ -16,66 +22,288 @@ const CPU_METER_PROGRESSBAR_WIDTH: u8 = 18; // 4 characters for the label const CPU_METER_WIDTH: u8 = CPU_METER_PROGRESSBAR_WIDTH + 4; -pub fn render_cpu_util_table( - cpu_util_list: &[f64], - mem_util: &MemoryStats, - rect: Rect, - frame: &mut DashboardFrame, - table_style: Style, -) { - if cpu_util_list.is_empty() { - return; +#[derive(Default)] +pub struct CpuUtilTable { + utilization: Option, + cpu_view_mode: CpuViewMode, + cpu_state: Option, +} + +#[derive(Default, PartialEq)] +pub enum CpuViewMode { + Global, + #[default] + WorkerManaged, + WorkerAssigned, +} + +pub enum CpuScope { + /// Worker manages all of the known Node cpus + Node, + /// Worker manages only a subset of known Node cpus + Subset, +} + +impl CpuViewMode { + fn next(&mut self, cpu_scope: CpuScope) { + match cpu_scope { + CpuScope::Node => { + *self = match self { + CpuViewMode::WorkerManaged => CpuViewMode::WorkerAssigned, + CpuViewMode::WorkerAssigned => CpuViewMode::WorkerManaged, + CpuViewMode::Global => CpuViewMode::WorkerManaged, // To skip out of the global in case the state changes + } + } + CpuScope::Subset => { + *self = match self { + CpuViewMode::Global => CpuViewMode::WorkerManaged, + CpuViewMode::WorkerManaged => CpuViewMode::WorkerAssigned, + CpuViewMode::WorkerAssigned => CpuViewMode::Global, + } + } + } } - let constraints = get_column_constraints(rect, cpu_util_list.len()); - let width = constraints.len(); - let height = (cpu_util_list.len() as f64 / width as f64).ceil() as usize; + fn next_text(&self, cpu_scope: CpuScope) -> &str { + match cpu_scope { + CpuScope::Node => { + match self { + CpuViewMode::WorkerManaged => "Show worker assigned CPU utilization", + CpuViewMode::WorkerAssigned => "Show worker managed CPU utilization", + CpuViewMode::Global => "Show worker managed CPU utilization", // To skip out of the global in case the state changes + } + } + CpuScope::Subset => match self { + CpuViewMode::Global => "Show worker managed CPU utilization", + CpuViewMode::WorkerManaged => "Show worker assigned CPU utilization", + CpuViewMode::WorkerAssigned => "Show global CPU utilization", + }, + } + } + + fn get_visible_indices( + &self, + total_cpus: usize, + cpu_state: &WorkerCpuState, + ) -> Set { + match self { + CpuViewMode::Global => (0..total_cpus) + .map(|idx| ResourceIndex::new(idx as u32)) + .collect(), + CpuViewMode::WorkerManaged => cpu_state.managed_cpus.clone(), + CpuViewMode::WorkerAssigned => cpu_state.assigned_cpus.clone(), + } + } + + fn set_default(&mut self) { + *self = CpuViewMode::WorkerManaged; + } +} + +struct Utilization { + cpu: Vec, + memory: MemoryStats, +} + +struct WorkerCpuState { + /// CPU cores currently managed by the worker. + managed_cpus: Set, + /// CPU cores assigned to at least a single task that is currently running on this worker. + assigned_cpus: Set, +} + +impl CpuUtilTable { + pub fn update( + &mut self, + data: &DashboardData, + worker_id: WorkerId, + worker_config: Option<&WorkerConfiguration>, + ) { + if let Some(configuration) = worker_config { + let managed_cpus: Option<&ResourceDescriptorItem> = configuration + .resources + .resources + .iter() + .find(|resource| resource.name == CPU_RESOURCE_NAME); + + let managed_cpus = if let Some(managed_cpus) = managed_cpus { + cpu_resource_desc_to_idx(managed_cpus).unwrap_or_default() + } else { + Set::default() + }; + + if let Some(overview) = data + .workers() + .query_worker_overview_at(worker_id, data.current_time()) + { + let assigned_cpus: Set = match self.cpu_view_mode { + CpuViewMode::WorkerManaged | CpuViewMode::WorkerAssigned => overview + .item + .running_tasks + .iter() + .flat_map(|(_id, task_resource_alloc)| { + task_resource_alloc + .resources + .iter() + .filter_map(|resource_alloc| { + if resource_alloc.resource == CPU_RESOURCE_NAME { + Some(resource_alloc.indices.iter().map(|(index, _)| *index)) + } else { + None + } + }) + }) + .flatten() + .collect(), + CpuViewMode::Global => Set::default(), + }; + + self.cpu_state = Some(WorkerCpuState { + managed_cpus, + assigned_cpus, + }); - let mut rows: Vec> = vec![vec![]; height]; - for (position, &cpu_util) in cpu_util_list.iter().enumerate() { - let row = position % height; - rows[row].push((cpu_util, position)); + if let Some(hw_state) = overview.item.hw_state.as_ref() { + self.utilization = Some(Utilization { + cpu: hw_state + .state + .cpu_usage + .cpu_per_core_percent_usage + .iter() + .map(|&v| v as f64) + .collect(), + memory: hw_state.state.memory_usage.clone(), + }) + } + } else { + self.cpu_state = None; + } + } else { + self.cpu_state = None; + } } - let rows: Vec = rows - .into_iter() - .map(|targets| { - let columns: Vec = targets + pub fn draw(&mut self, rect: Rect, frame: &mut DashboardFrame) { + if let (Some(util), Some(cpu_state)) = (&self.utilization, &self.cpu_state) { + if util.cpu.is_empty() { + return; + } + + let visible_indices = self + .cpu_view_mode + .get_visible_indices(util.cpu.len(), cpu_state); + + let cell_data: Vec<(u32, f64, bool)> = visible_indices .into_iter() - .map(|(cpu_util, position)| { - let progress = cpu_util / 100.00; - Cell::from(render_progress_bar_at( - Some(format!("{position:>3} ")), - progress, - CPU_METER_PROGRESSBAR_WIDTH, - ProgressPrintStyle::default(), - )) - .style(get_progress_bar_color(progress)) + .map(|idx| { + let val = util + .cpu + .get(idx.as_num() as usize) + .copied() + .unwrap_or_default(); + let is_used = cpu_state.assigned_cpus.contains(&idx); + (idx.as_num(), val, is_used) }) + .sorted_by_key(|&(idx, _, used)| (std::cmp::Reverse(used), idx)) .collect(); - Row::new(columns) - }) - .collect(); - - let avg_cpu = calculate_average(cpu_util_list); - - let mem_used = mem_util.total - mem_util.free; - let title = styles::table_title(format!( - "Worker Utilization ({} CPUs), Avg CPU = {:.0}%, Mem = {:.0}% ({}/{})", - cpu_util_list.len(), - avg_cpu, - (mem_used as f64 / mem_util.total as f64) * 100.0, - human_size(mem_used), - human_size(mem_util.total) - )); - let body_block = styles::table_block_with_title(title); - - let table = Table::new(rows, constraints) - .block(body_block) - .row_highlight_style(styles::style_table_highlight()) - .style(table_style); - - frame.render_widget(table, rect); + + let constraints = get_column_constraints(rect, cell_data.len()); + + let width = constraints.len(); + + let total_cells = cell_data.len(); + let rows: Vec = if width > 0 && total_cells > 0 { + let num_rows = total_cells.div_ceil(width); + + (0..num_rows) + .map(|row_start_idx| { + let cells: Vec = cell_data + .iter() + .skip(row_start_idx) + .step_by(num_rows) + .map(|(id, cpu_util, used)| { + let progress = cpu_util / 100.0; + let style = get_cpu_progress_bar_color( + progress, + *used, + &self.cpu_view_mode, + ); + + Cell::from(render_progress_bar_at( + Some(format!("{id:>3} ")), + progress, + CPU_METER_PROGRESSBAR_WIDTH, + ProgressPrintStyle::default(), + )) + .style(style) + }) + .collect(); + + Row::new(cells) + }) + .collect() + } else { + vec![] + }; + + let mem_used = util.memory.total - util.memory.free; + let (which_util, num_cpus, avg_cpu) = + create_title_info(&self.cpu_view_mode, &cell_data); + + let title = styles::table_title(format!( + "{} Utilization ({} CPUs), Avg CPU = {:.0}%, Mem = {:.0}% ({}/{})", + which_util, + num_cpus, + avg_cpu, + (mem_used as f64 / util.memory.total as f64) * 100.0, + human_size(mem_used), + human_size(util.memory.total) + )); + let body_block = styles::table_block_with_title(title); + + let table = Table::new(rows, constraints) + .block(body_block) + .row_highlight_style(styles::style_table_highlight()) + .style(table_style_deselected()); + + frame.render_widget(table, rect); + } + } + + pub fn next_view(&mut self) { + let scope = self.get_current_scope(); + self.cpu_view_mode.next(scope); + } + + pub fn next_text(&mut self) -> &str { + let scope = self.get_current_scope(); + self.cpu_view_mode.next_text(scope) + } + + pub fn clear_table(&mut self) { + self.clear_util(); + self.set_default_view(); + } + + fn clear_util(&mut self) { + self.utilization = None; + } + + fn set_default_view(&mut self) { + self.cpu_view_mode.set_default(); + } + + fn get_current_scope(&self) -> CpuScope { + if let (Some(util), Some(cpu_state)) = (&self.utilization, &self.cpu_state) { + if util.cpu.len() == cpu_state.managed_cpus.len() { + CpuScope::Node + } else { + CpuScope::Subset + } + } else { + CpuScope::Node + } + } } /// Creates the column sizes for the cpu_util_table, each column divides the row equally. @@ -83,9 +311,60 @@ fn get_column_constraints(rect: Rect, num_cpus: usize) -> Vec { let max_columns = (rect.width / CPU_METER_WIDTH as u16) as usize; let num_columns = cmp::min(max_columns, num_cpus); - std::iter::repeat_n( - Constraint::Percentage((100 / num_columns) as u16), - num_columns, - ) - .collect() + if num_columns > 0 { + std::iter::repeat_n( + Constraint::Percentage((100 / num_columns) as u16), + num_columns, + ) + .collect() + } else { + vec![] + } +} + +fn create_title_info( + util_render_mode: &CpuViewMode, + cpu_table_data: &[(u32, f64, bool)], +) -> (String, usize, f64) { + let which_util = match util_render_mode { + CpuViewMode::Global => "Node", + CpuViewMode::WorkerManaged => "Worker Managed", + CpuViewMode::WorkerAssigned => "Worker Assigned", + } + .to_string(); + + let num_cpus = cpu_table_data.len(); + let cpu_utils = cpu_table_data + .iter() + .map(|(_, util, _)| *util) + .collect::>(); + let avg_usage = calculate_average(&cpu_utils); + + (which_util, num_cpus, avg_usage) +} + +/// Mapping of CPU resource descriptor item to set of Resource Indexes +fn cpu_resource_desc_to_idx(resource: &ResourceDescriptorItem) -> Option> { + match &resource.kind { + ResourceDescriptorKind::List { values } => values + .iter() + .map(|s| s.parse::()) + .collect::, _>>() + .ok(), + ResourceDescriptorKind::Range { start, end } => Some( + (u32::from(*start)..=u32::from(*end)) + .map(ResourceIndex::from) + .collect(), + ), + ResourceDescriptorKind::Groups { groups } => Some( + groups + .iter() + .flat_map(|group| group.iter()) + .map(|s| s.parse::()) + .collect::, _>>() + .ok()?, + ), + // Based on Resource kind `sum` cannot be used with CPUs. CPUs must have identity + ResourceDescriptorKind::Sum { .. } => unreachable!(), + } } diff --git a/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/mod.rs b/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/mod.rs index d5e608e54..012dc0b4d 100644 --- a/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/mod.rs +++ b/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/mod.rs @@ -1,20 +1,17 @@ use crate::dashboard::data::DashboardData; use crate::dashboard::data::timelines::job_timeline::TaskInfo; -use crate::dashboard::ui::screens::cluster::worker::cpu_util_table::render_cpu_util_table; +use crate::dashboard::ui::screens::cluster::worker::cpu_util_table::CpuUtilTable; use crate::dashboard::ui::screens::cluster::worker::worker_config_table::WorkerConfigTable; use crate::dashboard::ui::screens::cluster::worker::worker_utilization_chart::WorkerUtilizationChart; -use crate::dashboard::ui::styles::{ - style_footer, style_header_text, table_style_deselected, table_style_selected, -}; +use crate::dashboard::ui::styles::{style_footer, style_header_text, table_style_selected}; use crate::dashboard::ui::terminal::DashboardFrame; use crate::dashboard::ui::widgets::tasks_table::TasksTable; use crate::dashboard::ui::widgets::text::draw_text; use crossterm::event::{KeyCode, KeyEvent}; use ratatui::layout::{Constraint, Direction, Layout, Rect}; -use tako::hwstats::MemoryStats; use tako::{JobTaskId, WorkerId}; -mod cpu_util_table; +pub mod cpu_util_table; mod worker_config_table; mod worker_utilization_chart; @@ -24,8 +21,7 @@ pub struct WorkerDetail { utilization_history: WorkerUtilizationChart, worker_config_table: WorkerConfigTable, worker_tasks_table: TasksTable, - - utilization: Option, + cpu_util_table: CpuUtilTable, } impl Default for WorkerDetail { @@ -35,20 +31,15 @@ impl Default for WorkerDetail { utilization_history: Default::default(), worker_config_table: Default::default(), worker_tasks_table: TasksTable::non_interactive(), - utilization: None, + cpu_util_table: Default::default(), } } } -struct Utilization { - cpu: Vec, - memory: MemoryStats, -} - impl WorkerDetail { pub fn clear_worker_id(&mut self) { self.worker_id = None; - self.utilization = None; + self.cpu_util_table.clear_table(); } pub fn set_worker_id(&mut self, worker_id: WorkerId) { @@ -66,17 +57,19 @@ impl WorkerDetail { frame, style_header_text(), ); - draw_text(": Back", layout.footer, frame, style_footer()); - - if let Some(util) = &self.utilization { - render_cpu_util_table( - &util.cpu, - &util.memory, - layout.current_utilization, - frame, - table_style_deselected(), - ); - } + + draw_text( + format!( + ": Back, : {}", + self.cpu_util_table.next_text() + ) + .as_str(), + layout.footer, + frame, + style_footer(), + ); + + self.cpu_util_table.draw(layout.current_utilization, frame); self.utilization_history .draw(layout.utilization_history, frame); @@ -94,32 +87,19 @@ impl WorkerDetail { pub fn update(&mut self, data: &DashboardData) { if let Some(worker_id) = self.worker_id { self.utilization_history.update(data, worker_id); + let mut worker_config = None; - if let Some((cpu_util, mem_util)) = data - .workers() - .query_worker_overview_at(worker_id, data.current_time()) - .and_then(|overview| overview.item.hw_state.as_ref()) - .map(|hw_state| { - ( - &hw_state.state.cpu_usage.cpu_per_core_percent_usage, - &hw_state.state.memory_usage, - ) - }) - { - self.utilization = Some(Utilization { - cpu: cpu_util.iter().map(|&v| v as f64).collect(), - memory: mem_util.clone(), - }); + if let Some(configuration) = data.workers().query_worker_config_for(worker_id) { + self.worker_config_table.update(configuration); + worker_config = Some(configuration); } + self.cpu_util_table.update(data, worker_id, worker_config); + let tasks_info: Vec<(JobTaskId, &TaskInfo)> = data.query_task_history_for_worker(worker_id).collect(); self.worker_tasks_table .update(tasks_info, data.current_time()); - - if let Some(configuration) = data.workers().query_worker_config_for(worker_id) { - self.worker_config_table.update(configuration); - } } } @@ -127,6 +107,7 @@ impl WorkerDetail { pub fn handle_key(&mut self, key: KeyEvent) { match key.code { KeyCode::Backspace => self.worker_tasks_table.clear_selection(), + KeyCode::Char('c') => self.cpu_util_table.next_view(), _ => self.worker_tasks_table.handle_key(key), } } diff --git a/crates/hyperqueue/src/dashboard/ui/widgets/progressbar.rs b/crates/hyperqueue/src/dashboard/ui/widgets/progressbar.rs index 4d97126f4..dc3f56dd3 100644 --- a/crates/hyperqueue/src/dashboard/ui/widgets/progressbar.rs +++ b/crates/hyperqueue/src/dashboard/ui/widgets/progressbar.rs @@ -1,6 +1,8 @@ use ratatui::style::{Color, Modifier, Style}; use unicode_width::UnicodeWidthStr; +use crate::dashboard::ui::screens::cluster::worker::cpu_util_table::CpuViewMode; + const GREEN_THRESHOLD: f64 = 0.5; const YELLOW_THRESHOLD: f64 = 0.7; @@ -30,6 +32,39 @@ pub fn get_progress_bar_color(progress: f64) -> Style { } } +pub fn get_cpu_progress_bar_color( + progress: f64, + used: bool, + util_render_mode: &CpuViewMode, +) -> Style { + let color = match util_render_mode { + CpuViewMode::Global | CpuViewMode::WorkerAssigned => { + if progress <= GREEN_THRESHOLD { + Color::Green + } else if progress <= YELLOW_THRESHOLD { + Color::Yellow + } else { + Color::Red + } + } + CpuViewMode::WorkerManaged => match (progress, used) { + (progress, true) if progress <= GREEN_THRESHOLD => Color::Green, + (progress, true) if progress <= YELLOW_THRESHOLD => Color::Yellow, + (_, true) => Color::Red, + (progress, false) if progress <= GREEN_THRESHOLD => Color::LightBlue, + (progress, false) if progress <= YELLOW_THRESHOLD => Color::Cyan, + (_, false) => Color::Magenta, + }, + }; + + Style { + fg: Some(color), + bg: None, + add_modifier: Modifier::empty(), + sub_modifier: Modifier::empty(), + } +} + /** * Creates a string progress bar for 0 < progress < 1 */