From 2bae9a4a6217fa9f6d6623064e7e4cdb97b66eb9 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Tue, 7 Apr 2026 11:18:18 -0400 Subject: [PATCH] add arm64 support for clock frequency helpers homa_clock() calls get_cycles(), whose backing source is arch-specific: RDTSC on x86 (frequency tsc_khz) and the generic timer on arm64 (frequency arch_timer_get_cntfrq()). Make homa_clock_khz() return the correct value per arch: - x86: tsc_khz (not cpu_khz -- the kernel tracks them separately and they can diverge) - arm64: arch_timer_get_cntfrq() / 1000 - other: 1000000 (fallback) Guard the tsc_khz-dependent scaling in homa_metrics_print() behind CONFIG_X86 and emit raw cycle counts on other arches where Linux and homa_clock() share the same counter. Replace bare tsc_khz references in timetrace.c with homa_clock_khz() so the emitted cpu_khz header is correct on all architectures. Signed-off-by: Serapheim Dimitropoulos --- homa_impl.h | 11 ++++++++++- homa_metrics.c | 15 ++++++++++++++- timetrace.c | 7 ++++--- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/homa_impl.h b/homa_impl.h index 7393ecda..a69acf8d 100644 --- a/homa_impl.h +++ b/homa_impl.h @@ -46,6 +46,9 @@ #ifndef __UPSTREAM__ /* See strip.py */ #include "homa.h" #include +#ifdef CONFIG_ARM64 +#include +#endif #include "homa_devel.h" #else /* See strip.py */ #include @@ -842,7 +845,13 @@ static inline u64 homa_clock_khz(void) return 1000000; #else /* __UNIT_TEST__ */ #ifndef __UPSTREAM__ /* See strip.py */ - return cpu_khz; +#ifdef CONFIG_X86 + return tsc_khz; +#elif defined(CONFIG_ARM64) + return arch_timer_get_cntfrq() / 1000; +#else + return 1000000; +#endif #else /* See strip.py */ return 1000000; #endif /* See strip.py */ diff --git a/homa_metrics.c b/homa_metrics.c index dee3b123..239d21a1 100644 --- a/homa_metrics.c +++ b/homa_metrics.c @@ -241,13 +241,26 @@ char *homa_metrics_print(void) M("bypass_softirq_cycles", m->bypass_softirq_cycles, "Time spent in homa_softirq during bypass from GRO\n"); - /* Adjust stats gathered in Linux that use rdtsc. */ + /* Adjust stats gathered in Linux via get_cycles (RDTSC on + * x86, arch timer on arm64). On x86 both Linux and + * homa_clock() read the TSC, so the ratio is 1; the + * explicit conversion is kept in case the sources ever + * diverge. On non-x86 the counters are identical, so raw + * values are emitted directly. + */ +#ifdef CONFIG_X86 M("linux_softirq_cycles", m->linux_softirq_cycles * (homa_clock_khz() / 1000) / (tsc_khz / 1000), "Time spent in all Linux SoftIRQ\n"); M("napi_cycles", m->napi_cycles * (homa_clock_khz() / 1000) / (tsc_khz / 1000), "Time spent in NAPI-level packet handling\n"); +#else + M("linux_softirq_cycles", m->linux_softirq_cycles, + "Time spent in all Linux SoftIRQ\n"); + M("napi_cycles", m->napi_cycles, + "Time spent in NAPI-level packet handling\n"); +#endif M("linux_softirqd_actions", m->linux_softirqd_actions, "SoftIRQ actions taken in the background softirqd thread\n"); M("send_cycles", m->send_cycles, diff --git a/timetrace.c b/timetrace.c index e32b809a..0e7d91ed 100644 --- a/timetrace.c +++ b/timetrace.c @@ -390,7 +390,8 @@ int tt_proc_open(struct inode *inode, struct file *file) if (!tt_test_no_khz) { pf->bytes_available = snprintf(pf->msg_storage, TT_PF_BUF_SIZE, - "cpu_khz: %u\n", tsc_khz); + "cpu_khz: %llu\n", + homa_clock_khz()); } done: @@ -631,7 +632,7 @@ void tt_print_file(char *path) bytes_used += snprintf(buffer + bytes_used, sizeof(buffer) - bytes_used, - "cpu_khz: %u\n", tsc_khz); + "cpu_khz: %llu\n", homa_clock_khz()); /* Each iteration of this loop printk's one event. */ while (true) { @@ -758,7 +759,7 @@ void tt_printk(void) } #endif - pr_err("cpu_khz: %u, start: %llu\n", tsc_khz, start_time); + pr_err("cpu_khz: %llu, start: %llu\n", homa_clock_khz(), start_time); /* Each iteration of this loop printk's one event. */ while (true) {