From df2f677dee3ce4afda7ee561dd4f321c40320afd Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 15 Feb 2019 21:58:23 -0800 Subject: [PATCH 01/11] tools/power turbostat: Restore ability to execute in topology-order turbostat executes on CPUs in "topology order". This is an optimization for measuring profoundly idle systems -- as the closest hardware is woken next... Fix a typo that was added with the sub-die-node support, that broke topology ordering on multi-node systems. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9327c0ddc3a5..78d88b7d4e98 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -314,9 +314,8 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int retval, pkg_no, core_no, thread_no, node_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { - for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { - for (node_no = 0; node_no < topo.nodes_per_pkg; - node_no++) { + for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { + for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t; From 562855eeb1136009bc9d597116ac5829bf82dd06 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 19 Mar 2019 17:52:32 -0400 Subject: [PATCH 02/11] tools/power turbostat: Cleanup CC3-skip code no functional change Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 78d88b7d4e98..861cb795679b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -666,7 +666,7 @@ void print_header(char *delim) if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) + if (DO_BIC(BIC_CPU_c3)) outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); @@ -1002,7 +1002,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) + if (DO_BIC(BIC_CPU_c3)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); @@ -1817,7 +1817,7 @@ retry: if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) { + if (DO_BIC(BIC_CPU_c3)) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } @@ -4703,6 +4703,9 @@ void process_cpuid() do_knl_cstates = is_knl(family, model); do_cnl_cstates = is_cnl(family, model); + if (do_slm_cstates || do_knl_cstates || do_cnl_cstates) + BIC_NOT_PRESENT(BIC_CPU_c3); + if (!quiet) decode_misc_pwr_mgmt_msr(); From 31a1f15cea5e56fe8151ddf01278cb60c325626b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 19 Mar 2019 17:55:06 -0400 Subject: [PATCH 03/11] tools/power turbostat: Cleanup CNL-specific code no functional change. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 861cb795679b..f2384ee98cdc 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -63,7 +63,6 @@ unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; unsigned int do_slm_cstates; -unsigned int do_cnl_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; unsigned int has_epb; @@ -4701,9 +4700,8 @@ void process_cpuid() } do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - do_cnl_cstates = is_cnl(family, model); - if (do_slm_cstates || do_knl_cstates || do_cnl_cstates) + if (do_slm_cstates || do_knl_cstates || is_cnl(family, model)) BIC_NOT_PRESENT(BIC_CPU_c3); if (!quiet) From 937807d355a375393557674e3233662a7131c46b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 19 Mar 2019 19:09:07 -0400 Subject: [PATCH 04/11] tools/power turbostat: Add Icelake support From a turbostat point of view, Iceland is like Cannonlake. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f2384ee98cdc..4f81b52aef84 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4449,6 +4449,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: return INTEL_FAM6_SKYLAKE_MOBILE; + + case INTEL_FAM6_ICELAKE_MOBILE: + return INTEL_FAM6_CANNONLAKE_MOBILE; } return model; } From 6de68fe15a0fcd0e887d73bd7a549e4dc6446481 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 14 Feb 2019 19:17:40 -0800 Subject: [PATCH 05/11] tools/power turbostat: Add Die column If the system has more than one software visible die per package, print a Die column. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 42 +++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4f81b52aef84..7d6c14ecf35a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -272,6 +272,7 @@ struct system_summary { struct cpu_topology { int physical_package_id; + int die_id; int logical_cpu_id; int physical_node_id; int logical_node_id; /* 0-based count within the package */ @@ -282,6 +283,7 @@ struct cpu_topology { struct topo_params { int num_packages; + int num_die; int num_cpus; int num_cores; int max_cpu_num; @@ -440,6 +442,7 @@ struct msr_counter bic[] = { { 0x0, "CPU" }, { 0x0, "APIC" }, { 0x0, "X2APIC" }, + { 0x0, "Die" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -493,6 +496,7 @@ struct msr_counter bic[] = { #define BIC_CPU (1ULL << 47) #define BIC_APIC (1ULL << 48) #define BIC_X2APIC (1ULL << 49) +#define BIC_Die (1ULL << 50) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -619,6 +623,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); + if (DO_BIC(BIC_Die)) + outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -902,6 +908,8 @@ int format_counters(struct thread_data *t, struct core_data *c, if (t == &average.threads) { if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_Die)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -919,6 +927,12 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } + if (DO_BIC(BIC_Die)) { + if (c) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id); + else + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + } if (DO_BIC(BIC_Node)) { if (t) outp += sprintf(outp, "%s%d", @@ -2454,6 +2468,8 @@ void free_all_buffers(void) /* * Parse a file containing a single int. + * Return 0 if file can not be opened + * Exit if file can be opened, but can not be parsed */ int parse_int_file(const char *fmt, ...) { @@ -2465,7 +2481,9 @@ int parse_int_file(const char *fmt, ...) va_start(args, fmt); vsnprintf(path, sizeof(path), fmt, args); va_end(args); - filep = fopen_or_die(path, "r"); + filep = fopen(path, "r"); + if (!filep) + return 0; if (fscanf(filep, "%d", &value) != 1) err(1, "%s: failed to parse number from file", path); fclose(filep); @@ -2486,6 +2504,11 @@ int get_physical_package_id(int cpu) return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); } +int get_die_id(int cpu) +{ + return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); +} + int get_core_id(int cpu) { return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); @@ -4772,6 +4795,7 @@ void topology_probe() int i; int max_core_id = 0; int max_package_id = 0; + int max_die_id = 0; int max_siblings = 0; /* Initialize num_cpus, max_cpu_num */ @@ -4838,6 +4862,11 @@ void topology_probe() if (cpus[i].physical_package_id > max_package_id) max_package_id = cpus[i].physical_package_id; + /* get die information */ + cpus[i].die_id = get_die_id(i); + if (cpus[i].die_id > max_die_id) + max_die_id = cpus[i].die_id; + /* get numa node information */ cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); if (cpus[i].physical_node_id > topo.max_node_num) @@ -4863,6 +4892,13 @@ void topology_probe() if (!summary_only && topo.cores_per_node > 1) BIC_PRESENT(BIC_Core); + topo.num_die = max_die_id + 1; + if (debug > 1) + fprintf(outf, "max_die_id %d, sizing for %d die\n", + max_die_id, topo.num_die); + if (!summary_only && topo.num_die > 1) + BIC_PRESENT(BIC_Die); + topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", @@ -4887,8 +4923,8 @@ void topology_probe() if (cpu_is_not_present(i)) continue; fprintf(outf, - "cpu %d pkg %d node %d lnode %d core %d thread %d\n", - i, cpus[i].physical_package_id, + "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", + i, cpus[i].physical_package_id, cpus[i].die_id, cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, From 0a42d235e50d677775056324d0762dd102a9ebb0 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 13 Aug 2018 08:45:01 -0400 Subject: [PATCH 06/11] tools/power turbostat: Do not display an error on systems without a cpufreq driver Running without a cpufreq driver is a valid case so warnings output in this case should not be to stderr. Use outf instead of stderr for these warnings. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7d6c14ecf35a..0716abdb1bd9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3465,7 +3465,7 @@ dump_sysfs_pstate_config(void) base_cpu); input = fopen(path, "r"); if (input == NULL) { - fprintf(stderr, "NSFOD %s\n", path); + fprintf(outf, "NSFOD %s\n", path); return; } fgets(driver_buf, sizeof(driver_buf), input); @@ -3475,7 +3475,7 @@ dump_sysfs_pstate_config(void) base_cpu); input = fopen(path, "r"); if (input == NULL) { - fprintf(stderr, "NSFOD %s\n", path); + fprintf(outf, "NSFOD %s\n", path); return; } fgets(governor_buf, sizeof(governor_buf), input); From 9392bd98bba760be96ee67f51cb040dcf7aa190e Mon Sep 17 00:00:00 2001 From: Calvin Walton Date: Fri, 17 Aug 2018 12:34:41 -0400 Subject: [PATCH 07/11] tools/power turbostat: Add support for AMD Fam 17h (Zen) RAPL Based on the Open-Source Register Reference for AMD Family 17h Processors Models 00h-2Fh: https://support.amd.com/TechDocs/56255_OSRR.pdf These processors report RAPL support in bit 14 of CPUID 0x80000007 EDX, and the following MSRs are present: 0xc0010299 (RAPL_PWR_UNIT), like Intel's RAPL_POWER_UNIT 0xc001029a (CORE_ENERGY_STAT), kind of like Intel's PP0_ENERGY_STATUS 0xc001029b (PKG_ENERGY_STAT), like Intel's PKG_ENERGY_STATUS A notable difference from the Intel implementation is that AMD reports the "Cores" energy usage separately for each core, rather than a per-package total. The code has been adjusted to handle either case in a generic way. I haven't yet enabled collection of package power, due to being unable to test it on multi-node systems (TR, EPYC). Signed-off-by: Calvin Walton Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 159 +++++++++++++++++++++----- 1 file changed, 130 insertions(+), 29 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0716abdb1bd9..538878b9deb8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -44,6 +44,7 @@ #include #include #include +#include char *proc_stat = "/proc/stat"; FILE *outf; @@ -140,9 +141,21 @@ unsigned int first_counter_read = 1; #define RAPL_CORES_ENERGY_STATUS (1 << 9) /* 0x639 MSR_PP0_ENERGY_STATUS */ +#define RAPL_PER_CORE_ENERGY (1 << 10) + /* Indicates cores energy collection is per-core, + * not per-package. */ +#define RAPL_AMD_F17H (1 << 11) + /* 0xc0010299 MSR_RAPL_PWR_UNIT */ + /* 0xc001029a MSR_CORE_ENERGY_STAT */ + /* 0xc001029b MSR_PKG_ENERGY_STAT */ #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT) #define TJMAX_DEFAULT 100 +/* MSRs that are not yet in the kernel-provided header. */ +#define MSR_RAPL_PWR_UNIT 0xc0010299 +#define MSR_CORE_ENERGY_STAT 0xc001029a +#define MSR_PKG_ENERGY_STAT 0xc001029b + #define MAX(a, b) ((a) > (b) ? (a) : (b)) /* @@ -186,6 +199,7 @@ struct core_data { unsigned long long c7; unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ unsigned int core_temp_c; + unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */ unsigned int core_id; unsigned long long counter[MAX_ADDED_COUNTERS]; } *core_even, *core_odd; @@ -684,6 +698,14 @@ void print_header(char *delim) if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); + if (do_rapl && !rapl_joules) { + if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); + } else if (do_rapl && rapl_joules) { + if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); + } + for (mp = sys.cp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) @@ -738,7 +760,7 @@ void print_header(char *delim) if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); - if (DO_BIC(BIC_CorWatt)) + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); @@ -751,7 +773,7 @@ void print_header(char *delim) } else if (do_rapl && rapl_joules) { if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); - if (DO_BIC(BIC_Cor_J)) + if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); @@ -812,6 +834,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "c6: %016llX\n", c->c6); outp += sprintf(outp, "c7: %016llX\n", c->c7); outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); + outp += sprintf(outp, "Joules: %0X\n", c->core_energy); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", @@ -1045,6 +1068,20 @@ int format_counters(struct thread_data *t, struct core_data *c, } } + /* + * If measurement interval exceeds minimum RAPL Joule Counter range, + * indicate that results are suspect by printing "**" in fraction place. + */ + if (interval_float < rapl_joule_counter_range) + fmt8 = "%s%.2f"; + else + fmt8 = "%6.0f**"; + + if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); + if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units); + /* print per-package data only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) goto done; @@ -1097,18 +1134,9 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_SYS_LPI)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); - /* - * If measurement interval exceeds minimum RAPL Joule Counter range, - * indicate that results are suspect by printing "**" in fraction place. - */ - if (interval_float < rapl_joule_counter_range) - fmt8 = "%s%.2f"; - else - fmt8 = "%6.0f**"; - if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); - if (DO_BIC(BIC_CorWatt)) + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); @@ -1116,7 +1144,7 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float); if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); - if (DO_BIC(BIC_Cor_J)) + if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units); @@ -1261,6 +1289,8 @@ delta_core(struct core_data *new, struct core_data *old) old->core_temp_c = new->core_temp_c; old->mc6_us = new->mc6_us - old->mc6_us; + DELTA_WRAP32(new->core_energy, old->core_energy); + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) old->counter[i] = new->counter[i]; @@ -1403,6 +1433,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c7 = 0; c->mc6_us = 0; c->core_temp_c = 0; + c->core_energy = 0; p->pkg_wtd_core_c0 = 0; p->pkg_any_core_c0 = 0; @@ -1485,6 +1516,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + average.cores.core_energy += c->core_energy; + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; @@ -1857,6 +1890,12 @@ retry: c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } + if (do_rapl & RAPL_AMD_F17H) { + if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr)) + return -14; + c->core_energy = msr & 0xFFFFFFFF; + } + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (get_mp(cpu, mp, &c->counter[i])) return -10; @@ -3739,7 +3778,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ -double get_tdp(unsigned int model) +double get_tdp_intel(unsigned int model) { unsigned long long msr; @@ -3756,6 +3795,16 @@ double get_tdp(unsigned int model) } } +double get_tdp_amd(unsigned int family) +{ + switch (family) { + case 0x17: + default: + /* This is the max stock TDP of HEDT/Server Fam17h chips */ + return 250.0; + } +} + /* * rapl_dram_energy_units_probe() * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. @@ -3775,21 +3824,12 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) } } - -/* - * rapl_probe() - * - * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units - */ -void rapl_probe(unsigned int family, unsigned int model) +void rapl_probe_intel(unsigned int family, unsigned int model) { unsigned long long msr; unsigned int time_unit; double tdp; - if (!genuine_intel) - return; - if (family != 6) return; @@ -3913,13 +3953,66 @@ void rapl_probe(unsigned int family, unsigned int model) rapl_time_units = 1.0 / (1 << (time_unit)); - tdp = get_tdp(model); + tdp = get_tdp_intel(model); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (!quiet) fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); +} - return; +void rapl_probe_amd(unsigned int family, unsigned int model) +{ + unsigned long long msr; + unsigned int eax, ebx, ecx, edx; + unsigned int has_rapl = 0; + double tdp; + + if (max_extended_level >= 0x80000007) { + __cpuid(0x80000007, eax, ebx, ecx, edx); + /* RAPL (Fam 17h) */ + has_rapl = edx & (1 << 14); + } + + if (!has_rapl) + return; + + switch (family) { + case 0x17: /* Zen, Zen+ */ + do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; + if (rapl_joules) + BIC_PRESENT(BIC_Cor_J); + else + BIC_PRESENT(BIC_CorWatt); + break; + default: + return; + } + + if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) + return; + + rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf)); + rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); + rapl_power_units = ldexp(1.0, -(msr & 0xf)); + + tdp = get_tdp_amd(model); + + rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; + if (!quiet) + fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); +} + +/* + * rapl_probe() + * + * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units + */ +void rapl_probe(unsigned int family, unsigned int model) +{ + if (genuine_intel) + rapl_probe_intel(family, model); + if (authentic_amd) + rapl_probe_amd(family, model); } void perf_limit_reasons_probe(unsigned int family, unsigned int model) @@ -4024,6 +4117,7 @@ void print_power_limit_msr(int cpu, unsigned long long msr, char *label) int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; + const char *msr_name; int cpu; if (!do_rapl) @@ -4039,10 +4133,17 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -1; } - if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) - return -1; + if (do_rapl & RAPL_AMD_F17H) { + msr_name = "MSR_RAPL_PWR_UNIT"; + if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) + return -1; + } else { + msr_name = "MSR_RAPL_POWER_UNIT"; + if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) + return -1; + } - fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr, + fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, rapl_power_units, rapl_energy_units, rapl_time_units); if (do_rapl & RAPL_PKG_POWER_INFO) { From 3316f99a9f1b68c578c57e76792bd19da1c7d423 Mon Sep 17 00:00:00 2001 From: Calvin Walton Date: Fri, 17 Aug 2018 12:34:42 -0400 Subject: [PATCH 08/11] tools/power turbostat: Also read package power on AMD F17h (Zen) The package power can also be read from an MSR. It's not clear exactly what is included, and whether it's aggregated over all nodes or reported separately. It does look like this is reported separately per CCX (I get a single value on the Ryzen R7 1700), but it might be reported separately per- die (node?) on larger processors. If that's the case, it would have to be recorded per node and aggregated for the socket. Note that although Zen has these MSRs reporting power, it looks like the actual RAPL configuration (power limits, configured TDP) is done through PCI configuration space. I have not yet found any public documentation for this. Signed-off-by: Calvin Walton Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 538878b9deb8..ee9aaeef662e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1985,6 +1985,11 @@ retry: return -16; p->rapl_dram_perf_status = msr & 0xFFFFFFFF; } + if (do_rapl & RAPL_AMD_F17H) { + if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr)) + return -13; + p->energy_pkg = msr & 0xFFFFFFFF; + } if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return -17; @@ -3979,10 +3984,13 @@ void rapl_probe_amd(unsigned int family, unsigned int model) switch (family) { case 0x17: /* Zen, Zen+ */ do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; - if (rapl_joules) + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); BIC_PRESENT(BIC_Cor_J); - else + } else { + BIC_PRESENT(BIC_PkgWatt); BIC_PRESENT(BIC_CorWatt); + } break; default: return; From 8173c336989c1a12290cd023969df2775b2df34d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 20 Mar 2019 23:01:03 -0400 Subject: [PATCH 09/11] tools/power turbostat: Add checks for failure of fgets() and fscanf() Most calls to fgets() and fscanf() are followed by error checks. Add an exit-on-error in the remaining cases. Signed-off-by: Ben Hutchings Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 28 +++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ee9aaeef662e..9817abd215b9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2643,7 +2643,8 @@ int get_thread_siblings(struct cpu_topology *thiscpu) filep = fopen_or_die(path, "r"); do { offset -= BITMASK_SIZE; - fscanf(filep, "%lx%c", &map, &character); + if (fscanf(filep, "%lx%c", &map, &character) != 2) + err(1, "%s: failed to parse file", path); for (shift = 0; shift < BITMASK_SIZE; shift++) { if ((map >> shift) & 0x1) { so = shift + offset; @@ -3475,14 +3476,14 @@ dump_sysfs_cstate_config(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); if (!sp) sp = strchrnul(name_buf, '\n'); *sp = '\0'; - fclose(input); sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", @@ -3490,7 +3491,8 @@ dump_sysfs_cstate_config(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(desc, sizeof(desc), input); + if (!fgets(desc, sizeof(desc), input)) + err(1, "%s: failed to read file", path); fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); fclose(input); @@ -3512,7 +3514,8 @@ dump_sysfs_pstate_config(void) fprintf(outf, "NSFOD %s\n", path); return; } - fgets(driver_buf, sizeof(driver_buf), input); + if (!fgets(driver_buf, sizeof(driver_buf), input)) + err(1, "%s: failed to read file", path); fclose(input); sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", @@ -3522,7 +3525,8 @@ dump_sysfs_pstate_config(void) fprintf(outf, "NSFOD %s\n", path); return; } - fgets(governor_buf, sizeof(governor_buf), input); + if (!fgets(governor_buf, sizeof(governor_buf), input)) + err(1, "%s: failed to read file", path); fclose(input); fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); @@ -3531,7 +3535,8 @@ dump_sysfs_pstate_config(void) sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); input = fopen(path, "r"); if (input != NULL) { - fscanf(input, "%d", &turbo); + if (fscanf(input, "%d", &turbo) != 1) + err(1, "%s: failed to parse number from file", path); fprintf(outf, "cpufreq boost: %d\n", turbo); fclose(input); } @@ -3539,7 +3544,8 @@ dump_sysfs_pstate_config(void) sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); input = fopen(path, "r"); if (input != NULL) { - fscanf(input, "%d", &turbo); + if (fscanf(input, "%d", &turbo) != 1) + err(1, "%s: failed to parse number from file", path); fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); fclose(input); } @@ -5464,7 +5470,8 @@ void probe_sysfs(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); @@ -5491,7 +5498,8 @@ void probe_sysfs(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); if (!sp) From 5ea7647b333f3580697edaaf2b17a2f6d29a82f1 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 25 Sep 2018 08:59:26 -0400 Subject: [PATCH 10/11] tools/power turbostat: Warn on bad ACPI LPIT data On some systems /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us or /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us return a file error because of bad ACPI LPIT data from a misconfigured BIOS. turbostat interprets this failure as a fatal error and outputs turbostat: CPU LPI: No data available If the ACPI LPIT sysfs files return an error output a warning instead of a fatal error, disable the ACPI LPIT evaluation code, and continue. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9817abd215b9..442af7892402 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2921,8 +2921,11 @@ int snapshot_cpu_lpi_us(void) fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); - if (retval != 1) - err(1, "CPU LPI"); + if (retval != 1) { + fprintf(stderr, "Disabling Low Power Idle CPU output\n"); + BIC_NOT_PRESENT(BIC_CPU_LPI); + return -1; + } fclose(fp); @@ -2944,9 +2947,11 @@ int snapshot_sys_lpi_us(void) fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r"); retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); - if (retval != 1) - err(1, "SYS LPI"); - + if (retval != 1) { + fprintf(stderr, "Disabling Low Power Idle System output\n"); + BIC_NOT_PRESENT(BIC_SYS_LPI); + return -1; + } fclose(fp); return 0; From 0f71d089c912769251c992b8f7dcd508a472fe10 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 20 Mar 2019 23:23:25 -0400 Subject: [PATCH 11/11] tools/power turbostat: update version number Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 442af7892402..8d176b10daec 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5278,7 +5278,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 18.07.27" + fprintf(outf, "turbostat version 19.03.20" " - Len Brown \n"); }