
arm[64] perf updates for 4.6:

- Initial support for ARMv8.1 CPU PMUs
 
 - Support for the CPU PMU in Cavium ThunderX
 
 - CPU PMU support for systems running 32-bit Linux in secure mode
 
 - Support for the system PMU in ARM CCI-550 (Cache Coherent Interconnect)
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABCgAGBQJW794rAAoJELescNyEwWM0O5IH/0ejoUjip3n4dFZnSzAbQQZe
 VxCy3DXW5gS8YaswwX2dFw9K772/BpHlazq8AIJIhaR+b+Zzl5t0iOc12HluDilV
 pMvi0JTCxwJhsEiKZnP0cVAU9HM6MAgtMOEegkd/YNESKQey30NeDtIcz/pQfTUV
 28AF71+w5VPj/1EpHEEhHQsASRIx7eDbKzThzdlb8PnDS0o23QJhL9HjVTNIAlB8
 BGxrUBKtBu0eH2Hx33vNjc7UYx1WZQlCk5cAaXevA8mbFXzYaMQI2Cel2nbNMO9i
 eu5zPkDUCG7dq16PxK6IgM4AsDCtmmDuckLdN6UEQWYxkLbb2qHNRKtj0bKB8Sk=
 =E4PE
 -----END PGP SIGNATURE-----

Merge tag 'arm64-perf' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm[64] perf updates from Will Deacon:
 "I have another mixed bag of ARM-related perf patches here.

  It's about 25% CPU and 75% interconnect, but with drivers/bus/
  languishing without an obvious maintainer or tree, Olof and I agreed
  to keep all of these PMU patches together.  I suspect a whole load of
  code from drivers/bus/arm-* can be moved under drivers/perf/, so
  that's on the radar for the future.

  Summary:

   - Initial support for ARMv8.1 CPU PMUs

   - Support for the CPU PMU in Cavium ThunderX

   - CPU PMU support for systems running 32-bit Linux in secure mode

   - Support for the system PMU in ARM CCI-550 (Cache Coherent Interconnect)"

* tag 'arm64-perf' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (26 commits)
  drivers/perf: arm_pmu: avoid NULL dereference when not using devicetree
  arm64: perf: Extend ARMV8_EVTYPE_MASK to include PMCR.LC
  arm-cci: remove unused variable
  arm-cci: don't return value from void function
  arm-cci: make private functions static
  arm-cci: CoreLink CCI-550 PMU driver
  arm-cci500: Rearrange PMU driver for code sharing with CCI-550 PMU
  arm-cci: CCI-500: Work around PMU counter writes
  arm-cci: Provide hook for writing to PMU counters
  arm-cci: Add helper to enable PMU without synchornising counters
  arm-cci: Add routines to save/restore all counters
  arm-cci: Get the status of a counter
  arm-cci: write_counter: Remove redundant check
  arm-cci: Delay PMU counter writes to pmu::pmu_enable
  arm-cci: Refactor CCI PMU enable/disable methods
  arm-cci: Group writes to counter
  arm-cci: fix handling cpumask_any_but return value
  arm-cci: simplify sysfs attr handling
  drivers/perf: arm_pmu: implement CPU_PM notifier
  arm64: dts: Add Cavium ThunderX specific PMU
  ...
Linus Torvalds 2016-03-21 13:14:16 -07:00
commit 2c856e14da
9 changed files with 662 additions and 223 deletions


@ -34,6 +34,7 @@ specific to ARM.
Definition: must contain one of the following:
"arm,cci-400"
"arm,cci-500"
"arm,cci-550"
- reg
Usage: required
@ -101,6 +102,7 @@ specific to ARM.
"arm,cci-400-pmu" - DEPRECATED, permitted only where OS has
secure acces to CCI registers
"arm,cci-500-pmu,r0"
"arm,cci-550-pmu,r0"
- reg:
Usage: required
Value type: Integer cells. A register entry, expressed


@ -25,6 +25,7 @@ Required properties:
"qcom,scorpion-pmu"
"qcom,scorpion-mp-pmu"
"qcom,krait-pmu"
"cavium,thunder-pmu"
- interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu
interrupt (PPI) then 1 interrupt should be specified.
@ -46,6 +47,16 @@ Optional properties:
- qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd
events.
- secure-reg-access : Indicates that the ARMv7 Secure Debug Enable Register
(SDER) is accessible. This will cause the driver to do
any setup required that is only possible in ARMv7 secure
state. If not present the ARMv7 SDER will not be touched,
which means the PMU may fail to operate unless external
code (bootloader or security monitor) has performed the
appropriate initialisation. Note that this property is
not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux
in Non-secure state.
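For orientation, the driver-side handling added elsewhere in this merge (the drivers/perf/arm_pmu.c probe path and the arch/arm/kernel/perf_event_v7.c reset path shown in later hunks) amounts roughly to the sketch below; it is condensed from those hunks for illustration and is not a verbatim extract:

	pmu->secure_access = of_property_read_bool(pdev->dev.of_node,
						    "secure-reg-access");

	/* arm64 systems boot only as non-secure, so the property is ignored */
	if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) {
		pr_warn("ignoring \"secure-reg-access\" property for arm64\n");
		pmu->secure_access = false;
	}

	/* ...and later, in the ARMv7 PMU reset path... */
	if (cpu_pmu->secure_access) {
		asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val));
		val |= ARMV7_SDER_SUNIDEN;	/* permit non-invasive debug */
		asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val));
	}
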
Example:
pmu {


@ -712,6 +712,11 @@ static const struct attribute_group *armv7_pmuv2_attr_groups[] = {
#define ARMV7_EXCLUDE_USER (1 << 30)
#define ARMV7_INCLUDE_HYP (1 << 27)
/*
* Secure debug enable reg
*/
#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */
static inline u32 armv7_pmnc_read(void)
{
u32 val;
@ -1094,7 +1099,13 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event,
static void armv7pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
u32 idx, nb_cnt = cpu_pmu->num_events;
u32 idx, nb_cnt = cpu_pmu->num_events, val;
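/*
 * When the DT carries "secure-reg-access" (32-bit Linux booted in Secure
 * state), set SDER.SUNIDEN so that non-invasive debug, and with it the PMU,
 * is permitted to count.
 */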
if (cpu_pmu->secure_access) {
asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val));
val |= ARMV7_SDER_SUNIDEN;
asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val));
}
/* The counter and interrupt enable registers are unknown at reset. */
for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {


@ -360,6 +360,11 @@
<1 10 0xff01>;
};
pmu {
compatible = "cavium,thunder-pmu", "arm,armv8-pmuv3";
interrupts = <1 7 4>;
};
soc {
compatible = "simple-bus";
#address-cells = <2>;


@ -88,16 +88,25 @@
#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F
#define ARMV8_PMUV3_PERFCTR_L21_TLB 0x30
/* ARMv8 implementation defined event types. */
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD 0x40
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST 0x41
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD 0x42
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST 0x43
#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD 0x4C
#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST 0x4D
#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD 0x4E
#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST 0x4F
/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREFETCH_LINEFILL 0xC2
/* ARMv8 Cortex-A57 and Cortex-A72 specific event types. */
#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD 0x40
#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST 0x41
#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD 0x42
#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST 0x43
#define ARMV8_A57_PERFCTR_DTLB_REFILL_LD 0x4c
#define ARMV8_A57_PERFCTR_DTLB_REFILL_ST 0x4d
/* ARMv8 Cavium ThunderX specific event types. */
#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST 0xE9
#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS 0xEA
#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS 0xEB
#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS 0xEC
#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS 0xED
/* PMUv3 HW events mapping. */
static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
@ -132,6 +141,18 @@ static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
};
static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
};
static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@ -175,16 +196,46 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST,
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST,
[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_LD,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_ST,
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,
[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
[C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
};
static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST,
[C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS,
[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS,
[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
[C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS,
[C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS,
[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD,
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,
[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
@ -325,7 +376,6 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
NULL,
};
/*
* Perf Events' indices
*/
@ -356,9 +406,10 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
#define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV8_PMCR_X (1 << 4) /* Export to ETM */
#define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV8_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
#define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */
#define ARMV8_PMCR_N_MASK 0x1f
#define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */
#define ARMV8_PMCR_MASK 0x7f /* Mask for writable bits */
/*
* PMOVSR: counters overflow flag status reg
@ -369,8 +420,8 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
/*
* PMXEVTYPER: Event selection reg
*/
#define ARMV8_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */
#define ARMV8_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */
#define ARMV8_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
#define ARMV8_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
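/*
 * The event number field grows to 16 bits with the ARMv8.1 PMU extension
 * supported by this series, hence the wider masks above.
 */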
/*
* Event filters for PMUv3
@ -445,9 +496,16 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
if (!armv8pmu_counter_valid(cpu_pmu, idx))
pr_err("CPU%u writing wrong counter %d\n",
smp_processor_id(), idx);
else if (idx == ARMV8_IDX_CYCLE_COUNTER)
asm volatile("msr pmccntr_el0, %0" :: "r" (value));
else if (armv8pmu_select_counter(idx) == idx)
else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
/*
* Set the upper 32bits as this is a 64bit counter but we only
* count using the lower 32bits and we want an interrupt when
* it overflows.
*/
u64 value64 = 0xffffffff00000000ULL | value;
asm volatile("msr pmccntr_el0, %0" :: "r" (value64));
} else if (armv8pmu_select_counter(idx) == idx)
asm volatile("msr pmxevcntr_el0, %0" :: "r" (value));
}
@ -722,8 +780,11 @@ static void armv8pmu_reset(void *info)
armv8pmu_disable_intens(idx);
}
/* Initialize & Reset PMNC: C and P bits. */
armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
/*
* Initialize & Reset PMNC. Request overflow interrupt for
* 64 bit cycle counter but cheat in armv8pmu_write_counter().
*/
armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C | ARMV8_PMCR_LC);
}
static int armv8_pmuv3_map_event(struct perf_event *event)
@ -747,6 +808,13 @@ static int armv8_a57_map_event(struct perf_event *event)
ARMV8_EVTYPE_EVENT);
}
static int armv8_thunder_map_event(struct perf_event *event)
{
return armpmu_map_event(event, &armv8_thunder_perf_map,
&armv8_thunder_perf_cache_map,
ARMV8_EVTYPE_EVENT);
}
static void armv8pmu_read_num_pmnc_events(void *info)
{
int *nb_cnt = info;
@ -815,11 +883,21 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
return armv8pmu_probe_num_events(cpu_pmu);
}
static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
{
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_cavium_thunder";
cpu_pmu->map_event = armv8_thunder_map_event;
cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
return armv8pmu_probe_num_events(cpu_pmu);
}
static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init},
{.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
{.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
{.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
{},
};


@ -34,15 +34,15 @@ config ARM_CCI400_PORT_CTRL
Low level power management driver for CCI400 cache coherent
interconnect for ARM platforms.
config ARM_CCI500_PMU
bool "ARM CCI500 PMU support"
config ARM_CCI5xx_PMU
bool "ARM CCI-500/CCI-550 PMU support"
depends on (ARM && CPU_V7) || ARM64
depends on PERF_EVENTS
select ARM_CCI_PMU
help
Support for PMU events monitoring on the ARM CCI-500 cache coherent
interconnect. CCI-500 provides 8 independent event counters, which
can count events pertaining to the slave/master interfaces as well
Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache
coherent interconnects. Both of them provide 8 independent event counters,
which can count events pertaining to the slave/master interfaces as well
as the internal events to the CCI.
If unsure, say Y


@ -52,8 +52,9 @@ static const struct of_device_id arm_cci_matches[] = {
#ifdef CONFIG_ARM_CCI400_COMMON
{.compatible = "arm,cci-400", .data = CCI400_PORTS_DATA },
#endif
#ifdef CONFIG_ARM_CCI500_PMU
#ifdef CONFIG_ARM_CCI5xx_PMU
{ .compatible = "arm,cci-500", },
{ .compatible = "arm,cci-550", },
#endif
{},
};
@ -92,7 +93,7 @@ static const struct of_device_id arm_cci_matches[] = {
enum {
CCI_IF_SLAVE,
CCI_IF_MASTER,
#ifdef CONFIG_ARM_CCI500_PMU
#ifdef CONFIG_ARM_CCI5xx_PMU
CCI_IF_GLOBAL,
#endif
CCI_IF_MAX,
@ -121,13 +122,12 @@ struct cci_pmu_model {
u32 fixed_hw_cntrs;
u32 num_hw_cntrs;
u32 cntr_size;
u64 nformat_attrs;
u64 nevent_attrs;
struct dev_ext_attribute *format_attrs;
struct dev_ext_attribute *event_attrs;
struct attribute **format_attrs;
struct attribute **event_attrs;
struct event_range event_ranges[CCI_IF_MAX];
int (*validate_hw_event)(struct cci_pmu *, unsigned long);
int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long);
void (*write_counters)(struct cci_pmu *, unsigned long *);
};
static struct cci_pmu_model cci_pmu_models[];
@ -155,19 +155,24 @@ enum cci_models {
CCI400_R0,
CCI400_R1,
#endif
#ifdef CONFIG_ARM_CCI500_PMU
#ifdef CONFIG_ARM_CCI5xx_PMU
CCI500_R0,
CCI550_R0,
#endif
CCI_MODEL_MAX
};
static void pmu_write_counters(struct cci_pmu *cci_pmu,
unsigned long *mask);
static ssize_t cci_pmu_format_show(struct device *dev,
struct device_attribute *attr, char *buf);
static ssize_t cci_pmu_event_show(struct device *dev,
struct device_attribute *attr, char *buf);
#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \
{ __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config }
#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \
&((struct dev_ext_attribute[]) { \
{ __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config } \
})[0].attr.attr
#define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \
CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config)
@ -242,12 +247,13 @@ enum cci400_perf_events {
static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
struct device_attribute *attr, char *buf);
static struct dev_ext_attribute cci400_pmu_format_attrs[] = {
static struct attribute *cci400_pmu_format_attrs[] = {
CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-7"),
NULL
};
static struct dev_ext_attribute cci400_r0_pmu_event_attrs[] = {
static struct attribute *cci400_r0_pmu_event_attrs[] = {
/* Slave events */
CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
@ -279,9 +285,10 @@ static struct dev_ext_attribute cci400_r0_pmu_event_attrs[] = {
CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_tt_full, 0x1A),
/* Special event for cycles counter */
CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
NULL
};
static struct dev_ext_attribute cci400_r1_pmu_event_attrs[] = {
static struct attribute *cci400_r1_pmu_event_attrs[] = {
/* Slave events */
CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
@ -325,6 +332,7 @@ static struct dev_ext_attribute cci400_r1_pmu_event_attrs[] = {
CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_unique_or_line_unique_addr_hazard, 0x11),
/* Special event for cycles counter */
CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
NULL
};
static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
@ -420,72 +428,68 @@ static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev
}
#endif /* CONFIG_ARM_CCI400_PMU */
#ifdef CONFIG_ARM_CCI500_PMU
#ifdef CONFIG_ARM_CCI5xx_PMU
/*
* CCI500 provides 8 independent event counters that can count
* any of the events available.
*
* CCI500 PMU event id is an 9-bit value made of two parts.
* CCI5xx PMU event id is a 9-bit value made of two parts.
* bits [8:5] - Source for the event
* 0x0-0x6 - Slave interfaces
* 0x8-0xD - Master interfaces
* 0xf - Global Events
* 0x7,0xe - Reserved
*
* bits [4:0] - Event code (specific to type of interface)
*
*
*/
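/*
 * Decoding example (illustrative): hw_event 0x1a1 has source 0xd (master
 * interface M5, see the port ids below) and event code 0x01.
 */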
/* Port ids */
#define CCI500_PORT_S0 0x0
#define CCI500_PORT_S1 0x1
#define CCI500_PORT_S2 0x2
#define CCI500_PORT_S3 0x3
#define CCI500_PORT_S4 0x4
#define CCI500_PORT_S5 0x5
#define CCI500_PORT_S6 0x6
#define CCI5xx_PORT_S0 0x0
#define CCI5xx_PORT_S1 0x1
#define CCI5xx_PORT_S2 0x2
#define CCI5xx_PORT_S3 0x3
#define CCI5xx_PORT_S4 0x4
#define CCI5xx_PORT_S5 0x5
#define CCI5xx_PORT_S6 0x6
#define CCI500_PORT_M0 0x8
#define CCI500_PORT_M1 0x9
#define CCI500_PORT_M2 0xa
#define CCI500_PORT_M3 0xb
#define CCI500_PORT_M4 0xc
#define CCI500_PORT_M5 0xd
#define CCI5xx_PORT_M0 0x8
#define CCI5xx_PORT_M1 0x9
#define CCI5xx_PORT_M2 0xa
#define CCI5xx_PORT_M3 0xb
#define CCI5xx_PORT_M4 0xc
#define CCI5xx_PORT_M5 0xd
#define CCI5xx_PORT_M6 0xe
#define CCI500_PORT_GLOBAL 0xf
#define CCI5xx_PORT_GLOBAL 0xf
#define CCI500_PMU_EVENT_MASK 0x1ffUL
#define CCI500_PMU_EVENT_SOURCE_SHIFT 0x5
#define CCI500_PMU_EVENT_SOURCE_MASK 0xf
#define CCI500_PMU_EVENT_CODE_SHIFT 0x0
#define CCI500_PMU_EVENT_CODE_MASK 0x1f
#define CCI5xx_PMU_EVENT_MASK 0x1ffUL
#define CCI5xx_PMU_EVENT_SOURCE_SHIFT 0x5
#define CCI5xx_PMU_EVENT_SOURCE_MASK 0xf
#define CCI5xx_PMU_EVENT_CODE_SHIFT 0x0
#define CCI5xx_PMU_EVENT_CODE_MASK 0x1f
#define CCI500_PMU_EVENT_SOURCE(event) \
((event >> CCI500_PMU_EVENT_SOURCE_SHIFT) & CCI500_PMU_EVENT_SOURCE_MASK)
#define CCI500_PMU_EVENT_CODE(event) \
((event >> CCI500_PMU_EVENT_CODE_SHIFT) & CCI500_PMU_EVENT_CODE_MASK)
#define CCI5xx_PMU_EVENT_SOURCE(event) \
((event >> CCI5xx_PMU_EVENT_SOURCE_SHIFT) & CCI5xx_PMU_EVENT_SOURCE_MASK)
#define CCI5xx_PMU_EVENT_CODE(event) \
((event >> CCI5xx_PMU_EVENT_CODE_SHIFT) & CCI5xx_PMU_EVENT_CODE_MASK)
#define CCI500_SLAVE_PORT_MIN_EV 0x00
#define CCI500_SLAVE_PORT_MAX_EV 0x1f
#define CCI500_MASTER_PORT_MIN_EV 0x00
#define CCI500_MASTER_PORT_MAX_EV 0x06
#define CCI500_GLOBAL_PORT_MIN_EV 0x00
#define CCI500_GLOBAL_PORT_MAX_EV 0x0f
#define CCI5xx_SLAVE_PORT_MIN_EV 0x00
#define CCI5xx_SLAVE_PORT_MAX_EV 0x1f
#define CCI5xx_MASTER_PORT_MIN_EV 0x00
#define CCI5xx_MASTER_PORT_MAX_EV 0x06
#define CCI5xx_GLOBAL_PORT_MIN_EV 0x00
#define CCI5xx_GLOBAL_PORT_MAX_EV 0x0f
#define CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \
CCI_EXT_ATTR_ENTRY(_name, cci500_pmu_global_event_show, \
#define CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \
CCI_EXT_ATTR_ENTRY(_name, cci5xx_pmu_global_event_show, \
(unsigned long) _config)
static ssize_t cci500_pmu_global_event_show(struct device *dev,
static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
struct device_attribute *attr, char *buf);
static struct dev_ext_attribute cci500_pmu_format_attrs[] = {
static struct attribute *cci5xx_pmu_format_attrs[] = {
CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"),
NULL,
};
static struct dev_ext_attribute cci500_pmu_event_attrs[] = {
static struct attribute *cci5xx_pmu_event_attrs[] = {
/* Slave events */
CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0),
CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1),
@ -530,63 +534,73 @@ static struct dev_ext_attribute cci500_pmu_event_attrs[] = {
CCI_EVENT_EXT_ATTR_ENTRY(mi_w_resp_stall, 0x6),
/* Global events */
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE),
CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE),
CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
NULL
};
static ssize_t cci500_pmu_global_event_show(struct device *dev,
static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct dev_ext_attribute *eattr = container_of(attr,
struct dev_ext_attribute, attr);
/* Global events have single fixed source code */
return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n",
(unsigned long)eattr->var, CCI500_PORT_GLOBAL);
(unsigned long)eattr->var, CCI5xx_PORT_GLOBAL);
}
/*
* CCI500 provides 8 independent event counters that can count
* any of the events available.
* CCI500 PMU event source ids
* 0x0-0x6 - Slave interfaces
* 0x8-0xD - Master interfaces
* 0xf - Global Events
* 0x7,0xe - Reserved
*/
static int cci500_validate_hw_event(struct cci_pmu *cci_pmu,
unsigned long hw_event)
{
u32 ev_source = CCI500_PMU_EVENT_SOURCE(hw_event);
u32 ev_code = CCI500_PMU_EVENT_CODE(hw_event);
u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
int if_type;
if (hw_event & ~CCI500_PMU_EVENT_MASK)
if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
return -ENOENT;
switch (ev_source) {
case CCI500_PORT_S0:
case CCI500_PORT_S1:
case CCI500_PORT_S2:
case CCI500_PORT_S3:
case CCI500_PORT_S4:
case CCI500_PORT_S5:
case CCI500_PORT_S6:
case CCI5xx_PORT_S0:
case CCI5xx_PORT_S1:
case CCI5xx_PORT_S2:
case CCI5xx_PORT_S3:
case CCI5xx_PORT_S4:
case CCI5xx_PORT_S5:
case CCI5xx_PORT_S6:
if_type = CCI_IF_SLAVE;
break;
case CCI500_PORT_M0:
case CCI500_PORT_M1:
case CCI500_PORT_M2:
case CCI500_PORT_M3:
case CCI500_PORT_M4:
case CCI500_PORT_M5:
case CCI5xx_PORT_M0:
case CCI5xx_PORT_M1:
case CCI5xx_PORT_M2:
case CCI5xx_PORT_M3:
case CCI5xx_PORT_M4:
case CCI5xx_PORT_M5:
if_type = CCI_IF_MASTER;
break;
case CCI500_PORT_GLOBAL:
case CCI5xx_PORT_GLOBAL:
if_type = CCI_IF_GLOBAL;
break;
default:
@ -599,7 +613,118 @@ static int cci500_validate_hw_event(struct cci_pmu *cci_pmu,
return -ENOENT;
}
#endif /* CONFIG_ARM_CCI500_PMU */
/*
* CCI550 provides 8 independent event counters that can count
* any of the events available.
* CCI550 PMU event source ids
* 0x0-0x6 - Slave interfaces
* 0x8-0xe - Master interfaces
* 0xf - Global Events
* 0x7 - Reserved
*/
static int cci550_validate_hw_event(struct cci_pmu *cci_pmu,
unsigned long hw_event)
{
u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
int if_type;
if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
return -ENOENT;
switch (ev_source) {
case CCI5xx_PORT_S0:
case CCI5xx_PORT_S1:
case CCI5xx_PORT_S2:
case CCI5xx_PORT_S3:
case CCI5xx_PORT_S4:
case CCI5xx_PORT_S5:
case CCI5xx_PORT_S6:
if_type = CCI_IF_SLAVE;
break;
case CCI5xx_PORT_M0:
case CCI5xx_PORT_M1:
case CCI5xx_PORT_M2:
case CCI5xx_PORT_M3:
case CCI5xx_PORT_M4:
case CCI5xx_PORT_M5:
case CCI5xx_PORT_M6:
if_type = CCI_IF_MASTER;
break;
case CCI5xx_PORT_GLOBAL:
if_type = CCI_IF_GLOBAL;
break;
default:
return -ENOENT;
}
if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
ev_code <= cci_pmu->model->event_ranges[if_type].max)
return hw_event;
return -ENOENT;
}
#endif /* CONFIG_ARM_CCI5xx_PMU */
/*
* Program the CCI PMU counters which have PERF_HES_ARCH set
* with the event period and mark them ready before we enable
* PMU.
*/
static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu)
{
int i;
struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
DECLARE_BITMAP(mask, cci_pmu->num_cntrs);
bitmap_zero(mask, cci_pmu->num_cntrs);
for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) {
struct perf_event *event = cci_hw->events[i];
if (WARN_ON(!event))
continue;
/* Leave the events which are not counting */
if (event->hw.state & PERF_HES_STOPPED)
continue;
if (event->hw.state & PERF_HES_ARCH) {
set_bit(i, mask);
event->hw.state &= ~PERF_HES_ARCH;
}
}
pmu_write_counters(cci_pmu, mask);
}
/* Should be called with cci_pmu->hw_events->pmu_lock held */
static void __cci_pmu_enable_nosync(struct cci_pmu *cci_pmu)
{
u32 val;
/* Enable all the PMU counters. */
val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
writel(val, cci_ctrl_base + CCI_PMCR);
}
/* Should be called with cci_pmu->hw_events->pmu_lock held */
static void __cci_pmu_enable_sync(struct cci_pmu *cci_pmu)
{
cci_pmu_sync_counters(cci_pmu);
__cci_pmu_enable_nosync(cci_pmu);
}
/* Should be called with cci_pmu->hw_events->pmu_lock held */
static void __cci_pmu_disable(void)
{
u32 val;
/* Disable all the PMU counters. */
val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
writel(val, cci_ctrl_base + CCI_PMCR);
}
static ssize_t cci_pmu_format_show(struct device *dev,
struct device_attribute *attr, char *buf)
@ -633,8 +758,8 @@ static u32 pmu_read_register(struct cci_pmu *cci_pmu, int idx, unsigned int offs
static void pmu_write_register(struct cci_pmu *cci_pmu, u32 value,
int idx, unsigned int offset)
{
return writel_relaxed(value, cci_pmu->base +
CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
writel_relaxed(value, cci_pmu->base +
CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
}
static void pmu_disable_counter(struct cci_pmu *cci_pmu, int idx)
@ -647,11 +772,55 @@ static void pmu_enable_counter(struct cci_pmu *cci_pmu, int idx)
pmu_write_register(cci_pmu, 1, idx, CCI_PMU_CNTR_CTRL);
}
static bool __maybe_unused
pmu_counter_is_enabled(struct cci_pmu *cci_pmu, int idx)
{
return (pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR_CTRL) & 0x1) != 0;
}
static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event)
{
pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL);
}
/*
* For all counters on the CCI-PMU, disable any 'enabled' counters,
* saving the changed counters in the mask, so that we can restore
* it later using pmu_restore_counters. The mask is private to the
* caller. We cannot rely on the used_mask maintained by the CCI_PMU
* as it only tells us if the counter is assigned to perf_event or not.
* The state of the perf_event cannot be locked by the PMU layer, hence
* we check the individual counter status (which can be locked by
* cci_pmu->hw_events->pmu_lock).
*
* @mask should be initialised to empty by the caller.
*/
static void __maybe_unused
pmu_save_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
{
int i;
for (i = 0; i < cci_pmu->num_cntrs; i++) {
if (pmu_counter_is_enabled(cci_pmu, i)) {
set_bit(i, mask);
pmu_disable_counter(cci_pmu, i);
}
}
}
/*
* Restore the status of the counters. Reversal of the pmu_save_counters().
* For each counter set in the mask, enable the counter back.
*/
static void __maybe_unused
pmu_restore_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
{
int i;
for_each_set_bit(i, mask, cci_pmu->num_cntrs)
pmu_enable_counter(cci_pmu, i);
}
/*
* Returns the number of programmable counters actually implemented
* by the cci
@ -754,18 +923,98 @@ static u32 pmu_read_counter(struct perf_event *event)
return value;
}
static void pmu_write_counter(struct perf_event *event, u32 value)
static void pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx)
{
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
struct hw_perf_event *hw_counter = &event->hw;
int idx = hw_counter->idx;
if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
else
pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
}
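/*
 * Default path for writing a set of counters: copy each event's saved
 * prev_count back into its hardware counter. Models that need a special
 * sequence (the CCI-5xx write work-around below) override this via
 * model->write_counters.
 */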
static void __pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
{
int i;
struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
struct perf_event *event = cci_hw->events[i];
if (WARN_ON(!event))
continue;
pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i);
}
}
static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
{
if (cci_pmu->model->write_counters)
cci_pmu->model->write_counters(cci_pmu, mask);
else
__pmu_write_counters(cci_pmu, mask);
}
#ifdef CONFIG_ARM_CCI5xx_PMU
/*
* CCI-500/CCI-550 has advanced power saving policies, which could gate the
* clocks to the PMU counters, which makes the writes to them ineffective.
* The only way to write to those counters is when the global counters
* are enabled and the particular counter is enabled.
*
* So we do the following :
*
* 1) Disable all the PMU counters, saving their current state
* 2) Enable the global PMU profiling, now that all counters are
* disabled.
*
* For each counter to be programmed, repeat steps 3-7:
*
* 3) Write an invalid event code to the event control register for the
counter, so that the counters are not modified.
* 4) Enable the counter control for the counter.
* 5) Set the counter value
* 6) Disable the counter
* 7) Restore the event in the target counter
*
* 8) Disable the global PMU.
* 9) Restore the status of the rest of the counters.
*
* We choose an event which for CCI-5xx is guaranteed not to count.
* We use the highest possible event code (0x1f) for the master interface 0.
*/
#define CCI5xx_INVALID_EVENT ((CCI5xx_PORT_M0 << CCI5xx_PMU_EVENT_SOURCE_SHIFT) | \
(CCI5xx_PMU_EVENT_CODE_MASK << CCI5xx_PMU_EVENT_CODE_SHIFT))
static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
{
int i;
DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs);
bitmap_zero(saved_mask, cci_pmu->num_cntrs);
pmu_save_counters(cci_pmu, saved_mask);
/*
* Now that all the counters are disabled, we can safely turn the PMU on,
* without syncing the status of the counters
*/
__cci_pmu_enable_nosync(cci_pmu);
for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
struct perf_event *event = cci_pmu->hw_events.events[i];
if (WARN_ON(!event))
continue;
pmu_set_event(cci_pmu, i, CCI5xx_INVALID_EVENT);
pmu_enable_counter(cci_pmu, i);
pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i);
pmu_disable_counter(cci_pmu, i);
pmu_set_event(cci_pmu, i, event->hw.config_base);
}
__cci_pmu_disable();
pmu_restore_counters(cci_pmu, saved_mask);
}
#endif /* CONFIG_ARM_CCI5xx_PMU */
static u64 pmu_event_update(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@ -789,7 +1038,7 @@ static void pmu_read(struct perf_event *event)
pmu_event_update(event);
}
void pmu_event_set_period(struct perf_event *event)
static void pmu_event_set_period(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
/*
@ -800,7 +1049,14 @@ void pmu_event_set_period(struct perf_event *event)
*/
u64 val = 1ULL << 31;
local64_set(&hwc->prev_count, val);
pmu_write_counter(event, val);
/*
* CCI PMU uses PERF_HES_ARCH to keep track of the counters, whose
* values need to be sync-ed with the s/w state before the PMU is
* enabled.
* Mark this counter for sync.
*/
hwc->state |= PERF_HES_ARCH;
}
static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
@ -811,6 +1067,9 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
int idx, handled = IRQ_NONE;
raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable the PMU while we walk through the counters */
__cci_pmu_disable();
/*
* Iterate over counters and update the corresponding perf events.
* This should work regardless of whether we have per-counter overflow
@ -818,13 +1077,10 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
*/
for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) {
struct perf_event *event = events->events[idx];
struct hw_perf_event *hw_counter;
if (!event)
continue;
hw_counter = &event->hw;
/* Did this counter overflow? */
if (!(pmu_read_register(cci_pmu, idx, CCI_PMU_OVRFLW) &
CCI_PMU_OVRFLW_FLAG))
@ -837,6 +1093,9 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
pmu_event_set_period(event);
handled = IRQ_HANDLED;
}
/* Enable the PMU and sync possibly overflowed counters */
__cci_pmu_enable_sync(cci_pmu);
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
return IRQ_RETVAL(handled);
@ -875,16 +1134,12 @@ static void cci_pmu_enable(struct pmu *pmu)
struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs);
unsigned long flags;
u32 val;
if (!enabled)
return;
raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
/* Enable all the PMU counters. */
val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
writel(val, cci_ctrl_base + CCI_PMCR);
__cci_pmu_enable_sync(cci_pmu);
raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
}
@ -894,13 +1149,9 @@ static void cci_pmu_disable(struct pmu *pmu)
struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
unsigned long flags;
u32 val;
raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
/* Disable all the PMU counters. */
val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
writel(val, cci_ctrl_base + CCI_PMCR);
__cci_pmu_disable();
raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
}
@ -1176,9 +1427,8 @@ static int cci_pmu_event_init(struct perf_event *event)
static ssize_t pmu_cpumask_attr_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct dev_ext_attribute *eattr = container_of(attr,
struct dev_ext_attribute, attr);
struct cci_pmu *cci_pmu = eattr->var;
struct pmu *pmu = dev_get_drvdata(dev);
struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
cpumask_pr_args(&cci_pmu->cpus));
@ -1187,13 +1437,11 @@ static ssize_t pmu_cpumask_attr_show(struct device *dev,
return n;
}
static struct dev_ext_attribute pmu_cpumask_attr = {
__ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL),
NULL, /* Populated in cci_pmu_init */
};
static struct device_attribute pmu_cpumask_attr =
__ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL);
static struct attribute *pmu_attrs[] = {
&pmu_cpumask_attr.attr.attr,
&pmu_cpumask_attr.attr,
NULL,
};
@ -1218,60 +1466,14 @@ static const struct attribute_group *pmu_attr_groups[] = {
NULL
};
static struct attribute **alloc_attrs(struct platform_device *pdev,
int n, struct dev_ext_attribute *source)
{
int i;
struct attribute **attrs;
/* Alloc n + 1 (for terminating NULL) */
attrs = devm_kcalloc(&pdev->dev, n + 1, sizeof(struct attribute *),
GFP_KERNEL);
if (!attrs)
return attrs;
for(i = 0; i < n; i++)
attrs[i] = &source[i].attr.attr;
return attrs;
}
static int cci_pmu_init_attrs(struct cci_pmu *cci_pmu, struct platform_device *pdev)
{
const struct cci_pmu_model *model = cci_pmu->model;
struct attribute **attrs;
/*
* All allocations below are managed, hence doesn't need to be
* free'd explicitly in case of an error.
*/
if (model->nevent_attrs) {
attrs = alloc_attrs(pdev, model->nevent_attrs,
model->event_attrs);
if (!attrs)
return -ENOMEM;
pmu_event_attr_group.attrs = attrs;
}
if (model->nformat_attrs) {
attrs = alloc_attrs(pdev, model->nformat_attrs,
model->format_attrs);
if (!attrs)
return -ENOMEM;
pmu_format_attr_group.attrs = attrs;
}
pmu_cpumask_attr.var = cci_pmu;
return 0;
}
static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
{
char *name = cci_pmu->model->name;
const struct cci_pmu_model *model = cci_pmu->model;
char *name = model->name;
u32 num_cntrs;
int rc;
rc = cci_pmu_init_attrs(cci_pmu, pdev);
if (rc)
return rc;
pmu_event_attr_group.attrs = model->event_attrs;
pmu_format_attr_group.attrs = model->format_attrs;
cci_pmu->pmu = (struct pmu) {
.name = cci_pmu->model->name,
@ -1314,7 +1516,7 @@ static int cci_pmu_cpu_notifier(struct notifier_block *self,
if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus))
break;
target = cpumask_any_but(cpu_online_mask, cpu);
if (target < 0) // UP, last CPU
if (target >= nr_cpu_ids) // UP, last CPU
break;
/*
* TODO: migrate context once core races on event->ctx have
@ -1336,9 +1538,7 @@ static struct cci_pmu_model cci_pmu_models[] = {
.num_hw_cntrs = 4,
.cntr_size = SZ_4K,
.format_attrs = cci400_pmu_format_attrs,
.nformat_attrs = ARRAY_SIZE(cci400_pmu_format_attrs),
.event_attrs = cci400_r0_pmu_event_attrs,
.nevent_attrs = ARRAY_SIZE(cci400_r0_pmu_event_attrs),
.event_ranges = {
[CCI_IF_SLAVE] = {
CCI400_R0_SLAVE_PORT_MIN_EV,
@ -1358,9 +1558,7 @@ static struct cci_pmu_model cci_pmu_models[] = {
.num_hw_cntrs = 4,
.cntr_size = SZ_4K,
.format_attrs = cci400_pmu_format_attrs,
.nformat_attrs = ARRAY_SIZE(cci400_pmu_format_attrs),
.event_attrs = cci400_r1_pmu_event_attrs,
.nevent_attrs = ARRAY_SIZE(cci400_r1_pmu_event_attrs),
.event_ranges = {
[CCI_IF_SLAVE] = {
CCI400_R1_SLAVE_PORT_MIN_EV,
@ -1375,31 +1573,54 @@ static struct cci_pmu_model cci_pmu_models[] = {
.get_event_idx = cci400_get_event_idx,
},
#endif
#ifdef CONFIG_ARM_CCI500_PMU
#ifdef CONFIG_ARM_CCI5xx_PMU
[CCI500_R0] = {
.name = "CCI_500",
.fixed_hw_cntrs = 0,
.num_hw_cntrs = 8,
.cntr_size = SZ_64K,
.format_attrs = cci500_pmu_format_attrs,
.nformat_attrs = ARRAY_SIZE(cci500_pmu_format_attrs),
.event_attrs = cci500_pmu_event_attrs,
.nevent_attrs = ARRAY_SIZE(cci500_pmu_event_attrs),
.format_attrs = cci5xx_pmu_format_attrs,
.event_attrs = cci5xx_pmu_event_attrs,
.event_ranges = {
[CCI_IF_SLAVE] = {
CCI500_SLAVE_PORT_MIN_EV,
CCI500_SLAVE_PORT_MAX_EV,
CCI5xx_SLAVE_PORT_MIN_EV,
CCI5xx_SLAVE_PORT_MAX_EV,
},
[CCI_IF_MASTER] = {
CCI500_MASTER_PORT_MIN_EV,
CCI500_MASTER_PORT_MAX_EV,
CCI5xx_MASTER_PORT_MIN_EV,
CCI5xx_MASTER_PORT_MAX_EV,
},
[CCI_IF_GLOBAL] = {
CCI500_GLOBAL_PORT_MIN_EV,
CCI500_GLOBAL_PORT_MAX_EV,
CCI5xx_GLOBAL_PORT_MIN_EV,
CCI5xx_GLOBAL_PORT_MAX_EV,
},
},
.validate_hw_event = cci500_validate_hw_event,
.write_counters = cci5xx_pmu_write_counters,
},
[CCI550_R0] = {
.name = "CCI_550",
.fixed_hw_cntrs = 0,
.num_hw_cntrs = 8,
.cntr_size = SZ_64K,
.format_attrs = cci5xx_pmu_format_attrs,
.event_attrs = cci5xx_pmu_event_attrs,
.event_ranges = {
[CCI_IF_SLAVE] = {
CCI5xx_SLAVE_PORT_MIN_EV,
CCI5xx_SLAVE_PORT_MAX_EV,
},
[CCI_IF_MASTER] = {
CCI5xx_MASTER_PORT_MIN_EV,
CCI5xx_MASTER_PORT_MAX_EV,
},
[CCI_IF_GLOBAL] = {
CCI5xx_GLOBAL_PORT_MIN_EV,
CCI5xx_GLOBAL_PORT_MAX_EV,
},
},
.validate_hw_event = cci550_validate_hw_event,
.write_counters = cci5xx_pmu_write_counters,
},
#endif
};
@ -1419,11 +1640,15 @@ static const struct of_device_id arm_cci_pmu_matches[] = {
.data = &cci_pmu_models[CCI400_R1],
},
#endif
#ifdef CONFIG_ARM_CCI500_PMU
#ifdef CONFIG_ARM_CCI5xx_PMU
{
.compatible = "arm,cci-500-pmu,r0",
.data = &cci_pmu_models[CCI500_R0],
},
{
.compatible = "arm,cci-550-pmu,r0",
.data = &cci_pmu_models[CCI550_R0],
},
#endif
{},
};


@ -13,6 +13,7 @@
#include <linux/bitmap.h>
#include <linux/cpumask.h>
#include <linux/cpu_pm.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/of_device.h>
@ -710,6 +711,93 @@ static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
return NOTIFY_OK;
}
#ifdef CONFIG_CPU_PM
static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
{
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
struct perf_event *event;
int idx;
for (idx = 0; idx < armpmu->num_events; idx++) {
/*
* If the counter is not used skip it, there is no
* need of stopping/restarting it.
*/
if (!test_bit(idx, hw_events->used_mask))
continue;
event = hw_events->events[idx];
switch (cmd) {
case CPU_PM_ENTER:
/*
* Stop and update the counter
*/
armpmu_stop(event, PERF_EF_UPDATE);
break;
case CPU_PM_EXIT:
case CPU_PM_ENTER_FAILED:
/* Restore and enable the counter */
armpmu_start(event, PERF_EF_RELOAD);
break;
default:
break;
}
}
}
static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
void *v)
{
struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
return NOTIFY_DONE;
/*
* Always reset the PMU registers on power-up even if
* there are no events running.
*/
if (cmd == CPU_PM_EXIT && armpmu->reset)
armpmu->reset(armpmu);
if (!enabled)
return NOTIFY_OK;
switch (cmd) {
case CPU_PM_ENTER:
armpmu->stop(armpmu);
cpu_pm_pmu_setup(armpmu, cmd);
break;
case CPU_PM_EXIT:
cpu_pm_pmu_setup(armpmu, cmd);
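/* Fall through: armpmu->start() runs for both EXIT and ENTER_FAILED */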
case CPU_PM_ENTER_FAILED:
armpmu->start(armpmu);
break;
default:
return NOTIFY_DONE;
}
return NOTIFY_OK;
}
static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu)
{
cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify;
return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb);
}
static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu)
{
cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb);
}
#else
static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; }
static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
#endif
static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
int err;
@ -725,6 +813,10 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
if (err)
goto out_hw_events;
err = cpu_pm_pmu_register(cpu_pmu);
if (err)
goto out_unregister;
for_each_possible_cpu(cpu) {
struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
raw_spin_lock_init(&events->pmu_lock);
@ -746,6 +838,8 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
return 0;
out_unregister:
unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
out_hw_events:
free_percpu(cpu_hw_events);
return err;
@ -753,6 +847,7 @@ out_hw_events:
static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
{
cpu_pm_pmu_unregister(cpu_pmu);
unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
free_percpu(cpu_pmu->hw_events);
}
@ -889,6 +984,15 @@ int arm_pmu_device_probe(struct platform_device *pdev,
if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
init_fn = of_id->data;
pmu->secure_access = of_property_read_bool(pdev->dev.of_node,
"secure-reg-access");
/* arm64 systems boot only as non-secure */
if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) {
pr_warn("ignoring \"secure-reg-access\" property for arm64\n");
pmu->secure_access = false;
}
ret = of_pmu_irq_cfg(pmu);
if (!ret)
ret = init_fn(pmu);
@ -898,7 +1002,7 @@ int arm_pmu_device_probe(struct platform_device *pdev,
}
if (ret) {
pr_info("failed to probe PMU!\n");
pr_info("%s: failed to probe PMU!\n", of_node_full_name(node));
goto out_free;
}
@ -918,7 +1022,8 @@ int arm_pmu_device_probe(struct platform_device *pdev,
out_destroy:
cpu_pmu_destroy(pmu);
out_free:
pr_info("failed to register PMU devices!\n");
pr_info("%s: failed to register PMU devices!\n",
of_node_full_name(node));
kfree(pmu);
return ret;
}


@ -104,9 +104,11 @@ struct arm_pmu {
atomic_t active_events;
struct mutex reserve_mutex;
u64 max_period;
bool secure_access; /* 32-bit ARM only */
struct platform_device *plat_device;
struct pmu_hw_events __percpu *hw_events;
struct notifier_block hotplug_nb;
struct notifier_block cpu_pm_nb;
};
#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))