From 42c184ade43a79d36f50a0b3394b3326633f53f5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 6 Sep 2019 22:36:44 +0800 Subject: [PATCH 01/17] perf: hisi: use devm_platform_ioremap_resource() to simplify code Use devm_platform_ioremap_resource() to simplify the code a bit. This is detected by coccinelle. Signed-off-by: YueHaibing Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 5 +---- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 4 +--- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 4 +--- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index e42d4464c2cf..453f1c6a16ca 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -243,8 +243,6 @@ MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *ddrc_pmu) { - struct resource *res; - /* * Use the SCCL_ID and DDRC channel ID to identify the * DDRC PMU, while SCCL_ID is in MPIDR[aff2]. @@ -263,8 +261,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, /* DDRC PMUs only share the same SCCL */ ddrc_pmu->ccl_id = -1; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ddrc_pmu->base = devm_ioremap_resource(&pdev->dev, res); + ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ddrc_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for ddrc_pmu resource\n"); return PTR_ERR(ddrc_pmu->base); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index f28063873e11..6a1dd72d8abb 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -234,7 +234,6 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *hha_pmu) { unsigned long long id; - struct resource *res; acpi_status status; status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), @@ -256,8 +255,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, /* HHA PMUs only share the same SCCL */ hha_pmu->ccl_id = -1; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - hha_pmu->base = devm_ioremap_resource(&pdev->dev, res); + hha_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hha_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for hha_pmu resource\n"); return PTR_ERR(hha_pmu->base); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 078b8dc57250..1151e99b241c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -233,7 +233,6 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { unsigned long long id; - struct resource *res; acpi_status status; status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), @@ -259,8 +258,7 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return -EINVAL; } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - l3c_pmu->base = devm_ioremap_resource(&pdev->dev, res); + l3c_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(l3c_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n"); return PTR_ERR(l3c_pmu->base); From 7fdd7f7c33d2451cf4865b95b24d57785c0ff515 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 6 Sep 2019 22:37:35 +0800 Subject: [PATCH 02/17] perf: xgene: use devm_platform_ioremap_resource() to simplify code Use devm_platform_ioremap_resource() to simplify the code a bit. This is detected by coccinelle. Signed-off-by: YueHaibing Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 7e328d6385c3..46ee6807d533 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1282,25 +1282,21 @@ static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, struct platform_device *pdev) { void __iomem *csw_csr, *mcba_csr, *mcbb_csr; - struct resource *res; unsigned int reg; - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - csw_csr = devm_ioremap_resource(&pdev->dev, res); + csw_csr = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(csw_csr)) { dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n"); return PTR_ERR(csw_csr); } - res = platform_get_resource(pdev, IORESOURCE_MEM, 2); - mcba_csr = devm_ioremap_resource(&pdev->dev, res); + mcba_csr = devm_platform_ioremap_resource(pdev, 2); if (IS_ERR(mcba_csr)) { dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n"); return PTR_ERR(mcba_csr); } - res = platform_get_resource(pdev, IORESOURCE_MEM, 3); - mcbb_csr = devm_ioremap_resource(&pdev->dev, res); + mcbb_csr = devm_platform_ioremap_resource(pdev, 3); if (IS_ERR(mcbb_csr)) { dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n"); return PTR_ERR(mcbb_csr); @@ -1332,13 +1328,11 @@ static int acpi_pmu_v3_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, struct platform_device *pdev) { void __iomem *csw_csr; - struct resource *res; unsigned int reg; u32 mcb0routing; u32 mcb1routing; - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - csw_csr = devm_ioremap_resource(&pdev->dev, res); + csw_csr = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(csw_csr)) { dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n"); return PTR_ERR(csw_csr); From 1c8d96b41d8b05ed9dbc2bea03edf878f544f93e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 6 Sep 2019 22:39:28 +0800 Subject: [PATCH 03/17] perf/arm-ccn: use devm_platform_ioremap_resource() to simplify code Use devm_platform_ioremap_resource() to simplify the code a bit. This is detected by coccinelle. Signed-off-by: YueHaibing Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 6fc0273b6129..fee5cb2e8afb 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1477,8 +1477,7 @@ static int arm_ccn_probe(struct platform_device *pdev) ccn->dev = &pdev->dev; platform_set_drvdata(pdev, ccn); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ccn->base = devm_ioremap_resource(ccn->dev, res); + ccn->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ccn->base)) return PTR_ERR(ccn->base); From 504db0f82660898cbb76478cb2eaee46817c85b8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 6 Sep 2019 22:40:14 +0800 Subject: [PATCH 04/17] perf/arm-cci: use devm_platform_ioremap_resource() to simplify code Use devm_platform_ioremap_resource() to simplify the code a bit. This is detected by coccinelle. Signed-off-by: YueHaibing Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 8f8606b9bc9e..1b8e337a29ca 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1642,7 +1642,6 @@ static struct cci_pmu *cci_pmu_alloc(struct device *dev) static int cci_pmu_probe(struct platform_device *pdev) { - struct resource *res; struct cci_pmu *cci_pmu; int i, ret, irq; @@ -1650,8 +1649,7 @@ static int cci_pmu_probe(struct platform_device *pdev) if (IS_ERR(cci_pmu)) return PTR_ERR(cci_pmu); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - cci_pmu->base = devm_ioremap_resource(&pdev->dev, res); + cci_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cci_pmu->base)) return -ENOMEM; From c8b0de762e0b7cf9f51ad13b19bd279809317f37 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 9 Sep 2019 09:22:57 +0800 Subject: [PATCH 05/17] perf/smmuv3: use devm_platform_ioremap_resource() to simplify code Use devm_platform_ioremap_resource() to simplify the code a bit. This is detected by coccinelle. Signed-off-by: YueHaibing Signed-off-by: Will Deacon --- drivers/perf/arm_smmuv3_pmu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index abcf54f7d19c..773128f411f1 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -727,7 +727,7 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu) static int smmu_pmu_probe(struct platform_device *pdev) { struct smmu_pmu *smmu_pmu; - struct resource *res_0, *res_1; + struct resource *res_0; u32 cfgr, reg_size; u64 ceid_64[2]; int irq, err; @@ -764,8 +764,7 @@ static int smmu_pmu_probe(struct platform_device *pdev) /* Determine if page 1 is present */ if (cfgr & SMMU_PMCG_CFGR_RELOC_CTRS) { - res_1 = platform_get_resource(pdev, IORESOURCE_MEM, 1); - smmu_pmu->reloc_base = devm_ioremap_resource(dev, res_1); + smmu_pmu->reloc_base = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(smmu_pmu->reloc_base)) return PTR_ERR(smmu_pmu->reloc_base); } else { From 126b0a1700c5c2688d9319286d84db7f75627810 Mon Sep 17 00:00:00 2001 From: Marek Bykowski Date: Wed, 16 Oct 2019 11:57:39 +0200 Subject: [PATCH 06/17] perf: arm-ccn: Enable stats for CCN-512 interconnect Add compatible string for the ARM CCN-512 interconnect Acked-by: Pawel Moll Signed-off-by: Marek Bykowski Signed-off-by: Boleslaw Malecki Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index fee5cb2e8afb..fea354d6fb29 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1536,6 +1536,7 @@ static int arm_ccn_remove(struct platform_device *pdev) static const struct of_device_id arm_ccn_match[] = { { .compatible = "arm,ccn-502", }, { .compatible = "arm,ccn-504", }, + { .compatible = "arm,ccn-512", }, {}, }; MODULE_DEVICE_TABLE(of, arm_ccn_match); From 05daff069f8f29436673e8f417b2d0ac805b6f69 Mon Sep 17 00:00:00 2001 From: Marek Bykowski Date: Mon, 7 Oct 2019 15:21:15 +0200 Subject: [PATCH 07/17] Documentation: Add documentation for CCN-512 DTS binding Indicate the arm-ccn perf back-end supports now ccn-512. Acked-by: Rob Herring Signed-off-by: Marek Bykowski Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/perf/arm-ccn.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/perf/arm-ccn.txt b/Documentation/devicetree/bindings/perf/arm-ccn.txt index 43b5a71a5a9d..1c53b5aa3317 100644 --- a/Documentation/devicetree/bindings/perf/arm-ccn.txt +++ b/Documentation/devicetree/bindings/perf/arm-ccn.txt @@ -6,6 +6,7 @@ Required properties: "arm,ccn-502" "arm,ccn-504" "arm,ccn-508" + "arm,ccn-512" - reg: (standard registers property) physical address and size (16MB) of the configuration registers block From 030f6f84e556759ecffbdd560713c3bfac4483e4 Mon Sep 17 00:00:00 2001 From: Ganapatrao Prabhakerrao Kulkarni Date: Wed, 16 Oct 2019 09:36:59 +0000 Subject: [PATCH 08/17] Documentation: perf: Update documentation for ThunderX2 PMU uncore driver Add documentation for Cavium Coherent Processor Interconnect (CCPI2) PMU. Signed-off-by: Ganapatrao Prabhakerrao Kulkarni Signed-off-by: Will Deacon --- .../admin-guide/perf/thunderx2-pmu.rst | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/perf/thunderx2-pmu.rst b/Documentation/admin-guide/perf/thunderx2-pmu.rst index 08e33675853a..01f158238ae1 100644 --- a/Documentation/admin-guide/perf/thunderx2-pmu.rst +++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst @@ -3,24 +3,26 @@ Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE) ============================================================= The ThunderX2 SoC PMU consists of independent, system-wide, per-socket -PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC). +PMUs such as the Level 3 Cache (L3C), DDR4 Memory Controller (DMC) and +Cavium Coherent Processor Interconnect (CCPI2). The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles. Events are counted for the default channel (i.e. channel 0) and prorated to the total number of channels/tiles. -The DMC and L3C support up to 4 counters. Counters are independently -programmable and can be started and stopped individually. Each counter -can be set to a different event. Counters are 32-bit and do not support -an overflow interrupt; they are read every 2 seconds. +The DMC and L3C support up to 4 counters, while the CCPI2 supports up to 8 +counters. Counters are independently programmable to different events and +can be started and stopped individually. None of the counters support an +overflow interrupt. DMC and L3C counters are 32-bit and read every 2 seconds. +The CCPI2 counters are 64-bit and assumed not to overflow in normal operation. PMU UNCORE (perf) driver: The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and -L3C devices. Each PMU can be used to count up to 4 events -simultaneously. The PMUs provide a description of their available events -and configuration options under sysfs, see -/sys/devices/uncore_; S is the socket id. +L3C devices. Each PMU can be used to count up to 4 (DMC/L3C) or up to 8 +(CCPI2) events simultaneously. The PMUs provide a description of their +available events and configuration options under sysfs, see +/sys/devices/uncore_; S is the socket id. The driver does not support sampling, therefore "perf record" will not work. Per-task perf sessions are also not supported. From 5e2c27e833bb92487fdf49ff6d358361d00521c9 Mon Sep 17 00:00:00 2001 From: Ganapatrao Prabhakerrao Kulkarni Date: Wed, 16 Oct 2019 09:37:00 +0000 Subject: [PATCH 09/17] drivers/perf: Add CCPI2 PMU support in ThunderX2 UNCORE driver. CCPI2 is a low-latency high-bandwidth serial interface for inter socket connectivity of ThunderX2 processors. CCPI2 PMU supports up to 8 counters per socket. Counters are independently programmable to different events and can be started and stopped individually. The CCPI2 counters are 64-bit and do not overflow in normal operation. Signed-off-by: Ganapatrao Prabhakerrao Kulkarni Signed-off-by: Will Deacon --- drivers/perf/thunderx2_pmu.c | 267 ++++++++++++++++++++++++++++++----- 1 file changed, 234 insertions(+), 33 deletions(-) diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index 43d76c85da56..51b31d6ff2c4 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -16,23 +16,36 @@ * they need to be sampled before overflow(i.e, at every 2 seconds). */ -#define TX2_PMU_MAX_COUNTERS 4 +#define TX2_PMU_DMC_L3C_MAX_COUNTERS 4 +#define TX2_PMU_CCPI2_MAX_COUNTERS 8 +#define TX2_PMU_MAX_COUNTERS TX2_PMU_CCPI2_MAX_COUNTERS + + #define TX2_PMU_DMC_CHANNELS 8 #define TX2_PMU_L3_TILES 16 #define TX2_PMU_HRTIMER_INTERVAL (2 * NSEC_PER_SEC) -#define GET_EVENTID(ev) ((ev->hw.config) & 0x1f) -#define GET_COUNTERID(ev) ((ev->hw.idx) & 0x3) +#define GET_EVENTID(ev, mask) ((ev->hw.config) & mask) +#define GET_COUNTERID(ev, mask) ((ev->hw.idx) & mask) /* 1 byte per counter(4 counters). * Event id is encoded in bits [5:1] of a byte, */ #define DMC_EVENT_CFG(idx, val) ((val) << (((idx) * 8) + 1)) +/* bits[3:0] to select counters, are indexed from 8 to 15. */ +#define CCPI2_COUNTER_OFFSET 8 + #define L3C_COUNTER_CTL 0xA8 #define L3C_COUNTER_DATA 0xAC #define DMC_COUNTER_CTL 0x234 #define DMC_COUNTER_DATA 0x240 +#define CCPI2_PERF_CTL 0x108 +#define CCPI2_COUNTER_CTL 0x10C +#define CCPI2_COUNTER_SEL 0x12c +#define CCPI2_COUNTER_DATA_L 0x130 +#define CCPI2_COUNTER_DATA_H 0x134 + /* L3C event IDs */ #define L3_EVENT_READ_REQ 0xD #define L3_EVENT_WRITEBACK_REQ 0xE @@ -51,15 +64,28 @@ #define DMC_EVENT_READ_TXNS 0xF #define DMC_EVENT_MAX 0x10 +#define CCPI2_EVENT_REQ_PKT_SENT 0x3D +#define CCPI2_EVENT_SNOOP_PKT_SENT 0x65 +#define CCPI2_EVENT_DATA_PKT_SENT 0x105 +#define CCPI2_EVENT_GIC_PKT_SENT 0x12D +#define CCPI2_EVENT_MAX 0x200 + +#define CCPI2_PERF_CTL_ENABLE BIT(0) +#define CCPI2_PERF_CTL_START BIT(1) +#define CCPI2_PERF_CTL_RESET BIT(4) +#define CCPI2_EVENT_LEVEL_RISING_EDGE BIT(10) +#define CCPI2_EVENT_TYPE_EDGE_SENSITIVE BIT(11) + enum tx2_uncore_type { PMU_TYPE_L3C, PMU_TYPE_DMC, + PMU_TYPE_CCPI2, PMU_TYPE_INVALID, }; /* - * pmu on each socket has 2 uncore devices(dmc and l3c), - * each device has 4 counters. + * Each socket has 3 uncore devices associated with a PMU. The DMC and + * L3C have 4 32-bit counters and the CCPI2 has 8 64-bit counters. */ struct tx2_uncore_pmu { struct hlist_node hpnode; @@ -69,8 +95,10 @@ struct tx2_uncore_pmu { int node; int cpu; u32 max_counters; + u32 counters_mask; u32 prorate_factor; u32 max_events; + u32 events_mask; u64 hrtimer_interval; void __iomem *base; DECLARE_BITMAP(active_counters, TX2_PMU_MAX_COUNTERS); @@ -79,6 +107,7 @@ struct tx2_uncore_pmu { struct hrtimer hrtimer; const struct attribute_group **attr_groups; enum tx2_uncore_type type; + enum hrtimer_restart (*hrtimer_callback)(struct hrtimer *cb); void (*init_cntr_base)(struct perf_event *event, struct tx2_uncore_pmu *tx2_pmu); void (*stop_event)(struct perf_event *event); @@ -92,7 +121,21 @@ static inline struct tx2_uncore_pmu *pmu_to_tx2_pmu(struct pmu *pmu) return container_of(pmu, struct tx2_uncore_pmu, pmu); } -PMU_FORMAT_ATTR(event, "config:0-4"); +#define TX2_PMU_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t \ +__tx2_pmu_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ + \ +static struct device_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __tx2_pmu_##_var##_show, NULL) + +TX2_PMU_FORMAT_ATTR(event, event, "config:0-4"); +TX2_PMU_FORMAT_ATTR(event_ccpi2, event, "config:0-9"); static struct attribute *l3c_pmu_format_attrs[] = { &format_attr_event.attr, @@ -104,6 +147,11 @@ static struct attribute *dmc_pmu_format_attrs[] = { NULL, }; +static struct attribute *ccpi2_pmu_format_attrs[] = { + &format_attr_event_ccpi2.attr, + NULL, +}; + static const struct attribute_group l3c_pmu_format_attr_group = { .name = "format", .attrs = l3c_pmu_format_attrs, @@ -114,6 +162,11 @@ static const struct attribute_group dmc_pmu_format_attr_group = { .attrs = dmc_pmu_format_attrs, }; +static const struct attribute_group ccpi2_pmu_format_attr_group = { + .name = "format", + .attrs = ccpi2_pmu_format_attrs, +}; + /* * sysfs event attributes */ @@ -164,6 +217,19 @@ static struct attribute *dmc_pmu_events_attrs[] = { NULL, }; +TX2_EVENT_ATTR(req_pktsent, CCPI2_EVENT_REQ_PKT_SENT); +TX2_EVENT_ATTR(snoop_pktsent, CCPI2_EVENT_SNOOP_PKT_SENT); +TX2_EVENT_ATTR(data_pktsent, CCPI2_EVENT_DATA_PKT_SENT); +TX2_EVENT_ATTR(gic_pktsent, CCPI2_EVENT_GIC_PKT_SENT); + +static struct attribute *ccpi2_pmu_events_attrs[] = { + &tx2_pmu_event_attr_req_pktsent.attr.attr, + &tx2_pmu_event_attr_snoop_pktsent.attr.attr, + &tx2_pmu_event_attr_data_pktsent.attr.attr, + &tx2_pmu_event_attr_gic_pktsent.attr.attr, + NULL, +}; + static const struct attribute_group l3c_pmu_events_attr_group = { .name = "events", .attrs = l3c_pmu_events_attrs, @@ -174,6 +240,11 @@ static const struct attribute_group dmc_pmu_events_attr_group = { .attrs = dmc_pmu_events_attrs, }; +static const struct attribute_group ccpi2_pmu_events_attr_group = { + .name = "events", + .attrs = ccpi2_pmu_events_attrs, +}; + /* * sysfs cpumask attributes */ @@ -213,6 +284,13 @@ static const struct attribute_group *dmc_pmu_attr_groups[] = { NULL }; +static const struct attribute_group *ccpi2_pmu_attr_groups[] = { + &ccpi2_pmu_format_attr_group, + &pmu_cpumask_attr_group, + &ccpi2_pmu_events_attr_group, + NULL +}; + static inline u32 reg_readl(unsigned long addr) { return readl((void __iomem *)addr); @@ -245,33 +323,58 @@ static void init_cntr_base_l3c(struct perf_event *event, struct tx2_uncore_pmu *tx2_pmu) { struct hw_perf_event *hwc = &event->hw; + u32 cmask; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + cmask = tx2_pmu->counters_mask; /* counter ctrl/data reg offset at 8 */ hwc->config_base = (unsigned long)tx2_pmu->base - + L3C_COUNTER_CTL + (8 * GET_COUNTERID(event)); + + L3C_COUNTER_CTL + (8 * GET_COUNTERID(event, cmask)); hwc->event_base = (unsigned long)tx2_pmu->base - + L3C_COUNTER_DATA + (8 * GET_COUNTERID(event)); + + L3C_COUNTER_DATA + (8 * GET_COUNTERID(event, cmask)); } static void init_cntr_base_dmc(struct perf_event *event, struct tx2_uncore_pmu *tx2_pmu) { struct hw_perf_event *hwc = &event->hw; + u32 cmask; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + cmask = tx2_pmu->counters_mask; hwc->config_base = (unsigned long)tx2_pmu->base + DMC_COUNTER_CTL; /* counter data reg offset at 0xc */ hwc->event_base = (unsigned long)tx2_pmu->base - + DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event)); + + DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event, cmask)); +} + +static void init_cntr_base_ccpi2(struct perf_event *event, + struct tx2_uncore_pmu *tx2_pmu) +{ + struct hw_perf_event *hwc = &event->hw; + u32 cmask; + + cmask = tx2_pmu->counters_mask; + + hwc->config_base = (unsigned long)tx2_pmu->base + + CCPI2_COUNTER_CTL + (4 * GET_COUNTERID(event, cmask)); + hwc->event_base = (unsigned long)tx2_pmu->base; } static void uncore_start_event_l3c(struct perf_event *event, int flags) { - u32 val; + u32 val, emask; struct hw_perf_event *hwc = &event->hw; + struct tx2_uncore_pmu *tx2_pmu; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + emask = tx2_pmu->events_mask; /* event id encoded in bits [07:03] */ - val = GET_EVENTID(event) << 3; + val = GET_EVENTID(event, emask) << 3; reg_writel(val, hwc->config_base); local64_set(&hwc->prev_count, 0); reg_writel(0, hwc->event_base); @@ -284,10 +387,17 @@ static inline void uncore_stop_event_l3c(struct perf_event *event) static void uncore_start_event_dmc(struct perf_event *event, int flags) { - u32 val; + u32 val, cmask, emask; struct hw_perf_event *hwc = &event->hw; - int idx = GET_COUNTERID(event); - int event_id = GET_EVENTID(event); + struct tx2_uncore_pmu *tx2_pmu; + int idx, event_id; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + cmask = tx2_pmu->counters_mask; + emask = tx2_pmu->events_mask; + + idx = GET_COUNTERID(event, cmask); + event_id = GET_EVENTID(event, emask); /* enable and start counters. * 8 bits for each counter, bits[05:01] of a counter to set event type. @@ -302,9 +412,14 @@ static void uncore_start_event_dmc(struct perf_event *event, int flags) static void uncore_stop_event_dmc(struct perf_event *event) { - u32 val; + u32 val, cmask; struct hw_perf_event *hwc = &event->hw; - int idx = GET_COUNTERID(event); + struct tx2_uncore_pmu *tx2_pmu; + int idx; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + cmask = tx2_pmu->counters_mask; + idx = GET_COUNTERID(event, cmask); /* clear event type(bits[05:01]) to stop counter */ val = reg_readl(hwc->config_base); @@ -312,27 +427,72 @@ static void uncore_stop_event_dmc(struct perf_event *event) reg_writel(val, hwc->config_base); } +static void uncore_start_event_ccpi2(struct perf_event *event, int flags) +{ + u32 emask; + struct hw_perf_event *hwc = &event->hw; + struct tx2_uncore_pmu *tx2_pmu; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + emask = tx2_pmu->events_mask; + + /* Bit [09:00] to set event id. + * Bits [10], set level to rising edge. + * Bits [11], set type to edge sensitive. + */ + reg_writel((CCPI2_EVENT_TYPE_EDGE_SENSITIVE | + CCPI2_EVENT_LEVEL_RISING_EDGE | + GET_EVENTID(event, emask)), hwc->config_base); + + /* reset[4], enable[0] and start[1] counters */ + reg_writel(CCPI2_PERF_CTL_RESET | + CCPI2_PERF_CTL_START | + CCPI2_PERF_CTL_ENABLE, + hwc->event_base + CCPI2_PERF_CTL); + local64_set(&event->hw.prev_count, 0ULL); +} + +static void uncore_stop_event_ccpi2(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* disable and stop counter */ + reg_writel(0, hwc->event_base + CCPI2_PERF_CTL); +} + static void tx2_uncore_event_update(struct perf_event *event) { - s64 prev, delta, new = 0; + u64 prev, delta, new = 0; struct hw_perf_event *hwc = &event->hw; struct tx2_uncore_pmu *tx2_pmu; enum tx2_uncore_type type; u32 prorate_factor; + u32 cmask, emask; tx2_pmu = pmu_to_tx2_pmu(event->pmu); type = tx2_pmu->type; + cmask = tx2_pmu->counters_mask; + emask = tx2_pmu->events_mask; prorate_factor = tx2_pmu->prorate_factor; - - new = reg_readl(hwc->event_base); - prev = local64_xchg(&hwc->prev_count, new); - - /* handles rollover of 32 bit counter */ - delta = (u32)(((1UL << 32) - prev) + new); + if (type == PMU_TYPE_CCPI2) { + reg_writel(CCPI2_COUNTER_OFFSET + + GET_COUNTERID(event, cmask), + hwc->event_base + CCPI2_COUNTER_SEL); + new = reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_H); + new = (new << 32) + + reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_L); + prev = local64_xchg(&hwc->prev_count, new); + delta = new - prev; + } else { + new = reg_readl(hwc->event_base); + prev = local64_xchg(&hwc->prev_count, new); + /* handles rollover of 32 bit counter */ + delta = (u32)(((1UL << 32) - prev) + new); + } /* DMC event data_transfers granularity is 16 Bytes, convert it to 64 */ if (type == PMU_TYPE_DMC && - GET_EVENTID(event) == DMC_EVENT_DATA_TRANSFERS) + GET_EVENTID(event, emask) == DMC_EVENT_DATA_TRANSFERS) delta = delta/4; /* L3C and DMC has 16 and 8 interleave channels respectively. @@ -351,6 +511,7 @@ static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev) } devices[] = { {"CAV901D", PMU_TYPE_L3C}, {"CAV901F", PMU_TYPE_DMC}, + {"CAV901E", PMU_TYPE_CCPI2}, {"", PMU_TYPE_INVALID} }; @@ -380,7 +541,8 @@ static bool tx2_uncore_validate_event(struct pmu *pmu, * Make sure the group of events can be scheduled at once * on the PMU. */ -static bool tx2_uncore_validate_event_group(struct perf_event *event) +static bool tx2_uncore_validate_event_group(struct perf_event *event, + int max_counters) { struct perf_event *sibling, *leader = event->group_leader; int counters = 0; @@ -403,7 +565,7 @@ static bool tx2_uncore_validate_event_group(struct perf_event *event) * If the group requires more counters than the HW has, * it cannot ever be scheduled. */ - return counters <= TX2_PMU_MAX_COUNTERS; + return counters <= max_counters; } @@ -439,7 +601,7 @@ static int tx2_uncore_event_init(struct perf_event *event) hwc->config = event->attr.config; /* Validate the group */ - if (!tx2_uncore_validate_event_group(event)) + if (!tx2_uncore_validate_event_group(event, tx2_pmu->max_counters)) return -EINVAL; return 0; @@ -456,6 +618,10 @@ static void tx2_uncore_event_start(struct perf_event *event, int flags) tx2_pmu->start_event(event, flags); perf_event_update_userpage(event); + /* No hrtimer needed for CCPI2, 64-bit counters */ + if (!tx2_pmu->hrtimer_callback) + return; + /* Start timer for first event */ if (bitmap_weight(tx2_pmu->active_counters, tx2_pmu->max_counters) == 1) { @@ -510,15 +676,23 @@ static void tx2_uncore_event_del(struct perf_event *event, int flags) { struct tx2_uncore_pmu *tx2_pmu = pmu_to_tx2_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + u32 cmask; + cmask = tx2_pmu->counters_mask; tx2_uncore_event_stop(event, PERF_EF_UPDATE); /* clear the assigned counter */ - free_counter(tx2_pmu, GET_COUNTERID(event)); + free_counter(tx2_pmu, GET_COUNTERID(event, cmask)); perf_event_update_userpage(event); tx2_pmu->events[hwc->idx] = NULL; hwc->idx = -1; + + if (!tx2_pmu->hrtimer_callback) + return; + + if (bitmap_empty(tx2_pmu->active_counters, tx2_pmu->max_counters)) + hrtimer_cancel(&tx2_pmu->hrtimer); } static void tx2_uncore_event_read(struct perf_event *event) @@ -580,8 +754,12 @@ static int tx2_uncore_pmu_add_dev(struct tx2_uncore_pmu *tx2_pmu) cpu_online_mask); tx2_pmu->cpu = cpu; - hrtimer_init(&tx2_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - tx2_pmu->hrtimer.function = tx2_hrtimer_callback; + + if (tx2_pmu->hrtimer_callback) { + hrtimer_init(&tx2_pmu->hrtimer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + tx2_pmu->hrtimer.function = tx2_pmu->hrtimer_callback; + } ret = tx2_uncore_pmu_register(tx2_pmu); if (ret) { @@ -653,10 +831,13 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, switch (tx2_pmu->type) { case PMU_TYPE_L3C: - tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS; + tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS; + tx2_pmu->counters_mask = 0x3; tx2_pmu->prorate_factor = TX2_PMU_L3_TILES; tx2_pmu->max_events = L3_EVENT_MAX; + tx2_pmu->events_mask = 0x1f; tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL; + tx2_pmu->hrtimer_callback = tx2_hrtimer_callback; tx2_pmu->attr_groups = l3c_pmu_attr_groups; tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, "uncore_l3c_%d", tx2_pmu->node); @@ -665,10 +846,13 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, tx2_pmu->stop_event = uncore_stop_event_l3c; break; case PMU_TYPE_DMC: - tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS; + tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS; + tx2_pmu->counters_mask = 0x3; tx2_pmu->prorate_factor = TX2_PMU_DMC_CHANNELS; tx2_pmu->max_events = DMC_EVENT_MAX; + tx2_pmu->events_mask = 0x1f; tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL; + tx2_pmu->hrtimer_callback = tx2_hrtimer_callback; tx2_pmu->attr_groups = dmc_pmu_attr_groups; tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, "uncore_dmc_%d", tx2_pmu->node); @@ -676,6 +860,21 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, tx2_pmu->start_event = uncore_start_event_dmc; tx2_pmu->stop_event = uncore_stop_event_dmc; break; + case PMU_TYPE_CCPI2: + /* CCPI2 has 8 counters */ + tx2_pmu->max_counters = TX2_PMU_CCPI2_MAX_COUNTERS; + tx2_pmu->counters_mask = 0x7; + tx2_pmu->prorate_factor = 1; + tx2_pmu->max_events = CCPI2_EVENT_MAX; + tx2_pmu->events_mask = 0x1ff; + tx2_pmu->attr_groups = ccpi2_pmu_attr_groups; + tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, + "uncore_ccpi2_%d", tx2_pmu->node); + tx2_pmu->init_cntr_base = init_cntr_base_ccpi2; + tx2_pmu->start_event = uncore_start_event_ccpi2; + tx2_pmu->stop_event = uncore_stop_event_ccpi2; + tx2_pmu->hrtimer_callback = NULL; + break; case PMU_TYPE_INVALID: devm_kfree(dev, tx2_pmu); return NULL; @@ -744,7 +943,9 @@ static int tx2_uncore_pmu_offline_cpu(unsigned int cpu, if (cpu != tx2_pmu->cpu) return 0; - hrtimer_cancel(&tx2_pmu->hrtimer); + if (tx2_pmu->hrtimer_callback) + hrtimer_cancel(&tx2_pmu->hrtimer); + cpumask_copy(&cpu_online_mask_temp, cpu_online_mask); cpumask_clear_cpu(cpu, &cpu_online_mask_temp); new_cpu = cpumask_any_and( From 9ef8567ccf2eb00473b1280d0911caf3f413dc67 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Wed, 30 Oct 2019 11:46:17 +0800 Subject: [PATCH 10/17] arm64: perf: Simplify the ARMv8 PMUv3 event attributes For each PMU event, there is a ARMV8_EVENT_ATTR(xx, XX) and &armv8_event_attr_xx.attr.attr. Let's redefine the ARMV8_EVENT_ATTR to simplify the armv8_pmuv3_event_attrs. Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Shaokun Zhang [will: Dropped unnecessary array syntax] Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 191 ++++++++++++--------------------- 1 file changed, 66 insertions(+), 125 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index a0b4f1bca491..e40b65645c86 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -158,133 +158,74 @@ armv8pmu_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%03llx\n", pmu_attr->id); } -#define ARMV8_EVENT_ATTR(name, config) \ - PMU_EVENT_ATTR(name, armv8_event_attr_##name, \ - config, armv8pmu_events_sysfs_show) - -ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR); -ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL); -ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL); -ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE); -ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL); -ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED); -ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED); -ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED); -ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN); -ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN); -ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED); -ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED); -ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED); -ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED); -ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED); -ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED); -ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES); -ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED); -ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS); -ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE); -ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB); -ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE); -ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB); -ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS); -ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR); -ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC); -ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED); -ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES); -/* Don't expose the chain event in /sys, since it's useless in isolation */ -ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED); -ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED); -ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND); -ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND); -ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB); -ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB); -ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE); -ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL); -ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE); -ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB); -ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL); -ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL); -ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB); -ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB); -ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS); -ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE); -ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS); -ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK); -ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK); -ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD); -ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD); -ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD); -ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP); -ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED); -ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE); -ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION); +#define ARMV8_EVENT_ATTR(name, config) \ + (&((struct perf_pmu_events_attr) { \ + .attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL), \ + .id = config, \ + }).attr.attr) static struct attribute *armv8_pmuv3_event_attrs[] = { - &armv8_event_attr_sw_incr.attr.attr, - &armv8_event_attr_l1i_cache_refill.attr.attr, - &armv8_event_attr_l1i_tlb_refill.attr.attr, - &armv8_event_attr_l1d_cache_refill.attr.attr, - &armv8_event_attr_l1d_cache.attr.attr, - &armv8_event_attr_l1d_tlb_refill.attr.attr, - &armv8_event_attr_ld_retired.attr.attr, - &armv8_event_attr_st_retired.attr.attr, - &armv8_event_attr_inst_retired.attr.attr, - &armv8_event_attr_exc_taken.attr.attr, - &armv8_event_attr_exc_return.attr.attr, - &armv8_event_attr_cid_write_retired.attr.attr, - &armv8_event_attr_pc_write_retired.attr.attr, - &armv8_event_attr_br_immed_retired.attr.attr, - &armv8_event_attr_br_return_retired.attr.attr, - &armv8_event_attr_unaligned_ldst_retired.attr.attr, - &armv8_event_attr_br_mis_pred.attr.attr, - &armv8_event_attr_cpu_cycles.attr.attr, - &armv8_event_attr_br_pred.attr.attr, - &armv8_event_attr_mem_access.attr.attr, - &armv8_event_attr_l1i_cache.attr.attr, - &armv8_event_attr_l1d_cache_wb.attr.attr, - &armv8_event_attr_l2d_cache.attr.attr, - &armv8_event_attr_l2d_cache_refill.attr.attr, - &armv8_event_attr_l2d_cache_wb.attr.attr, - &armv8_event_attr_bus_access.attr.attr, - &armv8_event_attr_memory_error.attr.attr, - &armv8_event_attr_inst_spec.attr.attr, - &armv8_event_attr_ttbr_write_retired.attr.attr, - &armv8_event_attr_bus_cycles.attr.attr, - &armv8_event_attr_l1d_cache_allocate.attr.attr, - &armv8_event_attr_l2d_cache_allocate.attr.attr, - &armv8_event_attr_br_retired.attr.attr, - &armv8_event_attr_br_mis_pred_retired.attr.attr, - &armv8_event_attr_stall_frontend.attr.attr, - &armv8_event_attr_stall_backend.attr.attr, - &armv8_event_attr_l1d_tlb.attr.attr, - &armv8_event_attr_l1i_tlb.attr.attr, - &armv8_event_attr_l2i_cache.attr.attr, - &armv8_event_attr_l2i_cache_refill.attr.attr, - &armv8_event_attr_l3d_cache_allocate.attr.attr, - &armv8_event_attr_l3d_cache_refill.attr.attr, - &armv8_event_attr_l3d_cache.attr.attr, - &armv8_event_attr_l3d_cache_wb.attr.attr, - &armv8_event_attr_l2d_tlb_refill.attr.attr, - &armv8_event_attr_l2i_tlb_refill.attr.attr, - &armv8_event_attr_l2d_tlb.attr.attr, - &armv8_event_attr_l2i_tlb.attr.attr, - &armv8_event_attr_remote_access.attr.attr, - &armv8_event_attr_ll_cache.attr.attr, - &armv8_event_attr_ll_cache_miss.attr.attr, - &armv8_event_attr_dtlb_walk.attr.attr, - &armv8_event_attr_itlb_walk.attr.attr, - &armv8_event_attr_ll_cache_rd.attr.attr, - &armv8_event_attr_ll_cache_miss_rd.attr.attr, - &armv8_event_attr_remote_access_rd.attr.attr, - &armv8_event_attr_sample_pop.attr.attr, - &armv8_event_attr_sample_feed.attr.attr, - &armv8_event_attr_sample_filtrate.attr.attr, - &armv8_event_attr_sample_collision.attr.attr, + ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR), + ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL), + ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE), + ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL), + ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED), + ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED), + ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED), + ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN), + ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN), + ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED), + ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED), + ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED), + ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED), + ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED), + ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES), + ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED), + ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS), + ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE), + ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE), + ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB), + ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS), + ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR), + ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC), + ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED), + ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES), + /* Don't expose the chain event in /sys, since it's useless in isolation */ + ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED), + ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND), + ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND), + ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB), + ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB), + ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE), + ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL), + ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE), + ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL), + ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL), + ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB), + ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB), + ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS), + ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE), + ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS), + ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK), + ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK), + ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD), + ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD), + ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD), + ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP), + ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED), + ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE), + ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION), NULL, }; From 76d835fcd429615b61fccbe65de4f53360a89ca7 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 1 Nov 2019 08:36:10 +0000 Subject: [PATCH 11/17] docs/perf: Add explanation for DDR_CAP_AXI_ID_FILTER_ENHANCED quirk Add explanation for DDR_CAP_AXI_ID_FILTER_ENHANCED quirk. Signed-off-by: Joakim Zhang [will: Simplified wording] Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/imx-ddr.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst index 517a205abad6..66988b481217 100644 --- a/Documentation/admin-guide/perf/imx-ddr.rst +++ b/Documentation/admin-guide/perf/imx-ddr.rst @@ -50,3 +50,8 @@ in the driver. axi_id to monitor a specific id, rather than having to specify axi_mask. e.g.:: perf stat -a -e imx8_ddr0/axid-read,axi_id=0x12/ cmd, which will monitor ARID=0x12 + +* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk. + This is an extension to the DDR_CAP_AXI_ID_FILTER quirk which permits + counting the number of bytes (as opposed to the number of bursts) from DDR + read and write transactions concurrently with another set of data counters. From 1178addaca66292b309b3cbbdc12421cef4894b2 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 1 Nov 2019 08:36:13 +0000 Subject: [PATCH 12/17] bindings: perf: imx-ddr: Add new compatible string Add new compatible string for i.MX8MPlus DDR PMU core. Signed-off-by: Joakim Zhang Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt index d77e3f26f9e6..7822a806ea0a 100644 --- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt +++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt @@ -5,6 +5,7 @@ Required properties: - compatible: should be one of: "fsl,imx8-ddr-pmu" "fsl,imx8m-ddr-pmu" + "fsl,imx8mp-ddr-pmu" - reg: physical address and size From 44f8bd014a94ed679ddb77d0b92350d4ac4f23a5 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 1 Nov 2019 08:36:16 +0000 Subject: [PATCH 13/17] perf/imx_ddr: Add enhanced AXI ID filter support With DDR_CAP_AXI_ID_FILTER quirk, indicating HW supports AXI ID filter which only can get bursts from DDR transaction, i.e. DDR read/write requests. This patch add DDR_CAP_AXI_ID_ENHANCED_FILTER quirk, indicating HW supports AXI ID filter which can get bursts and bytes from DDR transaction at the same time. We hope PMU always return bytes in the driver due to it is more meaningful for users. Signed-off-by: Joakim Zhang Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 63 +++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index ce7345745b42..2a3966d059e7 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -45,7 +45,8 @@ static DEFINE_IDA(ddr_ida); /* DDR Perf hardware feature */ -#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */ +#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */ +#define DDR_CAP_AXI_ID_FILTER_ENHANCED 0x3 /* support enhanced AXI ID filter */ struct fsl_ddr_devtype_data { unsigned int quirks; /* quirks needed for different DDR Perf core */ @@ -178,6 +179,36 @@ static const struct attribute_group *attr_groups[] = { NULL, }; +static bool ddr_perf_is_filtered(struct perf_event *event) +{ + return event->attr.config == 0x41 || event->attr.config == 0x42; +} + +static u32 ddr_perf_filter_val(struct perf_event *event) +{ + return event->attr.config1; +} + +static bool ddr_perf_filters_compatible(struct perf_event *a, + struct perf_event *b) +{ + if (!ddr_perf_is_filtered(a)) + return true; + if (!ddr_perf_is_filtered(b)) + return true; + return ddr_perf_filter_val(a) == ddr_perf_filter_val(b); +} + +static bool ddr_perf_is_enhanced_filtered(struct perf_event *event) +{ + unsigned int filt; + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + + filt = pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED; + return (filt == DDR_CAP_AXI_ID_FILTER_ENHANCED) && + ddr_perf_is_filtered(event); +} + static u32 ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event) { int i; @@ -209,27 +240,17 @@ static void ddr_perf_free_counter(struct ddr_pmu *pmu, int counter) static u32 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter) { - return readl_relaxed(pmu->base + COUNTER_READ + counter * 4); -} + struct perf_event *event = pmu->events[counter]; + void __iomem *base = pmu->base; -static bool ddr_perf_is_filtered(struct perf_event *event) -{ - return event->attr.config == 0x41 || event->attr.config == 0x42; -} - -static u32 ddr_perf_filter_val(struct perf_event *event) -{ - return event->attr.config1; -} - -static bool ddr_perf_filters_compatible(struct perf_event *a, - struct perf_event *b) -{ - if (!ddr_perf_is_filtered(a)) - return true; - if (!ddr_perf_is_filtered(b)) - return true; - return ddr_perf_filter_val(a) == ddr_perf_filter_val(b); + /* + * return bytes instead of bursts from ddr transaction for + * axid-read and axid-write event if PMU core supports enhanced + * filter. + */ + base += ddr_perf_is_enhanced_filtered(event) ? COUNTER_DPCR1 : + COUNTER_READ; + return readl_relaxed(base + counter * 4); } static int ddr_perf_event_init(struct perf_event *event) From d3eeece9a8ab87f98fa60664fd1ce817661f39cb Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 1 Nov 2019 08:36:20 +0000 Subject: [PATCH 14/17] perf/imx_ddr: Add driver for DDR PMU in i.MX8MPlus Add driver for DDR PMU in i.MX8MPlus. Signed-off-by: Joakim Zhang Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 2a3966d059e7..3bbf806209a6 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -58,9 +58,14 @@ static const struct fsl_ddr_devtype_data imx8m_devtype_data = { .quirks = DDR_CAP_AXI_ID_FILTER, }; +static const struct fsl_ddr_devtype_data imx8mp_devtype_data = { + .quirks = DDR_CAP_AXI_ID_FILTER_ENHANCED, +}; + static const struct of_device_id imx_ddr_pmu_dt_ids[] = { { .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data}, { .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data}, + { .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids); From ed0207a33adda11db88c9c3b6066f0c6051119fe Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 4 Nov 2019 07:09:20 +0000 Subject: [PATCH 15/17] docs/perf: Add AXI ID filter capabilities information Add capabilities information for AXI ID filter. Signed-off-by: Joakim Zhang Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/imx-ddr.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst index 66988b481217..90056e4e8859 100644 --- a/Documentation/admin-guide/perf/imx-ddr.rst +++ b/Documentation/admin-guide/perf/imx-ddr.rst @@ -17,7 +17,8 @@ The "format" directory describes format of the config (event ID) and config1 (AXI filtering) fields of the perf_event_attr structure, see /sys/bus/event_source/ devices/imx8_ddr0/format/. The "events" directory describes the events types hardware supported that can be used with perf tool, see /sys/bus/event_source/ -devices/imx8_ddr0/events/. +devices/imx8_ddr0/events/. The "caps" directory describes filter features implemented +in DDR PMU, see /sys/bus/events_source/devices/imx8_ddr0/caps/. e.g.:: perf stat -a -e imx8_ddr0/cycles/ cmd perf stat -a -e imx8_ddr0/read/,imx8_ddr0/write/ cmd @@ -25,9 +26,12 @@ devices/imx8_ddr0/events/. AXI filtering is only used by CSV modes 0x41 (axid-read) and 0x42 (axid-write) to count reading or writing matches filter setting. Filter setting is various from different DRAM controller implementations, which is distinguished by quirks -in the driver. +in the driver. You also can dump info from userspace, filter in "caps" directory +indicates whether PMU supports AXI ID filter or not; enhanced_filter indicates +whether PMU supports enhanced AXI ID filter or not. Value 0 for un-supported, and +value 1 for supported. -* With DDR_CAP_AXI_ID_FILTER quirk. +* With DDR_CAP_AXI_ID_FILTER quirk(filter: 1, enhanced_filter: 0). Filter is defined with two configuration parts: --AXI_ID defines AxID matching value. --AXI_MASKING defines which bits of AxID are meaningful for the matching. @@ -51,7 +55,7 @@ in the driver. e.g.:: perf stat -a -e imx8_ddr0/axid-read,axi_id=0x12/ cmd, which will monitor ARID=0x12 -* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk. +* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk(filter: 1, enhanced_filter: 1). This is an extension to the DDR_CAP_AXI_ID_FILTER quirk which permits counting the number of bytes (as opposed to the number of bursts) from DDR read and write transactions concurrently with another set of data counters. From f1d303a1b5dd75d18734a2ca56ca90691639a79d Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 4 Nov 2019 07:09:24 +0000 Subject: [PATCH 16/17] perf/imx_ddr: Dump AXI ID filter info to userspace caps/filter indicates whether HW supports AXI ID filter or not. caps/enhanced_filter indicates whether HW supports enhanced AXI ID filter or not. Users can check filter features from userspace with these attributions. Suggested-by: Will Deacon Signed-off-by: Joakim Zhang [will: reworked cap switch to be less error-prone] Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 56 ++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 3bbf806209a6..55083c67b2bb 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -84,6 +84,61 @@ struct ddr_pmu { int id; }; +enum ddr_perf_filter_capabilities { + PERF_CAP_AXI_ID_FILTER = 0, + PERF_CAP_AXI_ID_FILTER_ENHANCED, + PERF_CAP_AXI_ID_FEAT_MAX, +}; + +static u32 ddr_perf_filter_cap_get(struct ddr_pmu *pmu, int cap) +{ + u32 quirks = pmu->devtype_data->quirks; + + switch (cap) { + case PERF_CAP_AXI_ID_FILTER: + return !!(quirks & DDR_CAP_AXI_ID_FILTER); + case PERF_CAP_AXI_ID_FILTER_ENHANCED: + quirks &= DDR_CAP_AXI_ID_FILTER_ENHANCED; + return quirks == DDR_CAP_AXI_ID_FILTER_ENHANCED; + default: + WARN(1, "unknown filter cap %d\n", cap); + } + + return 0; +} + +static ssize_t ddr_perf_filter_cap_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ddr_pmu *pmu = dev_get_drvdata(dev); + struct dev_ext_attribute *ea = + container_of(attr, struct dev_ext_attribute, attr); + int cap = (long)ea->var; + + return snprintf(buf, PAGE_SIZE, "%u\n", + ddr_perf_filter_cap_get(pmu, cap)); +} + +#define PERF_EXT_ATTR_ENTRY(_name, _func, _var) \ + (&((struct dev_ext_attribute) { \ + __ATTR(_name, 0444, _func, NULL), (void *)_var \ + }).attr.attr) + +#define PERF_FILTER_EXT_ATTR_ENTRY(_name, _var) \ + PERF_EXT_ATTR_ENTRY(_name, ddr_perf_filter_cap_show, _var) + +static struct attribute *ddr_perf_filter_cap_attr[] = { + PERF_FILTER_EXT_ATTR_ENTRY(filter, PERF_CAP_AXI_ID_FILTER), + PERF_FILTER_EXT_ATTR_ENTRY(enhanced_filter, PERF_CAP_AXI_ID_FILTER_ENHANCED), + NULL, +}; + +static struct attribute_group ddr_perf_filter_cap_attr_group = { + .name = "caps", + .attrs = ddr_perf_filter_cap_attr, +}; + static ssize_t ddr_perf_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -181,6 +236,7 @@ static const struct attribute_group *attr_groups[] = { &ddr_perf_events_attr_group, &ddr_perf_format_attr_group, &ddr_perf_cpumask_attr_group, + &ddr_perf_filter_cap_attr_group, NULL, }; From 8703317ae576c9bf3e07e5b97275e3f957e8d74b Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 7 Nov 2019 15:56:04 +0800 Subject: [PATCH 17/17] drivers/perf: hisi: update the sccl_id/ccl_id for certain HiSilicon platform For some HiSilicon platform, the originally designed SCCL_ID and CCL_ID are not satisfied with much rich topology when the MT is set, so we extend the SCCL_ID to MPIDR[aff3] and CCL_ID to MPIDR[aff2]. Let's update this for HiSilicon uncore PMU driver. Cc: John Garry Cc: Hanjun Guo Cc: Mark Rutland Cc: Will Deacon Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 79f76f8dda8e..96183e31b96a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -15,6 +15,7 @@ #include #include +#include #include #include "hisi_uncore_pmu.h" @@ -338,8 +339,10 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) /* * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1. - * If multi-threading is supported, CCL_ID is the low 3-bits in MPIDR[Aff2] - * and SCCL_ID is the upper 5-bits of Aff2 field; if not, SCCL_ID + * If multi-threading is supported, On Huawei Kunpeng 920 SoC whose cpu + * core is tsv110, CCL_ID is the low 3-bits in MPIDR[Aff2] and SCCL_ID + * is the upper 5-bits of Aff2 field; while for other cpu types, SCCL_ID + * is in MPIDR[Aff3] and CCL_ID is in MPIDR[Aff2], if not, SCCL_ID * is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1]. */ static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) @@ -347,12 +350,19 @@ static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) u64 mpidr = read_cpuid_mpidr(); if (mpidr & MPIDR_MT_BITMASK) { - int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + if (read_cpuid_part_number() == HISI_CPU_PART_TSV110) { + int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); - if (sccl_id) - *sccl_id = aff2 >> 3; - if (ccl_id) - *ccl_id = aff2 & 0x7; + if (sccl_id) + *sccl_id = aff2 >> 3; + if (ccl_id) + *ccl_id = aff2 & 0x7; + } else { + if (sccl_id) + *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3); + if (ccl_id) + *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + } } else { if (sccl_id) *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);