From c9d8d661ece808f987e606d73d57e3bd6849ac6a Mon Sep 17 00:00:00 2001 From: Nipun Gupta Date: Mon, 10 Sep 2018 19:19:15 +0530 Subject: [PATCH 01/39] Documentation: fsl-mc: add iommu-map device-tree binding for fsl-mc bus The existing IOMMU bindings cannot be used to specify the relationship between fsl-mc devices and IOMMUs. This patch adds a generic binding for mapping fsl-mc devices to IOMMUs, using iommu-map property. Signed-off-by: Nipun Gupta Reviewed-by: Rob Herring Acked-by: Robin Murphy Signed-off-by: Joerg Roedel --- .../devicetree/bindings/misc/fsl,qoriq-mc.txt | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt index 6611a7c2053a..01fdc33a41d0 100644 --- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt +++ b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt @@ -9,6 +9,25 @@ blocks that can be used to create functional hardware objects/devices such as network interfaces, crypto accelerator instances, L2 switches, etc. +For an overview of the DPAA2 architecture and fsl-mc bus see: +Documentation/networking/dpaa2/overview.rst + +As described in the above overview, all DPAA2 objects in a DPRC share the +same hardware "isolation context" and a 10-bit value called an ICID +(isolation context id) is expressed by the hardware to identify +the requester. + +The generic 'iommus' property is insufficient to describe the relationship +between ICIDs and IOMMUs, so an iommu-map property is used to define +the set of possible ICIDs under a root DPRC and how they map to +an IOMMU. + +For generic IOMMU bindings, see +Documentation/devicetree/bindings/iommu/iommu.txt. + +For arm-smmu binding, see: +Documentation/devicetree/bindings/iommu/arm,smmu.txt. + Required properties: - compatible @@ -88,14 +107,34 @@ Sub-nodes: Value type: Definition: Specifies the phandle to the PHY device node associated with the this dpmac. +Optional properties: + +- iommu-map: Maps an ICID to an IOMMU and associated iommu-specifier + data. + + The property is an arbitrary number of tuples of + (icid-base,iommu,iommu-base,length). + + Any ICID i in the interval [icid-base, icid-base + length) is + associated with the listed IOMMU, with the iommu-specifier + (i - icid-base + iommu-base). Example: + smmu: iommu@5000000 { + compatible = "arm,mmu-500"; + #iommu-cells = <1>; + stream-match-mask = <0x7C00>; + ... + }; + fsl_mc: fsl-mc@80c000000 { compatible = "fsl,qoriq-mc"; reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */ <0x00000000 0x08340000 0 0x40000>; /* MC control reg */ msi-parent = <&its>; + /* define map for ICIDs 23-64 */ + iommu-map = <23 &smmu 23 41>; #address-cells = <3>; #size-cells = <1>; From 2a6db719c92dbfe43c9eea7e4358ea2e51b5004e Mon Sep 17 00:00:00 2001 From: Nipun Gupta Date: Mon, 10 Sep 2018 19:19:16 +0530 Subject: [PATCH 02/39] iommu/of: make of_pci_map_rid() available for other devices too iommu-map property is also used by devices with fsl-mc. This patch moves the of_pci_map_rid to generic location, so that it can be used by other busses too. 'of_pci_map_rid' is renamed here to 'of_map_rid' and there is no functional change done in the API. Signed-off-by: Nipun Gupta Reviewed-by: Rob Herring Reviewed-by: Robin Murphy Acked-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 5 +- drivers/of/base.c | 102 +++++++++++++++++++++++++++++++++++++++ drivers/of/irq.c | 5 +- drivers/pci/of.c | 101 -------------------------------------- include/linux/of.h | 11 +++++ include/linux/of_pci.h | 10 ---- 6 files changed, 117 insertions(+), 117 deletions(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index f7787e757244..c13eecac0e8a 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -132,9 +132,8 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) struct of_phandle_args iommu_spec = { .args_count = 1 }; int err; - err = of_pci_map_rid(info->np, alias, "iommu-map", - "iommu-map-mask", &iommu_spec.np, - iommu_spec.args); + err = of_map_rid(info->np, alias, "iommu-map", "iommu-map-mask", + &iommu_spec.np, iommu_spec.args); if (err) return err == -ENODEV ? NO_IOMMU : err; diff --git a/drivers/of/base.c b/drivers/of/base.c index 74eaedd5b860..90bf7d9fa17b 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -2045,3 +2045,105 @@ int of_find_last_cache_level(unsigned int cpu) return cache_level; } + +/** + * of_map_rid - Translate a requester ID through a downstream mapping. + * @np: root complex device node. + * @rid: device requester ID to map. + * @map_name: property name of the map to use. + * @map_mask_name: optional property name of the mask to use. + * @target: optional pointer to a target device node. + * @id_out: optional pointer to receive the translated ID. + * + * Given a device requester ID, look up the appropriate implementation-defined + * platform ID and/or the target device which receives transactions on that + * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or + * @id_out may be NULL if only the other is required. If @target points to + * a non-NULL device node pointer, only entries targeting that node will be + * matched; if it points to a NULL value, it will receive the device node of + * the first matching target phandle, with a reference held. + * + * Return: 0 on success or a standard error code on failure. + */ +int of_map_rid(struct device_node *np, u32 rid, + const char *map_name, const char *map_mask_name, + struct device_node **target, u32 *id_out) +{ + u32 map_mask, masked_rid; + int map_len; + const __be32 *map = NULL; + + if (!np || !map_name || (!target && !id_out)) + return -EINVAL; + + map = of_get_property(np, map_name, &map_len); + if (!map) { + if (target) + return -ENODEV; + /* Otherwise, no map implies no translation */ + *id_out = rid; + return 0; + } + + if (!map_len || map_len % (4 * sizeof(*map))) { + pr_err("%pOF: Error: Bad %s length: %d\n", np, + map_name, map_len); + return -EINVAL; + } + + /* The default is to select all bits. */ + map_mask = 0xffffffff; + + /* + * Can be overridden by "{iommu,msi}-map-mask" property. + * If of_property_read_u32() fails, the default is used. + */ + if (map_mask_name) + of_property_read_u32(np, map_mask_name, &map_mask); + + masked_rid = map_mask & rid; + for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) { + struct device_node *phandle_node; + u32 rid_base = be32_to_cpup(map + 0); + u32 phandle = be32_to_cpup(map + 1); + u32 out_base = be32_to_cpup(map + 2); + u32 rid_len = be32_to_cpup(map + 3); + + if (rid_base & ~map_mask) { + pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n", + np, map_name, map_name, + map_mask, rid_base); + return -EFAULT; + } + + if (masked_rid < rid_base || masked_rid >= rid_base + rid_len) + continue; + + phandle_node = of_find_node_by_phandle(phandle); + if (!phandle_node) + return -ENODEV; + + if (target) { + if (*target) + of_node_put(phandle_node); + else + *target = phandle_node; + + if (*target != phandle_node) + continue; + } + + if (id_out) + *id_out = masked_rid - rid_base + out_base; + + pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n", + np, map_name, map_mask, rid_base, out_base, + rid_len, rid, masked_rid - rid_base + out_base); + return 0; + } + + pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n", + np, map_name, rid, target && *target ? *target : NULL); + return -EFAULT; +} +EXPORT_SYMBOL_GPL(of_map_rid); diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 02ad93a304a4..e1f6f392a4c0 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -588,8 +587,8 @@ static u32 __of_msi_map_rid(struct device *dev, struct device_node **np, * "msi-map" property. */ for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) - if (!of_pci_map_rid(parent_dev->of_node, rid_in, "msi-map", - "msi-map-mask", np, &rid_out)) + if (!of_map_rid(parent_dev->of_node, rid_in, "msi-map", + "msi-map-mask", np, &rid_out)) break; return rid_out; } diff --git a/drivers/pci/of.c b/drivers/pci/of.c index 1836b8ddf292..4c4217d0c3f1 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -355,107 +355,6 @@ failed: EXPORT_SYMBOL_GPL(devm_of_pci_get_host_bridge_resources); #endif /* CONFIG_OF_ADDRESS */ -/** - * of_pci_map_rid - Translate a requester ID through a downstream mapping. - * @np: root complex device node. - * @rid: PCI requester ID to map. - * @map_name: property name of the map to use. - * @map_mask_name: optional property name of the mask to use. - * @target: optional pointer to a target device node. - * @id_out: optional pointer to receive the translated ID. - * - * Given a PCI requester ID, look up the appropriate implementation-defined - * platform ID and/or the target device which receives transactions on that - * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or - * @id_out may be NULL if only the other is required. If @target points to - * a non-NULL device node pointer, only entries targeting that node will be - * matched; if it points to a NULL value, it will receive the device node of - * the first matching target phandle, with a reference held. - * - * Return: 0 on success or a standard error code on failure. - */ -int of_pci_map_rid(struct device_node *np, u32 rid, - const char *map_name, const char *map_mask_name, - struct device_node **target, u32 *id_out) -{ - u32 map_mask, masked_rid; - int map_len; - const __be32 *map = NULL; - - if (!np || !map_name || (!target && !id_out)) - return -EINVAL; - - map = of_get_property(np, map_name, &map_len); - if (!map) { - if (target) - return -ENODEV; - /* Otherwise, no map implies no translation */ - *id_out = rid; - return 0; - } - - if (!map_len || map_len % (4 * sizeof(*map))) { - pr_err("%pOF: Error: Bad %s length: %d\n", np, - map_name, map_len); - return -EINVAL; - } - - /* The default is to select all bits. */ - map_mask = 0xffffffff; - - /* - * Can be overridden by "{iommu,msi}-map-mask" property. - * If of_property_read_u32() fails, the default is used. - */ - if (map_mask_name) - of_property_read_u32(np, map_mask_name, &map_mask); - - masked_rid = map_mask & rid; - for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) { - struct device_node *phandle_node; - u32 rid_base = be32_to_cpup(map + 0); - u32 phandle = be32_to_cpup(map + 1); - u32 out_base = be32_to_cpup(map + 2); - u32 rid_len = be32_to_cpup(map + 3); - - if (rid_base & ~map_mask) { - pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n", - np, map_name, map_name, - map_mask, rid_base); - return -EFAULT; - } - - if (masked_rid < rid_base || masked_rid >= rid_base + rid_len) - continue; - - phandle_node = of_find_node_by_phandle(phandle); - if (!phandle_node) - return -ENODEV; - - if (target) { - if (*target) - of_node_put(phandle_node); - else - *target = phandle_node; - - if (*target != phandle_node) - continue; - } - - if (id_out) - *id_out = masked_rid - rid_base + out_base; - - pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n", - np, map_name, map_mask, rid_base, out_base, - rid_len, rid, masked_rid - rid_base + out_base); - return 0; - } - - pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n", - np, map_name, rid, target && *target ? *target : NULL); - return -EFAULT; -} - #if IS_ENABLED(CONFIG_OF_IRQ) /** * of_irq_parse_pci - Resolve the interrupt for a PCI device diff --git a/include/linux/of.h b/include/linux/of.h index 99b0ebf49632..bf577ca3c8e7 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -550,6 +550,10 @@ bool of_console_check(struct device_node *dn, char *name, int index); extern int of_cpu_node_to_id(struct device_node *np); +int of_map_rid(struct device_node *np, u32 rid, + const char *map_name, const char *map_mask_name, + struct device_node **target, u32 *id_out); + #else /* CONFIG_OF */ static inline void of_core_init(void) @@ -952,6 +956,13 @@ static inline int of_cpu_node_to_id(struct device_node *np) return -ENODEV; } +static inline int of_map_rid(struct device_node *np, u32 rid, + const char *map_name, const char *map_mask_name, + struct device_node **target, u32 *id_out) +{ + return -EINVAL; +} + #define of_match_ptr(_ptr) NULL #define of_match_node(_matches, _node) NULL #endif /* CONFIG_OF */ diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index e83d87fc5673..21a89c4880fa 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -14,9 +14,6 @@ struct device_node *of_pci_find_child_device(struct device_node *parent, unsigned int devfn); int of_pci_get_devfn(struct device_node *np); void of_pci_check_probe_only(void); -int of_pci_map_rid(struct device_node *np, u32 rid, - const char *map_name, const char *map_mask_name, - struct device_node **target, u32 *id_out); #else static inline struct device_node *of_pci_find_child_device(struct device_node *parent, unsigned int devfn) @@ -29,13 +26,6 @@ static inline int of_pci_get_devfn(struct device_node *np) return -EINVAL; } -static inline int of_pci_map_rid(struct device_node *np, u32 rid, - const char *map_name, const char *map_mask_name, - struct device_node **target, u32 *id_out) -{ - return -EINVAL; -} - static inline void of_pci_check_probe_only(void) { } #endif From fa0656b40e0fb0135d7d229c8ab0261fe741af7b Mon Sep 17 00:00:00 2001 From: Nipun Gupta Date: Mon, 10 Sep 2018 19:19:17 +0530 Subject: [PATCH 03/39] iommu/of: support iommu configuration for fsl-mc devices With of_pci_map_rid available for all the busses, use the function for configuration of devices on fsl-mc bus Signed-off-by: Nipun Gupta Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index c13eecac0e8a..c5dd63072529 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -24,6 +24,7 @@ #include #include #include +#include #define NO_IOMMU 1 @@ -142,6 +143,23 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) return err; } +static int of_fsl_mc_iommu_init(struct fsl_mc_device *mc_dev, + struct device_node *master_np) +{ + struct of_phandle_args iommu_spec = { .args_count = 1 }; + int err; + + err = of_map_rid(master_np, mc_dev->icid, "iommu-map", + "iommu-map-mask", &iommu_spec.np, + iommu_spec.args); + if (err) + return err == -ENODEV ? NO_IOMMU : err; + + err = of_iommu_xlate(&mc_dev->dev, &iommu_spec); + of_node_put(iommu_spec.np); + return err; +} + const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np) { @@ -173,6 +191,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, err = pci_for_each_dma_alias(to_pci_dev(dev), of_pci_iommu_init, &info); + } else if (dev_is_fsl_mc(dev)) { + err = of_fsl_mc_iommu_init(to_fsl_mc_device(dev), master_np); } else { struct of_phandle_args iommu_spec; int idx = 0; From eab03e2a1a3d9d354943aff5ae5e4254ee1ec967 Mon Sep 17 00:00:00 2001 From: Nipun Gupta Date: Mon, 10 Sep 2018 19:19:18 +0530 Subject: [PATCH 04/39] iommu/arm-smmu: Add support for the fsl-mc bus Implement bus specific support for the fsl-mc bus including registering arm_smmu_ops and bus specific device add operations. Signed-off-by: Nipun Gupta Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu.c | 7 +++++++ drivers/iommu/iommu.c | 13 +++++++++++++ include/linux/fsl/mc.h | 8 ++++++++ include/linux/iommu.h | 2 ++ 4 files changed, 30 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index fd1b80ef9490..7baf4b03e8ef 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -52,6 +52,7 @@ #include #include +#include #include "io-pgtable.h" #include "arm-smmu-regs.h" @@ -1459,6 +1460,8 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) if (dev_is_pci(dev)) group = pci_device_group(dev); + else if (dev_is_fsl_mc(dev)) + group = fsl_mc_device_group(dev); else group = generic_device_group(dev); @@ -2036,6 +2039,10 @@ static void arm_smmu_bus_init(void) bus_set_iommu(&pci_bus_type, &arm_smmu_ops); } #endif +#ifdef CONFIG_FSL_MC_BUS + if (!iommu_present(&fsl_mc_bus_type)) + bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops); +#endif } static int arm_smmu_device_probe(struct platform_device *pdev) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8c15c5980299..7e5cb7cf2bbe 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -32,6 +32,7 @@ #include #include #include +#include #include static struct kset *iommu_group_kset; @@ -1024,6 +1025,18 @@ struct iommu_group *pci_device_group(struct device *dev) return iommu_group_alloc(); } +/* Get the IOMMU group for device on fsl-mc bus */ +struct iommu_group *fsl_mc_device_group(struct device *dev) +{ + struct device *cont_dev = fsl_mc_cont_dev(dev); + struct iommu_group *group; + + group = iommu_group_get(cont_dev); + if (!group) + group = iommu_group_alloc(); + return group; +} + /** * iommu_group_get_for_dev - Find or create the IOMMU group for a device * @dev: target device diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index f27cb14088a4..dddaca17d684 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -351,6 +351,14 @@ int mc_send_command(struct fsl_mc_io *mc_io, struct fsl_mc_command *cmd); #define dev_is_fsl_mc(_dev) (0) #endif +/* Macro to check if a device is a container device */ +#define fsl_mc_is_cont_dev(_dev) (to_fsl_mc_device(_dev)->flags & \ + FSL_MC_IS_DPRC) + +/* Macro to get the container device of a MC device */ +#define fsl_mc_cont_dev(_dev) (fsl_mc_is_cont_dev(_dev) ? \ + (_dev) : (_dev)->parent) + /* * module_fsl_mc_driver() - Helper macro for drivers that don't do * anything special in module init/exit. This eliminates a lot of diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 87994c265bf5..70102df4b994 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -377,6 +377,8 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain) extern struct iommu_group *pci_device_group(struct device *dev); /* Generic device grouping function */ extern struct iommu_group *generic_device_group(struct device *dev); +/* FSL-MC device grouping function */ +struct iommu_group *fsl_mc_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data From a259ed1618d2e7c212d9fe33f9eebcdc1f231338 Mon Sep 17 00:00:00 2001 From: Nipun Gupta Date: Mon, 10 Sep 2018 19:19:19 +0530 Subject: [PATCH 05/39] bus/fsl-mc: support dma configure for devices on fsl-mc bus This patch adds support of dma configuration for devices on fsl-mc bus using 'dma_configure' callback for busses. Also, directly calling arch_setup_dma_ops is removed from the fsl-mc bus. Signed-off-by: Nipun Gupta Reviewed-by: Laurentiu Tudor Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/bus/fsl-mc/fsl-mc-bus.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 5d8266c6571f..fa43c7dc6b9e 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -127,6 +127,16 @@ static int fsl_mc_bus_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static int fsl_mc_dma_configure(struct device *dev) +{ + struct device *dma_dev = dev; + + while (dev_is_fsl_mc(dma_dev)) + dma_dev = dma_dev->parent; + + return of_dma_configure(dev, dma_dev->of_node, 0); +} + static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -148,6 +158,7 @@ struct bus_type fsl_mc_bus_type = { .name = "fsl-mc", .match = fsl_mc_bus_match, .uevent = fsl_mc_bus_uevent, + .dma_configure = fsl_mc_dma_configure, .dev_groups = fsl_mc_dev_groups, }; EXPORT_SYMBOL_GPL(fsl_mc_bus_type); @@ -633,10 +644,6 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc, goto error_cleanup_dev; } - /* Objects are coherent, unless 'no shareability' flag set. */ - if (!(obj_desc->flags & FSL_MC_OBJ_FLAG_NO_MEM_SHAREABILITY)) - arch_setup_dma_ops(&mc_dev->dev, 0, 0, NULL, true); - /* * The device-specific probe callback will get invoked by device_add() */ From 761ba8ed114b23238238d062a119d3920aaa59df Mon Sep 17 00:00:00 2001 From: Nipun Gupta Date: Mon, 10 Sep 2018 19:19:20 +0530 Subject: [PATCH 06/39] bus/fsl-mc: set coherent dma mask for devices on fsl-mc bus of_dma_configure() API expects coherent_dma_mask to be correctly set in the devices. This patch does the needful. Signed-off-by: Nipun Gupta Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/bus/fsl-mc/fsl-mc-bus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index fa43c7dc6b9e..624828b1d21f 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -627,6 +627,7 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc, mc_dev->icid = parent_mc_dev->icid; mc_dev->dma_mask = FSL_MC_DEFAULT_DMA_MASK; mc_dev->dev.dma_mask = &mc_dev->dma_mask; + mc_dev->dev.coherent_dma_mask = mc_dev->dma_mask; dev_set_msi_domain(&mc_dev->dev, dev_get_msi_domain(&parent_mc_dev->dev)); } From 4f973ed32164a0df2026a7051980328b1a771f7c Mon Sep 17 00:00:00 2001 From: Nipun Gupta Date: Mon, 10 Sep 2018 19:19:21 +0530 Subject: [PATCH 07/39] arm64: dts: ls208xa: comply with the iommu map binding for fsl_mc fsl-mc bus support the new iommu-map property. Comply to this binding for fsl_mc bus. Signed-off-by: Nipun Gupta Reviewed-by: Laurentiu Tudor Signed-off-by: Joerg Roedel --- arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index 8cb78dd99672..90c0faf8579f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -148,6 +148,7 @@ #address-cells = <2>; #size-cells = <2>; ranges; + dma-ranges = <0x0 0x0 0x0 0x0 0x10000 0x00000000>; clockgen: clocking@1300000 { compatible = "fsl,ls2080a-clockgen"; @@ -321,6 +322,8 @@ reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */ <0x00000000 0x08340000 0 0x40000>; /* MC control reg */ msi-parent = <&its>; + iommu-map = <0 &smmu 0 0>; /* This is fixed-up by u-boot */ + dma-coherent; #address-cells = <3>; #size-cells = <1>; @@ -424,6 +427,9 @@ compatible = "arm,mmu-500"; reg = <0 0x5000000 0 0x800000>; #global-interrupts = <12>; + #iommu-cells = <1>; + stream-match-mask = <0x7C00>; + dma-coherent; interrupts = <0 13 4>, /* global secure fault */ <0 14 4>, /* combined secure interrupt */ <0 15 4>, /* global non-secure fault */ @@ -466,7 +472,6 @@ <0 204 4>, <0 205 4>, <0 206 4>, <0 207 4>, <0 208 4>, <0 209 4>; - mmu-masters = <&fsl_mc 0x300 0>; }; dspi: dspi@2100000 { From bee60e94a1e20ec0b8ffdafae270731d8fda4551 Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Wed, 5 Sep 2018 09:57:36 +0530 Subject: [PATCH 08/39] iommu/iova: Optimise attempts to allocate iova from 32bit address range As an optimisation for PCI devices, there is always first attempt been made to allocate iova from SAC address range. This will lead to unnecessary attempts, when there are no free ranges available. Adding fix to track recently failed iova address size and allow further attempts, only if requested size is lesser than a failed size. The size is updated when any replenish happens. Reviewed-by: Robin Murphy Signed-off-by: Ganapatrao Kulkarni Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 22 +++++++++++++++------- include/linux/iova.h | 1 + 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 83fe2621effe..f8d3ba247523 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -56,6 +56,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->granule = granule; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); + iovad->max32_alloc_size = iovad->dma_32bit_pfn; iovad->flush_cb = NULL; iovad->fq = NULL; iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; @@ -139,8 +140,10 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) cached_iova = rb_entry(iovad->cached32_node, struct iova, node); if (free->pfn_hi < iovad->dma_32bit_pfn && - free->pfn_lo >= cached_iova->pfn_lo) + free->pfn_lo >= cached_iova->pfn_lo) { iovad->cached32_node = rb_next(&free->node); + iovad->max32_alloc_size = iovad->dma_32bit_pfn; + } cached_iova = rb_entry(iovad->cached_node, struct iova, node); if (free->pfn_lo >= cached_iova->pfn_lo) @@ -190,6 +193,10 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + if (limit_pfn <= iovad->dma_32bit_pfn && + size >= iovad->max32_alloc_size) + goto iova32_full; + curr = __get_cached_rbnode(iovad, limit_pfn); curr_iova = rb_entry(curr, struct iova, node); do { @@ -200,10 +207,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, curr_iova = rb_entry(curr, struct iova, node); } while (curr && new_pfn <= curr_iova->pfn_hi); - if (limit_pfn < size || new_pfn < iovad->start_pfn) { - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - return -ENOMEM; - } + if (limit_pfn < size || new_pfn < iovad->start_pfn) + goto iova32_full; /* pfn_lo will point to size aligned address if size_aligned is set */ new->pfn_lo = new_pfn; @@ -214,9 +219,12 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, __cached_rbnode_insert_update(iovad, new); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - - return 0; + +iova32_full: + iovad->max32_alloc_size = size; + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return -ENOMEM; } static struct kmem_cache *iova_cache; diff --git a/include/linux/iova.h b/include/linux/iova.h index 928442dda565..0b93bf96693e 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -75,6 +75,7 @@ struct iova_domain { unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + unsigned long max32_alloc_size; /* Size of last failed allocation */ struct iova anchor; /* rbtree lookup anchor */ struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ From 6af588fed39178c8e118fcf9cb6664e58a1fbe88 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 12 Sep 2018 16:24:12 +0100 Subject: [PATCH 09/39] iommu: Add fast hook for getting DMA domains While iommu_get_domain_for_dev() is the robust way for arbitrary IOMMU API callers to retrieve the domain pointer, for DMA ops domains it doesn't scale well for large systems and multi-queue devices, since the momentary refcount adjustment will lead to exclusive cacheline contention when multiple CPUs are operating in parallel on different mappings for the same device. In the case of DMA ops domains, however, this refcounting is actually unnecessary, since they already imply that the group exists and is managed by platform code and IOMMU internals (by virtue of iommu_group_get_for_dev()) such that a reference will already be held for the lifetime of the device. Thus we can avoid the bottleneck by providing a fast lookup specifically for the DMA code to retrieve the default domain it already knows it has set up - a simple read-only dereference plays much nicer with cache-coherency protocols. Signed-off-by: Robin Murphy Tested-by: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 9 +++++++++ include/linux/iommu.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8c15c5980299..9d70344204fe 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1415,6 +1415,15 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) } EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); +/* + * For IOMMU_DOMAIN_DMA implementations which already provide their own + * guarantees that the group and its default domain are valid and correct. + */ +struct iommu_domain *iommu_get_dma_domain(struct device *dev) +{ + return dev->iommu_group->default_domain; +} + /* * IOMMU groups are really the natrual working unit of the IOMMU, but * the IOMMU API works on domains and devices. Bridge that gap by diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 87994c265bf5..c783648d4060 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -293,6 +293,7 @@ extern int iommu_attach_device(struct iommu_domain *domain, extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); +extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, From 43c5bf11a610ceeae68b26c24e0c76852d0d5cfc Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 12 Sep 2018 16:24:13 +0100 Subject: [PATCH 10/39] iommu/dma: Use fast DMA domain lookup Most parts of iommu-dma already assume they are operating on a default domain set up by iommu_dma_init_domain(), and can be converted straight over to avoid the refcounting bottleneck. MSI page mappings may be in an unmanaged domain with an explicit MSI-only cookie, so retain the non-specific lookup, but that's OK since they're far from a contended fast path either way. Signed-off-by: Robin Murphy Tested-by: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 511ff9a1d6d9..320f9ea82f3f 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -491,7 +491,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, void iommu_dma_free(struct device *dev, struct page **pages, size_t size, dma_addr_t *handle) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size); + __iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size); __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); *handle = IOMMU_MAPPING_ERROR; } @@ -518,7 +518,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, unsigned long attrs, int prot, dma_addr_t *handle, void (*flush_page)(struct device *, const void *, phys_addr_t)) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; struct page **pages; @@ -606,9 +606,8 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) } static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, - size_t size, int prot) + size_t size, int prot, struct iommu_domain *domain) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; size_t iova_off = 0; dma_addr_t iova; @@ -632,13 +631,14 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, int prot) { - return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot); + return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot, + iommu_get_dma_domain(dev)); } void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); + __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); } /* @@ -726,7 +726,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int prot) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; struct scatterlist *s, *prev = NULL; @@ -811,20 +811,21 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, sg = tmp; } end = sg_dma_address(sg) + sg_dma_len(sg); - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start); + __iommu_dma_unmap(iommu_get_dma_domain(dev), start, end - start); } dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) { return __iommu_dma_map(dev, phys, size, - dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO); + dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO, + iommu_get_dma_domain(dev)); } void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); + __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); } int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -850,7 +851,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, if (!msi_page) return NULL; - iova = __iommu_dma_map(dev, msi_addr, size, prot); + iova = __iommu_dma_map(dev, msi_addr, size, prot, domain); if (iommu_dma_mapping_error(dev, iova)) goto out_free_page; From 7adb562c3e90f87b0da196c372afe251ad4ec62e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 12 Sep 2018 16:24:14 +0100 Subject: [PATCH 11/39] arm64/dma-mapping: Mildly optimise non-coherent IOMMU ops Whilst the symmetry of deferring to the existing sync callback in __iommu_map_page() is nice, taking a round-trip through iommu_iova_to_phys() is a pretty heavyweight way to get an address we can trivially compute from the page we already have. Tweaking it to just perform the cache maintenance directly when appropriate doesn't really make the code any more complicated, and the runtime efficiency gain can only be a benefit. Furthermore, the sync operations themselves know they can only be invoked on a managed DMA ops domain, so can use the fast specific domain lookup to avoid excessive manipulation of the group refcount (particularly in the scatterlist cases). Acked-by: Will Deacon Signed-off-by: Robin Murphy Tested-by: Will Deacon Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 072c51fb07d7..cf017c5bb5e7 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -712,7 +712,7 @@ static void __iommu_sync_single_for_cpu(struct device *dev, if (is_device_dma_coherent(dev)) return; - phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); __dma_unmap_area(phys_to_virt(phys), size, dir); } @@ -725,7 +725,7 @@ static void __iommu_sync_single_for_device(struct device *dev, if (is_device_dma_coherent(dev)) return; - phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); __dma_map_area(phys_to_virt(phys), size, dir); } @@ -738,9 +738,9 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, int prot = dma_info_to_prot(dir, coherent, attrs); dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); - if (!iommu_dma_mapping_error(dev, dev_addr) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __iommu_sync_single_for_device(dev, dev_addr, size, dir); + if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + !iommu_dma_mapping_error(dev, dev_addr)) + __dma_map_area(page_address(page) + offset, size, dir); return dev_addr; } From bc46c229b6afc4188c13a034c868527992dc63c9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 11 Sep 2018 13:28:32 +0100 Subject: [PATCH 12/39] iommu/fsl: Fix spelling mistake pci_endpt_partioning -> pci_endpt_partitioning Trivial fix to spelling mistake in variable name Signed-off-by: Colin Ian King Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu_domain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index f089136e9c3f..18970d38f698 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -916,13 +916,13 @@ static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev) static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) { struct pci_controller *pci_ctl; - bool pci_endpt_partioning; + bool pci_endpt_partitioning; struct iommu_group *group = NULL; pci_ctl = pci_bus_to_host(pdev->bus); - pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); + pci_endpt_partitioning = check_pci_ctl_endpt_part(pci_ctl); /* We can partition PCIe devices so assign device group to the device */ - if (pci_endpt_partioning) { + if (pci_endpt_partitioning) { group = pci_device_group(&pdev->dev); /* From 57d3f11c83b806e30ab267fd978c1829483654a6 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 7 Sep 2018 01:42:15 +0000 Subject: [PATCH 13/39] iommu/ipmmu-vmsa: Convert to SPDX identifiers This patch updates license to use SPDX-License-Identifier instead of verbose license text. Signed-off-by: Kuninori Morimoto Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 22b94f8a9a04..b98a03189580 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IPMMU VMSA * * Copyright (C) 2014 Renesas Electronics Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include From 26b86092c4650311256fa2372ced7e1e17d97d7b Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Tue, 11 Sep 2018 17:11:36 -0700 Subject: [PATCH 14/39] iommu/vt-d: Relocate struct/function declarations to its header files To reuse the static functions and the struct declarations, move them to corresponding header files and export the needed functions. Cc: Lu Baolu Cc: Fenghua Yu Cc: Jacob Pan Cc: Ashok Raj Reviewed-by: Andy Shevchenko Signed-off-by: Gayatri Kammela Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel --- arch/x86/include/asm/irq_remapping.h | 2 ++ drivers/iommu/intel-iommu.c | 31 +++------------------------- drivers/iommu/intel_irq_remapping.c | 2 +- include/linux/intel-iommu.h | 30 +++++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 5f26962eff42..67ed72f31cc2 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -45,6 +45,8 @@ struct vcpu_data { #ifdef CONFIG_IRQ_REMAP +extern raw_spinlock_t irq_2_ir_lock; + extern bool irq_remapping_cap(enum irq_remap_cap cap); extern void set_irq_remapping_broken(void); extern int irq_remapping_prepare(void); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5f3f10cf9d9d..b5868c757995 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -185,16 +185,6 @@ static int rwbf_quirk; static int force_on = 0; int intel_iommu_tboot_noforce; -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 - (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 lo; - u64 hi; -}; #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) /* @@ -220,21 +210,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re) return re->hi & VTD_PAGE_MASK; } -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; static inline void context_clear_pasid_enable(struct context_entry *context) { @@ -261,7 +236,7 @@ static inline bool __context_present(struct context_entry *context) return (context->lo & 1); } -static inline bool context_present(struct context_entry *context) +bool context_present(struct context_entry *context) { return context_pasid_enabled(context) ? __context_present(context) : @@ -788,8 +763,8 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) domain->iommu_superpage = domain_update_iommu_superpage(NULL); } -static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu, - u8 bus, u8 devfn, int alloc) +struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, + u8 devfn, int alloc) { struct root_entry *root = &iommu->root_entry[bus]; struct context_entry *context; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 967450bd421a..c2d6c11431de 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -76,7 +76,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS]; * in single-threaded environment with interrupt disabled, so no need to tabke * the dmar_global_lock. */ -static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); +DEFINE_RAW_SPINLOCK(irq_2_ir_lock); static const struct irq_domain_ops intel_ir_domain_ops; static void iommu_disable_irq_remapping(struct intel_iommu *iommu); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 28004d74ae04..b7cf32e8ae1f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -389,6 +389,33 @@ struct pasid_entry; struct pasid_state_entry; struct page_req_dsc; +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 lo; + u64 hi; +}; + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; + struct dmar_domain { int nid; /* node id */ @@ -559,5 +586,8 @@ extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev); #endif extern const struct attribute_group *intel_iommu_groups[]; +bool context_present(struct context_entry *context); +struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, + u8 devfn, int alloc); #endif From 4a2d80dbadb72b998641af32d8dd4b7b39e72aa0 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Tue, 11 Sep 2018 17:11:37 -0700 Subject: [PATCH 15/39] iommu/vt-d: Update register definitions to VT-d 3.0 specification Add new register definitions added in the VT-d 3.0 specification. Also include registers that were missing previously. Cc: Lu Baolu Cc: Fenghua Yu Cc: Jacob Pan Cc: Ashok Raj Cc: Gayatri Kammela Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index b7cf32e8ae1f..3bdb9aa198af 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -72,6 +72,42 @@ #define DMAR_PEDATA_REG 0xe4 /* Page request event interrupt data register */ #define DMAR_PEADDR_REG 0xe8 /* Page request event interrupt addr register */ #define DMAR_PEUADDR_REG 0xec /* Page request event Upper address register */ +#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability register */ +#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type register */ +#define DMAR_MTRR_FIX64K_00000_REG 0x120 /* MTRR Fixed range registers */ +#define DMAR_MTRR_FIX16K_80000_REG 0x128 +#define DMAR_MTRR_FIX16K_A0000_REG 0x130 +#define DMAR_MTRR_FIX4K_C0000_REG 0x138 +#define DMAR_MTRR_FIX4K_C8000_REG 0x140 +#define DMAR_MTRR_FIX4K_D0000_REG 0x148 +#define DMAR_MTRR_FIX4K_D8000_REG 0x150 +#define DMAR_MTRR_FIX4K_E0000_REG 0x158 +#define DMAR_MTRR_FIX4K_E8000_REG 0x160 +#define DMAR_MTRR_FIX4K_F0000_REG 0x168 +#define DMAR_MTRR_FIX4K_F8000_REG 0x170 +#define DMAR_MTRR_PHYSBASE0_REG 0x180 /* MTRR Variable range registers */ +#define DMAR_MTRR_PHYSMASK0_REG 0x188 +#define DMAR_MTRR_PHYSBASE1_REG 0x190 +#define DMAR_MTRR_PHYSMASK1_REG 0x198 +#define DMAR_MTRR_PHYSBASE2_REG 0x1a0 +#define DMAR_MTRR_PHYSMASK2_REG 0x1a8 +#define DMAR_MTRR_PHYSBASE3_REG 0x1b0 +#define DMAR_MTRR_PHYSMASK3_REG 0x1b8 +#define DMAR_MTRR_PHYSBASE4_REG 0x1c0 +#define DMAR_MTRR_PHYSMASK4_REG 0x1c8 +#define DMAR_MTRR_PHYSBASE5_REG 0x1d0 +#define DMAR_MTRR_PHYSMASK5_REG 0x1d8 +#define DMAR_MTRR_PHYSBASE6_REG 0x1e0 +#define DMAR_MTRR_PHYSMASK6_REG 0x1e8 +#define DMAR_MTRR_PHYSBASE7_REG 0x1f0 +#define DMAR_MTRR_PHYSMASK7_REG 0x1f8 +#define DMAR_MTRR_PHYSBASE8_REG 0x200 +#define DMAR_MTRR_PHYSMASK8_REG 0x208 +#define DMAR_MTRR_PHYSBASE9_REG 0x210 +#define DMAR_MTRR_PHYSMASK9_REG 0x218 +#define DMAR_VCCAP_REG 0xe00 /* Virtual command capability register */ +#define DMAR_VCMD_REG 0xe10 /* Virtual command register */ +#define DMAR_VCRSP_REG 0xe20 /* Virtual command response register */ #define OFFSET_STRIDE (9) From ee2636b8670b1ab2a02a65923a9bef59e9199c37 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Tue, 11 Sep 2018 17:11:38 -0700 Subject: [PATCH 16/39] iommu/vt-d: Enable base Intel IOMMU debugfs support Add a new config option CONFIG_INTEL_IOMMU_DEBUGFS and do the base enabling for Intel IOMMU debugfs. Cc: Lu Baolu Cc: Fenghua Yu Cc: Ashok Raj Cc: Jacob Pan Co-Developed-by: Gayatri Kammela Signed-off-by: Gayatri Kammela Reviewed-by: Andy Shevchenko Reviewed-by: Lu Baolu Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 13 +++++++++++++ drivers/iommu/Makefile | 1 + drivers/iommu/intel-iommu-debugfs.c | 20 ++++++++++++++++++++ drivers/iommu/intel-iommu.c | 1 + include/linux/intel-iommu.h | 6 ++++++ 5 files changed, 41 insertions(+) create mode 100644 drivers/iommu/intel-iommu-debugfs.c diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index c60395b7470f..51ba19c8847b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -186,6 +186,19 @@ config INTEL_IOMMU and include PCI device scope covered by these DMA remapping devices. +config INTEL_IOMMU_DEBUGFS + bool "Export Intel IOMMU internals in Debugfs" + depends on INTEL_IOMMU && IOMMU_DEBUGFS + help + !!!WARNING!!! + + DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!! + + Expose Intel IOMMU internals in Debugfs. + + This option is -NOT- intended for production environments, and should + only be enabled for debugging Intel IOMMU. + config INTEL_IOMMU_SVM bool "Support for Shared Virtual Memory with Intel IOMMU" depends on INTEL_IOMMU && X86 diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index ab5eba6edf82..a158a68c8ea8 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o +obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c new file mode 100644 index 000000000000..33e0a6c12d92 --- /dev/null +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2018 Intel Corporation. + * + * Authors: Gayatri Kammela + * Sohil Mehta + * Jacob Pan + */ + +#include +#include +#include +#include + +#include + +void __init intel_iommu_debugfs_init(void) +{ + debugfs_create_dir("intel", iommu_debugfs_dir); +} diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b5868c757995..b9cf7c8c1616 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4837,6 +4837,7 @@ int __init intel_iommu_init(void) cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL, intel_iommu_cpu_dead); intel_iommu_enabled = 1; + intel_iommu_debugfs_init(); return 0; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 3bdb9aa198af..b0ae25837361 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -621,6 +621,12 @@ extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_ extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev); #endif +#ifdef CONFIG_INTEL_IOMMU_DEBUGFS +void intel_iommu_debugfs_init(void); +#else +static inline void intel_iommu_debugfs_init(void) {} +#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ + extern const struct attribute_group *intel_iommu_groups[]; bool context_present(struct context_entry *context); struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, From 6825d3ea6cdea68694a99a7f23c72d937fabc011 Mon Sep 17 00:00:00 2001 From: Gayatri Kammela Date: Tue, 11 Sep 2018 17:11:39 -0700 Subject: [PATCH 17/39] iommu/vt-d: Add debugfs support to show register contents Add Intel IOMMU debugfs extension to dump all the register contents for each IOMMU device. Example: root@OTC-KBLH-01:~# cat /sys/kernel/debug/iommu/intel/iommu_regset IOMMU: dmar0 Register Base Address: fed90000 Name Offset Contents VER 0x00 0x0000000000000010 CAP 0x08 0x01c0000c40660462 ECAP 0x10 0x0000000000f0101a GCMD 0x18 0x0000000000000000 GSTS 0x1c 0x00000000c7000000 RTADDR 0x20 0x00000004071d3800 CCMD 0x28 0x0800000000000000 FSTS 0x34 0x0000000000000000 FECTL 0x38 0x0000000000000000 FEDATA 0x3c 0xfee0100400004021 Cc: Lu Baolu Cc: Fenghua Yu Cc: Jacob Pan Cc: Ashok Raj Signed-off-by: Gayatri Kammela Reviewed-by: Andy Shevchenko Reviewed-by: Lu Baolu Co-Developed-by: Sohil Mehta Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu-debugfs.c | 123 +++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index 33e0a6c12d92..57af2f774312 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -14,7 +14,128 @@ #include +struct iommu_regset { + int offset; + const char *regs; +}; + +#define IOMMU_REGSET_ENTRY(_reg_) \ + { DMAR_##_reg_##_REG, __stringify(_reg_) } +static const struct iommu_regset iommu_regs[] = { + IOMMU_REGSET_ENTRY(VER), + IOMMU_REGSET_ENTRY(CAP), + IOMMU_REGSET_ENTRY(ECAP), + IOMMU_REGSET_ENTRY(GCMD), + IOMMU_REGSET_ENTRY(GSTS), + IOMMU_REGSET_ENTRY(RTADDR), + IOMMU_REGSET_ENTRY(CCMD), + IOMMU_REGSET_ENTRY(FSTS), + IOMMU_REGSET_ENTRY(FECTL), + IOMMU_REGSET_ENTRY(FEDATA), + IOMMU_REGSET_ENTRY(FEADDR), + IOMMU_REGSET_ENTRY(FEUADDR), + IOMMU_REGSET_ENTRY(AFLOG), + IOMMU_REGSET_ENTRY(PMEN), + IOMMU_REGSET_ENTRY(PLMBASE), + IOMMU_REGSET_ENTRY(PLMLIMIT), + IOMMU_REGSET_ENTRY(PHMBASE), + IOMMU_REGSET_ENTRY(PHMLIMIT), + IOMMU_REGSET_ENTRY(IQH), + IOMMU_REGSET_ENTRY(IQT), + IOMMU_REGSET_ENTRY(IQA), + IOMMU_REGSET_ENTRY(ICS), + IOMMU_REGSET_ENTRY(IRTA), + IOMMU_REGSET_ENTRY(PQH), + IOMMU_REGSET_ENTRY(PQT), + IOMMU_REGSET_ENTRY(PQA), + IOMMU_REGSET_ENTRY(PRS), + IOMMU_REGSET_ENTRY(PECTL), + IOMMU_REGSET_ENTRY(PEDATA), + IOMMU_REGSET_ENTRY(PEADDR), + IOMMU_REGSET_ENTRY(PEUADDR), + IOMMU_REGSET_ENTRY(MTRRCAP), + IOMMU_REGSET_ENTRY(MTRRDEF), + IOMMU_REGSET_ENTRY(MTRR_FIX64K_00000), + IOMMU_REGSET_ENTRY(MTRR_FIX16K_80000), + IOMMU_REGSET_ENTRY(MTRR_FIX16K_A0000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_C0000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_C8000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_D0000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_D8000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_E0000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_E8000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_F0000), + IOMMU_REGSET_ENTRY(MTRR_FIX4K_F8000), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE0), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK0), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE1), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK1), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE2), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK2), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE3), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK3), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE4), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK4), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE5), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK5), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE6), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK6), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE7), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK7), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE8), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK8), + IOMMU_REGSET_ENTRY(MTRR_PHYSBASE9), + IOMMU_REGSET_ENTRY(MTRR_PHYSMASK9), + IOMMU_REGSET_ENTRY(VCCAP), + IOMMU_REGSET_ENTRY(VCMD), + IOMMU_REGSET_ENTRY(VCRSP), +}; + +static int iommu_regset_show(struct seq_file *m, void *unused) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + unsigned long flag; + int i, ret = 0; + u64 value; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!drhd->reg_base_addr) { + seq_puts(m, "IOMMU: Invalid base address\n"); + ret = -EINVAL; + goto out; + } + + seq_printf(m, "IOMMU: %s Register Base Address: %llx\n", + iommu->name, drhd->reg_base_addr); + seq_puts(m, "Name\t\t\tOffset\t\tContents\n"); + /* + * Publish the contents of the 64-bit hardware registers + * by adding the offset to the pointer (virtual address). + */ + raw_spin_lock_irqsave(&iommu->register_lock, flag); + for (i = 0 ; i < ARRAY_SIZE(iommu_regs); i++) { + value = dmar_readq(iommu->reg + iommu_regs[i].offset); + seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n", + iommu_regs[i].regs, iommu_regs[i].offset, + value); + } + raw_spin_unlock_irqrestore(&iommu->register_lock, flag); + seq_putc(m, '\n'); + } +out: + rcu_read_unlock(); + + return ret; +} +DEFINE_SHOW_ATTRIBUTE(iommu_regset); + void __init intel_iommu_debugfs_init(void) { - debugfs_create_dir("intel", iommu_debugfs_dir); + struct dentry *intel_iommu_debug = debugfs_create_dir("intel", + iommu_debugfs_dir); + + debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL, + &iommu_regset_fops); } From a6d268c619d6765322f2fbdd1d7d2769ebc04cb2 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Tue, 11 Sep 2018 17:11:40 -0700 Subject: [PATCH 18/39] iommu/vt-d: Add debugfs support for Interrupt remapping Debugfs extension for Intel IOMMU to dump Interrupt remapping table entries for Interrupt remapping and Interrupt posting. The file /sys/kernel/debug/iommu/intel/ir_translation_struct provides detailed information, such as Index, Source Id, Destination Id, Vector and the IRTE values for entries with the present bit set, in the format shown. Remapped Interrupt supported on IOMMU: dmar1 IR table address:85e500000 Entry SrcID DstID Vct IRTE_high IRTE_low 24 01:00.0 00000001 24 0000000000040100 000000010024000d 25 01:00.0 00000004 22 0000000000040100 000000040022000d Posted Interrupt supported on IOMMU: dmar5 IR table address:85ec00000 Entry SrcID PDA_high PDA_low Vct IRTE_high IRTE_low 4 43:00.0 0000000f ff765980 41 0000000f00044300 ff76598000418001 5 43:00.0 0000000f ff765980 51 0000000f00044300 ff76598000518001 Cc: Lu Baolu Cc: Jacob Pan Cc: Fenghua Yu Cc: Ashok Raj Co-Developed-by: Gayatri Kammela Signed-off-by: Gayatri Kammela Reviewed-by: Andy Shevchenko Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu-debugfs.c | 108 ++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index 57af2f774312..2becbd78620f 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -131,6 +131,110 @@ out: } DEFINE_SHOW_ATTRIBUTE(iommu_regset); +#ifdef CONFIG_IRQ_REMAP +static void ir_tbl_remap_entry_show(struct seq_file *m, + struct intel_iommu *iommu) +{ + struct irte *ri_entry; + unsigned long flags; + int idx; + + seq_puts(m, " Entry SrcID DstID Vct IRTE_high\t\tIRTE_low\n"); + + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + for (idx = 0; idx < INTR_REMAP_TABLE_ENTRIES; idx++) { + ri_entry = &iommu->ir_table->base[idx]; + if (!ri_entry->present || ri_entry->p_pst) + continue; + + seq_printf(m, " %-5d %02x:%02x.%01x %08x %02x %016llx\t%016llx\n", + idx, PCI_BUS_NUM(ri_entry->sid), + PCI_SLOT(ri_entry->sid), PCI_FUNC(ri_entry->sid), + ri_entry->dest_id, ri_entry->vector, + ri_entry->high, ri_entry->low); + } + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); +} + +static void ir_tbl_posted_entry_show(struct seq_file *m, + struct intel_iommu *iommu) +{ + struct irte *pi_entry; + unsigned long flags; + int idx; + + seq_puts(m, " Entry SrcID PDA_high PDA_low Vct IRTE_high\t\tIRTE_low\n"); + + raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + for (idx = 0; idx < INTR_REMAP_TABLE_ENTRIES; idx++) { + pi_entry = &iommu->ir_table->base[idx]; + if (!pi_entry->present || !pi_entry->p_pst) + continue; + + seq_printf(m, " %-5d %02x:%02x.%01x %08x %08x %02x %016llx\t%016llx\n", + idx, PCI_BUS_NUM(pi_entry->sid), + PCI_SLOT(pi_entry->sid), PCI_FUNC(pi_entry->sid), + pi_entry->pda_h, pi_entry->pda_l << 6, + pi_entry->vector, pi_entry->high, + pi_entry->low); + } + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); +} + +/* + * For active IOMMUs go through the Interrupt remapping + * table and print valid entries in a table format for + * Remapped and Posted Interrupts. + */ +static int ir_translation_struct_show(struct seq_file *m, void *unused) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + u64 irta; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!ecap_ir_support(iommu->ecap)) + continue; + + seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n", + iommu->name); + + if (iommu->ir_table) { + irta = virt_to_phys(iommu->ir_table->base); + seq_printf(m, " IR table address:%llx\n", irta); + ir_tbl_remap_entry_show(m, iommu); + } else { + seq_puts(m, "Interrupt Remapping is not enabled\n"); + } + seq_putc(m, '\n'); + } + + seq_puts(m, "****\n\n"); + + for_each_active_iommu(iommu, drhd) { + if (!cap_pi_support(iommu->cap)) + continue; + + seq_printf(m, "Posted Interrupt supported on IOMMU: %s\n", + iommu->name); + + if (iommu->ir_table) { + irta = virt_to_phys(iommu->ir_table->base); + seq_printf(m, " IR table address:%llx\n", irta); + ir_tbl_posted_entry_show(m, iommu); + } else { + seq_puts(m, "Interrupt Remapping is not enabled\n"); + } + seq_putc(m, '\n'); + } + rcu_read_unlock(); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(ir_translation_struct); +#endif + void __init intel_iommu_debugfs_init(void) { struct dentry *intel_iommu_debug = debugfs_create_dir("intel", @@ -138,4 +242,8 @@ void __init intel_iommu_debugfs_init(void) debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL, &iommu_regset_fops); +#ifdef CONFIG_IRQ_REMAP + debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug, + NULL, &ir_translation_struct_fops); +#endif } From 18f99c9b9dec15bc088d9db239f68e5c80415304 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Tue, 11 Sep 2018 17:11:41 -0700 Subject: [PATCH 19/39] iommu/vt-d: Add debugfs support to show context internals Export Intel IOMMU DMA remapping internal states, such as root and context table entries in debugfs. Example of such dump in Kabylake: /sys/kernel/debug/iommu/intel/dmar_translation_struct IOMMU dmar0: Root Table Address:45699c000 Root Table Entries: Bus: 0 H: 0 L: 45699f001 Context Table Entries for Bus: 0 Entry B:D.F High Low 16 00:02.0 102 45699e005 IOMMU dmar1: Root Table Address:45699d000 Root Table Entries: Bus: 0 H: 0 L: 45644f001 Context Table Entries for Bus: 0 Entry B:D.F High Low 160 00:14.0 102 45644e001 184 00:17.0 302 456310001 248 00:1f.0 202 4564bf001 Cc: Lu Baolu Cc: Fenghua Yu Cc: Ashok Raj Cc: Jacob Pan Co-Developed-by: Gayatri Kammela Signed-off-by: Gayatri Kammela Reviewed-by: Andy Shevchenko Reviewed-by: Lu Baolu Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu-debugfs.c | 65 +++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index 2becbd78620f..7fabf9b1c2dc 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -131,6 +131,69 @@ out: } DEFINE_SHOW_ATTRIBUTE(iommu_regset); +static void ctx_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu, + int bus) +{ + struct context_entry *context; + int devfn; + + seq_printf(m, " Context Table Entries for Bus: %d\n", bus); + seq_puts(m, " Entry\tB:D.F\tHigh\tLow\n"); + + for (devfn = 0; devfn < 256; devfn++) { + context = iommu_context_addr(iommu, bus, devfn, 0); + if (!context) + return; + + if (!context_present(context)) + continue; + + seq_printf(m, " %-5d\t%02x:%02x.%x\t%-6llx\t%llx\n", devfn, + bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + context[0].hi, context[0].lo); + } +} + +static void root_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu) +{ + unsigned long flags; + int bus; + + spin_lock_irqsave(&iommu->lock, flags); + seq_printf(m, "IOMMU %s: Root Table Address:%llx\n", iommu->name, + (u64)virt_to_phys(iommu->root_entry)); + seq_puts(m, "Root Table Entries:\n"); + + for (bus = 0; bus < 256; bus++) { + if (!(iommu->root_entry[bus].lo & 1)) + continue; + + seq_printf(m, " Bus: %d H: %llx L: %llx\n", bus, + iommu->root_entry[bus].hi, + iommu->root_entry[bus].lo); + + ctx_tbl_entry_show(m, iommu, bus); + seq_putc(m, '\n'); + } + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static int dmar_translation_struct_show(struct seq_file *m, void *unused) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + root_tbl_entry_show(m, iommu); + seq_putc(m, '\n'); + } + rcu_read_unlock(); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(dmar_translation_struct); + #ifdef CONFIG_IRQ_REMAP static void ir_tbl_remap_entry_show(struct seq_file *m, struct intel_iommu *iommu) @@ -242,6 +305,8 @@ void __init intel_iommu_debugfs_init(void) debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL, &iommu_regset_fops); + debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug, + NULL, &dmar_translation_struct_fops); #ifdef CONFIG_IRQ_REMAP debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug, NULL, &ir_translation_struct_fops); From 701d8a624a2d5aa7d7efd38800c7c6ab4a4c453c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 19 Sep 2018 11:12:57 +0100 Subject: [PATCH 20/39] iommu: Tidy up window attributes The external interface to get/set window attributes is already abstracted behind iommu_domain_{get,set}_attr(), so there's no real reason for the internal interface to be different. Since we only have one window-based driver anyway, clean up the core code by just moving the DOMAIN_ATTR_WINDOWS handling directly into the PAMU driver. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu_domain.c | 20 ++++++++++++++++++++ drivers/iommu/iommu.c | 20 -------------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index f089136e9c3f..f83965ee3095 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -818,6 +818,7 @@ static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, enum iommu_attr attr_type, void *data) { struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + u32 *count; int ret = 0; switch (attr_type) { @@ -829,6 +830,15 @@ static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, break; case DOMAIN_ATTR_FSL_PAMU_ENABLE: ret = configure_domain_dma_state(dma_domain, *(int *)data); + break; + case DOMAIN_ATTR_WINDOWS: + count = data; + + if (domain->ops->domain_set_windows != NULL) + ret = domain->ops->domain_set_windows(domain, *count); + else + ret = -ENODEV; + break; default: pr_debug("Unsupported attribute type\n"); @@ -843,6 +853,7 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, enum iommu_attr attr_type, void *data) { struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + u32 *count; int ret = 0; switch (attr_type) { @@ -855,6 +866,15 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, break; case DOMAIN_ATTR_FSL_PAMUV1: *(int *)data = DOMAIN_ATTR_FSL_PAMUV1; + break; + case DOMAIN_ATTR_WINDOWS: + count = data; + + if (domain->ops->domain_get_windows != NULL) + *count = domain->ops->domain_get_windows(domain); + else + ret = -ENODEV; + break; default: pr_debug("Unsupported attribute type\n"); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9d70344204fe..e9b50abc02a4 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1805,7 +1805,6 @@ int iommu_domain_get_attr(struct iommu_domain *domain, struct iommu_domain_geometry *geometry; bool *paging; int ret = 0; - u32 *count; switch (attr) { case DOMAIN_ATTR_GEOMETRY: @@ -1816,15 +1815,6 @@ int iommu_domain_get_attr(struct iommu_domain *domain, case DOMAIN_ATTR_PAGING: paging = data; *paging = (domain->pgsize_bitmap != 0UL); - break; - case DOMAIN_ATTR_WINDOWS: - count = data; - - if (domain->ops->domain_get_windows != NULL) - *count = domain->ops->domain_get_windows(domain); - else - ret = -ENODEV; - break; default: if (!domain->ops->domain_get_attr) @@ -1841,18 +1831,8 @@ int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr attr, void *data) { int ret = 0; - u32 *count; switch (attr) { - case DOMAIN_ATTR_WINDOWS: - count = data; - - if (domain->ops->domain_set_windows != NULL) - ret = domain->ops->domain_set_windows(domain, *count); - else - ret = -ENODEV; - - break; default: if (domain->ops->domain_set_attr == NULL) return -EINVAL; From 5131e08cd0f8a3faa30c1cdaf53940d6bce1715a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 19 Sep 2018 11:12:58 +0100 Subject: [PATCH 21/39] iommu: Remove .domain_{get,set}_windows Since these are trivially handled by the .domain_{get,set}_attr callbacks when relevant, we can streamline struct iommu_ops for everyone. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu_domain.c | 125 +++++++++++++------------------- include/linux/iommu.h | 6 -- 2 files changed, 51 insertions(+), 80 deletions(-) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index f83965ee3095..a906ce8cf83b 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -814,11 +814,59 @@ static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool en return 0; } +static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) +{ + struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + /* Ensure domain is inactive i.e. DMA should be disabled for the domain */ + if (dma_domain->enabled) { + pr_debug("Can't set geometry attributes as domain is active\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + /* Ensure that the geometry has been set for the domain */ + if (!dma_domain->geom_size) { + pr_debug("Please configure geometry before setting the number of windows\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + /* + * Ensure we have valid window count i.e. it should be less than + * maximum permissible limit and should be a power of two. + */ + if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) { + pr_debug("Invalid window count\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, + w_count > 1 ? w_count : 0); + if (!ret) { + kfree(dma_domain->win_arr); + dma_domain->win_arr = kcalloc(w_count, + sizeof(*dma_domain->win_arr), + GFP_ATOMIC); + if (!dma_domain->win_arr) { + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENOMEM; + } + dma_domain->win_cnt = w_count; + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, enum iommu_attr attr_type, void *data) { struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - u32 *count; int ret = 0; switch (attr_type) { @@ -832,13 +880,7 @@ static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, ret = configure_domain_dma_state(dma_domain, *(int *)data); break; case DOMAIN_ATTR_WINDOWS: - count = data; - - if (domain->ops->domain_set_windows != NULL) - ret = domain->ops->domain_set_windows(domain, *count); - else - ret = -ENODEV; - + ret = fsl_pamu_set_windows(domain, *(u32 *)data); break; default: pr_debug("Unsupported attribute type\n"); @@ -853,7 +895,6 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, enum iommu_attr attr_type, void *data) { struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - u32 *count; int ret = 0; switch (attr_type) { @@ -868,13 +909,7 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, *(int *)data = DOMAIN_ATTR_FSL_PAMUV1; break; case DOMAIN_ATTR_WINDOWS: - count = data; - - if (domain->ops->domain_get_windows != NULL) - *count = domain->ops->domain_get_windows(domain); - else - ret = -ENODEV; - + *(u32 *)data = dma_domain->win_cnt; break; default: pr_debug("Unsupported attribute type\n"); @@ -1014,62 +1049,6 @@ static void fsl_pamu_remove_device(struct device *dev) iommu_group_remove_device(dev); } -static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) -{ - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - unsigned long flags; - int ret; - - spin_lock_irqsave(&dma_domain->domain_lock, flags); - /* Ensure domain is inactive i.e. DMA should be disabled for the domain */ - if (dma_domain->enabled) { - pr_debug("Can't set geometry attributes as domain is active\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EBUSY; - } - - /* Ensure that the geometry has been set for the domain */ - if (!dma_domain->geom_size) { - pr_debug("Please configure geometry before setting the number of windows\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EINVAL; - } - - /* - * Ensure we have valid window count i.e. it should be less than - * maximum permissible limit and should be a power of two. - */ - if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) { - pr_debug("Invalid window count\n"); - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -EINVAL; - } - - ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, - w_count > 1 ? w_count : 0); - if (!ret) { - kfree(dma_domain->win_arr); - dma_domain->win_arr = kcalloc(w_count, - sizeof(*dma_domain->win_arr), - GFP_ATOMIC); - if (!dma_domain->win_arr) { - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - return -ENOMEM; - } - dma_domain->win_cnt = w_count; - } - spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - - return ret; -} - -static u32 fsl_pamu_get_windows(struct iommu_domain *domain) -{ - struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); - - return dma_domain->win_cnt; -} - static const struct iommu_ops fsl_pamu_ops = { .capable = fsl_pamu_capable, .domain_alloc = fsl_pamu_domain_alloc, @@ -1078,8 +1057,6 @@ static const struct iommu_ops fsl_pamu_ops = { .detach_dev = fsl_pamu_detach_device, .domain_window_enable = fsl_pamu_window_enable, .domain_window_disable = fsl_pamu_window_disable, - .domain_get_windows = fsl_pamu_get_windows, - .domain_set_windows = fsl_pamu_set_windows, .iova_to_phys = fsl_pamu_iova_to_phys, .domain_set_attr = fsl_pamu_set_domain_attr, .domain_get_attr = fsl_pamu_get_domain_attr, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c783648d4060..c08ba5d2d451 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -181,8 +181,6 @@ struct iommu_resv_region { * @apply_resv_region: Temporary helper call-back for iova reserved ranges * @domain_window_enable: Configure and enable a particular window for a domain * @domain_window_disable: Disable a particular window for a domain - * @domain_set_windows: Set the number of windows for a domain - * @domain_get_windows: Return the number of windows for a domain * @of_xlate: add OF master IDs to iommu grouping * @pgsize_bitmap: bitmap of all possible supported page sizes */ @@ -223,10 +221,6 @@ struct iommu_ops { int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, u64 size, int prot); void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr); - /* Set the number of windows per domain */ - int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count); - /* Get the number of windows per domain */ - u32 (*domain_get_windows)(struct iommu_domain *domain); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); From 35449adce847400ca8af25702be112fd67f42439 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 18 Sep 2018 17:38:49 +0300 Subject: [PATCH 22/39] iommu: Fix a typo This patch fixes a typo in iommu.c. Signed-off-by: Rami Rosen Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e9b50abc02a4..8b024b1f60c9 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1425,7 +1425,7 @@ struct iommu_domain *iommu_get_dma_domain(struct device *dev) } /* - * IOMMU groups are really the natrual working unit of the IOMMU, but + * IOMMU groups are really the natural working unit of the IOMMU, but * the IOMMU API works on domains and devices. Bridge that gap by * iterating over the devices in a group. Ideally we'd have a single * device which represents the requestor ID of the group, but we also From 657135f3108122556c3cf60a78c6f0e76aeb60e6 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 17 Aug 2018 23:42:22 +0800 Subject: [PATCH 23/39] iommu/arm-smmu-v3: Fix a couple of minor comment typos Fix some comment typos spotted. Signed-off-by: John Garry Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 5059d09f3202..feef1226f705 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -708,7 +708,7 @@ static void queue_inc_prod(struct arm_smmu_queue *q) } /* - * Wait for the SMMU to consume items. If drain is true, wait until the queue + * Wait for the SMMU to consume items. If sync is true, wait until the queue * is empty. Otherwise, wait until there is at least one free slot. */ static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe) @@ -2353,8 +2353,8 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) irq = smmu->combined_irq; if (irq) { /* - * Cavium ThunderX2 implementation doesn't not support unique - * irq lines. Use single irq line for all the SMMUv3 interrupts. + * Cavium ThunderX2 implementation doesn't support unique irq + * lines. Use a single irq line for all the SMMUv3 interrupts. */ ret = devm_request_threaded_irq(smmu->dev, irq, arm_smmu_combined_irq_handler, From 85c7a0f1ef624ef58173ef52ea77780257bdfe04 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 6 Sep 2018 17:59:50 +0100 Subject: [PATCH 24/39] iommu/io-pgtable-arm: Fix race handling in split_blk_unmap() In removing the pagetable-wide lock, we gained the possibility of the vanishingly unlikely case where we have a race between two concurrent unmappers splitting the same block entry. The logic to handle this is fairly straightforward - whoever loses the race frees their partial next-level table and instead dereferences the winner's newly-installed entry in order to fall back to a regular unmap, which intentionally echoes the pre-existing case of recursively splitting a 1GB block down to 4KB pages by installing a full table of 2MB blocks first. Unfortunately, the chump who implemented that logic failed to update the condition check for that fallback, meaning that if said race occurs at the last level (where the loser's unmap_idx is valid) then the unmap won't actually happen. Fix that to properly account for both the race and recursive cases. Fixes: 2c3d273eabe8 ("iommu/io-pgtable-arm: Support lockless operation") Signed-off-by: Robin Murphy [will: re-jig control flow to avoid duplicate cmpxchg test] Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 88641b4560bc..2f79efd16a05 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -574,13 +574,12 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, return 0; tablep = iopte_deref(pte, data); + } else if (unmap_idx >= 0) { + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); + return size; } - if (unmap_idx < 0) - return __arm_lpae_unmap(data, iova, size, lvl, tablep); - - io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); - return size; + return __arm_lpae_unmap(data, iova, size, lvl, tablep); } static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, From 0f02477d16980938a84aba8688a4e3a303306116 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sun, 19 Aug 2018 15:51:10 +0800 Subject: [PATCH 25/39] iommu/arm-smmu-v3: Fix unexpected CMD_SYNC timeout The condition break condition of: (int)(VAL - sync_idx) >= 0 in the __arm_smmu_sync_poll_msi() polling loop requires that sync_idx must be increased monotonically according to the sequence of the CMDs in the cmdq. However, since the msidata is populated using atomic_inc_return_relaxed() before taking the command-queue spinlock, then the following scenario can occur: CPU0 CPU1 msidata=0 msidata=1 insert cmd1 insert cmd0 smmu execute cmd1 smmu execute cmd0 poll timeout, because msidata=1 is overridden by cmd0, that means VAL=0, sync_idx=1. This is not a functional problem, since the caller will eventually either timeout or exit due to another CMD_SYNC, however it's clearly not what the code is supposed to be doing. Fix it, by incrementing the sequence count with the command-queue lock held, allowing us to drop the atomic operations altogether. Signed-off-by: Zhen Lei [will: dropped the specialised cmd building routine for now] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index feef1226f705..48baffd07aef 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -567,7 +567,7 @@ struct arm_smmu_device { int gerr_irq; int combined_irq; - atomic_t sync_nr; + u32 sync_nr; unsigned long ias; /* IPA */ unsigned long oas; /* PA */ @@ -948,14 +948,13 @@ static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu) struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC, .sync = { - .msidata = atomic_inc_return_relaxed(&smmu->sync_nr), .msiaddr = virt_to_phys(&smmu->sync_count), }, }; - arm_smmu_cmdq_build_cmd(cmd, &ent); - spin_lock_irqsave(&smmu->cmdq.lock, flags); + ent.sync.msidata = ++smmu->sync_nr; + arm_smmu_cmdq_build_cmd(cmd, &ent); arm_smmu_cmdq_insert_cmd(smmu, cmd); spin_unlock_irqrestore(&smmu->cmdq.lock, flags); @@ -2180,7 +2179,6 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) { int ret; - atomic_set(&smmu->sync_nr, 0); ret = arm_smmu_init_queues(smmu); if (ret) return ret; From 901510ee32f7190902f6fe4affb463e5d86a804c Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sun, 19 Aug 2018 15:51:11 +0800 Subject: [PATCH 26/39] iommu/arm-smmu-v3: Avoid back-to-back CMD_SYNC operations Putting adjacent CMD_SYNCs into the command queue is nonsensical, but can happen when multiple CPUs are inserting commands. Rather than leave the poor old hardware to chew through these operations, we can instead drop the subsequent SYNCs and poll for completion of the first. This has been shown to improve IO performance under pressure, where the number of SYNC operations reduces by about a third: CMD_SYNCs reduced: 19542181 CMD_SYNCs total: 58098548 (include reduced) CMDs total: 116197099 (TLBI:SYNC about 1:1) Signed-off-by: Zhen Lei Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 48baffd07aef..e395f1ff3f81 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -568,6 +568,7 @@ struct arm_smmu_device { int gerr_irq; int combined_irq; u32 sync_nr; + u8 prev_cmd_opcode; unsigned long ias; /* IPA */ unsigned long oas; /* PA */ @@ -901,6 +902,8 @@ static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd) struct arm_smmu_queue *q = &smmu->cmdq.q; bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]); + while (queue_insert_raw(q, cmd) == -ENOSPC) { if (queue_poll_cons(q, false, wfe)) dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); @@ -953,9 +956,16 @@ static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu) }; spin_lock_irqsave(&smmu->cmdq.lock, flags); - ent.sync.msidata = ++smmu->sync_nr; - arm_smmu_cmdq_build_cmd(cmd, &ent); - arm_smmu_cmdq_insert_cmd(smmu, cmd); + + /* Piggy-back on the previous command if it's a SYNC */ + if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) { + ent.sync.msidata = smmu->sync_nr; + } else { + ent.sync.msidata = ++smmu->sync_nr; + arm_smmu_cmdq_build_cmd(cmd, &ent); + arm_smmu_cmdq_insert_cmd(smmu, cmd); + } + spin_unlock_irqrestore(&smmu->cmdq.lock, flags); return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata); From 07fdef34d2be6811f00c6f9e4e2a1483cf86696c Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 20 Sep 2018 17:10:21 +0100 Subject: [PATCH 27/39] iommu/arm-smmu-v3: Implement flush_iotlb_all hook .flush_iotlb_all is currently stubbed to arm_smmu_iotlb_sync() since the only time it would ever need to actually do anything is for callers doing their own explicit batching, e.g.: iommu_unmap_fast(domain, ...); iommu_unmap_fast(domain, ...); iommu_iotlb_flush_all(domain, ...); where since io-pgtable still issues the TLBI commands implicitly in the unmap instead of implementing .iotlb_range_add, the "flush" only needs to ensure completion of those already-in-flight invalidations. However, we're about to start using it in anger with flush queues, so let's get a proper implementation wired up. Signed-off-by: Zhen Lei Reviewed-by: Robin Murphy [rm: document why it wasn't a bug] Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index e395f1ff3f81..f10c852479fc 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1781,6 +1781,14 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) return ops->unmap(ops, iova, size); } +static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + if (smmu_domain->smmu) + arm_smmu_tlb_inv_context(smmu_domain); +} + static void arm_smmu_iotlb_sync(struct iommu_domain *domain) { struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; @@ -2008,7 +2016,7 @@ static struct iommu_ops arm_smmu_ops = { .attach_dev = arm_smmu_attach_dev, .map = arm_smmu_map, .unmap = arm_smmu_unmap, - .flush_iotlb_all = arm_smmu_iotlb_sync, + .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, From 7d321bd3542500caf125249f44dc37cb4e738013 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 1 Oct 2018 12:42:49 +0100 Subject: [PATCH 28/39] iommu/arm-smmu: Ensure that page-table updates are visible before TLBI The IO-pgtable code relies on the driver TLB invalidation callbacks to ensure that all page-table updates are visible to the IOMMU page-table walker. In the case that the page-table walker is cache-coherent, we cannot rely on an implicit DSB from the DMA-mapping code, so we must ensure that we execute a DSB in our tlb_add_flush() callback prior to triggering the invalidation. Cc: Cc: Robin Murphy Fixes: 2df7a25ce4a7 ("iommu/arm-smmu: Clean up DMA API usage") Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index fd1b80ef9490..e7cbf4fcf61d 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -469,6 +469,9 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + if (stage1) { reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; @@ -510,6 +513,9 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, struct arm_smmu_domain *smmu_domain = cookie; void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); } From 2da274cdf998a1c12afa6b5975db2df1df01edf1 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 20 Sep 2018 17:10:22 +0100 Subject: [PATCH 29/39] iommu/dma: Add support for non-strict mode With the flush queue infrastructure already abstracted into IOVA domains, hooking it up in iommu-dma is pretty simple. Since there is a degree of dependency on the IOMMU driver knowing what to do to play along, we key the whole thing off a domain attribute which will be set on default DMA ops domains to request non-strict invalidation. That way, drivers can indicate the appropriate support by acknowledging the attribute, and we can easily fall back to strict invalidation otherwise. The flush queue callback needs a handle on the iommu_domain which owns our cookie, so we have to add a pointer back to that, but neatly, that's also sufficient to indicate whether we're using a flush queue or not, and thus which way to release IOVAs. The only slight subtlety is switching __iommu_dma_unmap() from calling iommu_unmap() to explicit iommu_unmap_fast()/iommu_tlb_sync() so that we can elide the sync entirely in non-strict mode. Signed-off-by: Zhen Lei [rm: convert to domain attribute, tweak comments and commit message] Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/dma-iommu.c | 32 +++++++++++++++++++++++++++++++- include/linux/iommu.h | 1 + 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 511ff9a1d6d9..cc1bf786cfac 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -55,6 +55,9 @@ struct iommu_dma_cookie { }; struct list_head msi_page_list; spinlock_t msi_lock; + + /* Domain for flush queue callback; NULL if flush queue not in use */ + struct iommu_domain *fq_domain; }; static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) @@ -257,6 +260,20 @@ static int iova_reserve_iommu_regions(struct device *dev, return ret; } +static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad) +{ + struct iommu_dma_cookie *cookie; + struct iommu_domain *domain; + + cookie = container_of(iovad, struct iommu_dma_cookie, iovad); + domain = cookie->fq_domain; + /* + * The IOMMU driver supporting DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE + * implies that ops->flush_iotlb_all must be non-NULL. + */ + domain->ops->flush_iotlb_all(domain); +} + /** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() @@ -275,6 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; unsigned long order, base_pfn, end_pfn; + int attr; if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) return -EINVAL; @@ -308,6 +326,13 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, } init_iova_domain(iovad, 1UL << order, base_pfn); + + if (!cookie->fq_domain && !iommu_domain_get_attr(domain, + DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) { + cookie->fq_domain = domain; + init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL); + } + if (!dev) return 0; @@ -393,6 +418,9 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, /* The MSI case is only ever cleaning up its most recent allocation */ if (cookie->type == IOMMU_DMA_MSI_COOKIE) cookie->msi_iova -= size; + else if (cookie->fq_domain) /* non-strict mode */ + queue_iova(iovad, iova_pfn(iovad, iova), + size >> iova_shift(iovad), 0); else free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); @@ -408,7 +436,9 @@ static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr, dma_addr -= iova_off; size = iova_align(iovad, size + iova_off); - WARN_ON(iommu_unmap(domain, dma_addr, size) != size); + WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size); + if (!cookie->fq_domain) + iommu_tlb_sync(domain); iommu_dma_free_iova(cookie, dma_addr, size); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 87994c265bf5..decabe8e8dbe 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -124,6 +124,7 @@ enum iommu_attr { DOMAIN_ATTR_FSL_PAMU_ENABLE, DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_NESTING, /* two stages of translation */ + DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, DOMAIN_ATTR_MAX, }; From 68a6efe86f6a16e25556a2aff40efad41097b486 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 20 Sep 2018 17:10:23 +0100 Subject: [PATCH 30/39] iommu: Add "iommu.strict" command line option Add a generic command line option to enable lazy unmapping via IOVA flush queues, which will initally be suuported by iommu-dma. This echoes the semantics of "intel_iommu=strict" (albeit with the opposite default value), but in the driver-agnostic fashion of "iommu.passthrough". Signed-off-by: Zhen Lei [rm: move handling out of SMMUv3 driver, clean up documentation] Signed-off-by: Robin Murphy [will: dropped broken printk when parsing command-line option] Signed-off-by: Will Deacon --- Documentation/admin-guide/kernel-parameters.txt | 12 ++++++++++++ drivers/iommu/iommu.c | 14 ++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9871e649ffef..92ae12aeabf4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1749,6 +1749,18 @@ nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. + iommu.strict= [ARM64] Configure TLB invalidation behaviour + Format: { "0" | "1" } + 0 - Lazy mode. + Request that DMA unmap operations use deferred + invalidation of hardware TLBs, for increased + throughput at the cost of reduced device isolation. + Will fall back to strict mode if not supported by + the relevant IOMMU driver. + 1 - Strict mode (default). + DMA unmap operations invalidate IOMMU hardware TLBs + synchronously. + iommu.passthrough= [ARM64] Configure DMA to bypass the IOMMU by default. Format: { "0" | "1" } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8c15c5980299..2b6dad2aa9f1 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -41,6 +41,7 @@ static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; #else static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA; #endif +static bool iommu_dma_strict __read_mostly = true; struct iommu_callback_data { const struct iommu_ops *ops; @@ -131,6 +132,12 @@ static int __init iommu_set_def_domain_type(char *str) } early_param("iommu.passthrough", iommu_set_def_domain_type); +static int __init iommu_dma_setup(char *str) +{ + return kstrtobool(str, &iommu_dma_strict); +} +early_param("iommu.strict", iommu_dma_setup); + static ssize_t iommu_group_attr_show(struct kobject *kobj, struct attribute *__attr, char *buf) { @@ -1072,6 +1079,13 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) group->default_domain = dom; if (!group->domain) group->domain = dom; + + if (dom && !iommu_dma_strict) { + int attr = 1; + iommu_domain_set_attr(dom, + DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, + &attr); + } } ret = iommu_group_add_device(group, dev); From b6b65ca20bc93d14319f9b5cf98fd3c19a4244e3 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 20 Sep 2018 17:10:24 +0100 Subject: [PATCH 31/39] iommu/io-pgtable-arm: Add support for non-strict mode Non-strict mode is simply a case of skipping 'regular' leaf TLBIs, since the sync is already factored out into ops->iotlb_sync at the core API level. Non-leaf invalidations where we change the page table structure itself still have to be issued synchronously in order to maintain walk caches correctly. To save having to reason about it too much, make sure the invalidation in arm_lpae_split_blk_unmap() just performs its own unconditional sync to minimise the window in which we're technically violating the break- before-make requirement on a live mapping. This might work out redundant with an outer-level sync for strict unmaps, but we'll never be splitting blocks on a DMA fastpath anyway. Signed-off-by: Zhen Lei [rm: tweak comment, commit message, split_blk_unmap logic and barriers] Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 14 ++++++++++++-- drivers/iommu/io-pgtable.h | 5 +++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 2f79efd16a05..237cacd4a62b 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -576,6 +576,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, tablep = iopte_deref(pte, data); } else if (unmap_idx >= 0) { io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); + io_pgtable_tlb_sync(&data->iop); return size; } @@ -609,6 +610,13 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, io_pgtable_tlb_sync(iop); ptep = iopte_deref(pte, data); __arm_lpae_free_pgtable(data, lvl + 1, ptep); + } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) { + /* + * Order the PTE update against queueing the IOVA, to + * guarantee that a flush callback from a different CPU + * has observed it before the TLBIALL can be issued. + */ + smp_wmb(); } else { io_pgtable_tlb_add_flush(iop, iova, size, size, true); } @@ -771,7 +779,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) u64 reg; struct arm_lpae_io_pgtable *data; - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA)) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA | + IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -863,7 +872,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data; /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA | + IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; data = arm_lpae_alloc_pgtable(cfg); diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index 2df79093cad9..47d5ae559329 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -71,12 +71,17 @@ struct io_pgtable_cfg { * be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a * software-emulated IOMMU), such that pagetable updates need not * be treated as explicit DMA data. + * + * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs + * on unmap, for DMA domains using the flush queue mechanism for + * delayed invalidation. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) #define IO_PGTABLE_QUIRK_NO_DMA BIT(4) + #define IO_PGTABLE_QUIRK_NON_STRICT BIT(5) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; From 9662b99a19abccb0b7bfc91abb3fec1447c35bf0 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 20 Sep 2018 17:10:25 +0100 Subject: [PATCH 32/39] iommu/arm-smmu-v3: Add support for non-strict mode Now that io-pgtable knows how to dodge strict TLB maintenance, all that's left to do is bridge the gap between the IOMMU core requesting DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE for default domains, and showing the appropriate IO_PGTABLE_QUIRK_NON_STRICT flag to alloc_io_pgtable_ops(). Signed-off-by: Zhen Lei [rm: convert to domain attribute, tweak commit message] Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 79 ++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index f10c852479fc..db402e8b068b 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -612,6 +612,7 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; + bool non_strict; enum arm_smmu_domain_stage stage; union { @@ -1407,6 +1408,12 @@ static void arm_smmu_tlb_inv_context(void *cookie) cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; } + /* + * NOTE: when io-pgtable is in non-strict mode, we may get here with + * PTEs previously cleared by unmaps on the current CPU not yet visible + * to the SMMU. We are relying on the DSB implicit in queue_inc_prod() + * to guarantee those are observed before the TLBI. Do be careful, 007. + */ arm_smmu_cmdq_issue_cmd(smmu, &cmd); __arm_smmu_tlb_sync(smmu); } @@ -1633,6 +1640,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) if (smmu->features & ARM_SMMU_FEAT_COHERENCY) pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + if (smmu_domain->non_strict) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; + pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) return -ENOMEM; @@ -1934,15 +1944,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - - switch (attr) { - case DOMAIN_ATTR_NESTING: - *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); - return 0; + switch (domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); + return 0; + default: + return -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch (attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + *(int *)data = smmu_domain->non_strict; + return 0; + default: + return -ENODEV; + } + break; default: - return -ENODEV; + return -EINVAL; } } @@ -1952,26 +1974,37 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - mutex_lock(&smmu_domain->init_mutex); - switch (attr) { - case DOMAIN_ATTR_NESTING: - if (smmu_domain->smmu) { - ret = -EPERM; - goto out_unlock; + switch (domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + if (smmu_domain->smmu) { + ret = -EPERM; + goto out_unlock; + } + + if (*(int *)data) + smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + break; + default: + ret = -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch(attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + smmu_domain->non_strict = *(int *)data; + break; + default: + ret = -ENODEV; } - - if (*(int *)data) - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - break; default: - ret = -ENODEV; + ret = -EINVAL; } out_unlock: From b2dfeba654cb08db327d0ed4547b66c2f8fce997 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 20 Sep 2018 17:10:26 +0100 Subject: [PATCH 33/39] iommu/io-pgtable-arm-v7s: Add support for non-strict mode As for LPAE, it's simply a case of skipping the leaf invalidation for a regular unmap, and ensuring that the one in split_blk_unmap() is paired with an explicit sync ASAP rather than relying on one which might only eventually happen way down the line. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index b5948ba6b3b3..445c3bde0480 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -587,6 +587,7 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, } io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); + io_pgtable_tlb_sync(&data->iop); return size; } @@ -642,6 +643,13 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, io_pgtable_tlb_sync(iop); ptep = iopte_deref(pte[i], lvl); __arm_v7s_free_table(ptep, lvl + 1, data); + } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) { + /* + * Order the PTE update against queueing the IOVA, to + * guarantee that a flush callback from a different CPU + * has observed it before the TLBIALL can be issued. + */ + smp_wmb(); } else { io_pgtable_tlb_add_flush(iop, iova, blk_size, blk_size, true); @@ -712,7 +720,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, IO_PGTABLE_QUIRK_NO_PERMS | IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_4GB | - IO_PGTABLE_QUIRK_NO_DMA)) + IO_PGTABLE_QUIRK_NO_DMA | + IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */ From 44f6876a00e83df5fd28681502b19b0f51e4a3c6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 20 Sep 2018 17:10:27 +0100 Subject: [PATCH 34/39] iommu/arm-smmu: Support non-strict mode All we need is to wire up .flush_iotlb_all properly and implement the domain attribute, and iommu-dma and io-pgtable will do the rest for us. The only real subtlety is documenting the barrier semantics we're introducing between io-pgtable and the drivers for non-strict flushes. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 93 ++++++++++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e7cbf4fcf61d..1030027cbcc6 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -246,6 +246,7 @@ struct arm_smmu_domain { const struct iommu_gather_ops *tlb_ops; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; + bool non_strict; struct mutex init_mutex; /* Protects smmu pointer */ spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ struct iommu_domain domain; @@ -447,7 +448,11 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie) struct arm_smmu_cfg *cfg = &smmu_domain->cfg; void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); - writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); + /* + * NOTE: this is not a relaxed write; it needs to guarantee that PTEs + * cleared by the current CPU are visible to the SMMU before the TLBI. + */ + writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); arm_smmu_tlb_sync_context(cookie); } @@ -457,7 +462,8 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie) struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *base = ARM_SMMU_GR0(smmu); - writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); + /* NOTE: see above */ + writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); arm_smmu_tlb_sync_global(smmu); } @@ -869,6 +875,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + if (smmu_domain->non_strict) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; + smmu_domain->smmu = smmu; pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) { @@ -1258,6 +1267,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, return ops->unmap(ops, iova, size); } +static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + if (smmu_domain->tlb_ops) + smmu_domain->tlb_ops->tlb_flush_all(smmu_domain); +} + static void arm_smmu_iotlb_sync(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -1476,15 +1493,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - - switch (attr) { - case DOMAIN_ATTR_NESTING: - *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); - return 0; + switch(domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); + return 0; + default: + return -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch (attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + *(int *)data = smmu_domain->non_strict; + return 0; + default: + return -ENODEV; + } + break; default: - return -ENODEV; + return -EINVAL; } } @@ -1494,28 +1523,38 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - mutex_lock(&smmu_domain->init_mutex); - switch (attr) { - case DOMAIN_ATTR_NESTING: - if (smmu_domain->smmu) { - ret = -EPERM; - goto out_unlock; + switch(domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + if (smmu_domain->smmu) { + ret = -EPERM; + goto out_unlock; + } + + if (*(int *)data) + smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + break; + default: + ret = -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch (attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + smmu_domain->non_strict = *(int *)data; + break; + default: + ret = -ENODEV; } - - if (*(int *)data) - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - break; default: - ret = -ENODEV; + ret = -EINVAL; } - out_unlock: mutex_unlock(&smmu_domain->init_mutex); return ret; @@ -1568,7 +1607,7 @@ static struct iommu_ops arm_smmu_ops = { .attach_dev = arm_smmu_attach_dev, .map = arm_smmu_map, .unmap = arm_smmu_unmap, - .flush_iotlb_all = arm_smmu_iotlb_sync, + .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, From 24d2c521749d8547765b555b7a85cca179bb2275 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Oct 2018 12:32:46 +0200 Subject: [PATCH 35/39] iommu/amd: Move iommu_init_pci() to .init section The function is only called from another __init function, so it should be moved to .init too. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 84b3e4445d46..9c3d610e1e19 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1709,7 +1709,7 @@ static const struct attribute_group *amd_iommu_groups[] = { NULL, }; -static int iommu_init_pci(struct amd_iommu *iommu) +static int __init iommu_init_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; u32 range, misc, low, high; From a089845b3ed26bab5be07ed20493e8576bb86b9c Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 4 Oct 2018 17:25:47 +0100 Subject: [PATCH 36/39] dt-bindings: iommu: ipmmu-vmsa: Add r8a7744 support Document RZ/G1N (R8A7744) SoC bindings. Signed-off-by: Biju Das Reviewed-by: Chris Paterson Reviewed-by: Simon Horman Signed-off-by: Joerg Roedel --- Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt index c6e2d855fe13..377ee639d103 100644 --- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt +++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt @@ -12,6 +12,7 @@ Required Properties: - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU. - "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU. + - "renesas,ipmmu-r8a7744" for the R8A7744 (RZ/G1N) IPMMU. - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU. - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU. - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU. From e84b7cc457f99ff5d034914e78e1c329df89ebee Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 8 Oct 2018 10:24:19 +0800 Subject: [PATCH 37/39] iommu/amd: Add default branch in amd_iommu_capable() Recent gcc warns about switching on an enumeration, but not having an explicit case statement for all members of the enumeration. To show the compiler this is intentional, we simply add a default case with nothing more than a break statement. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 73e47d93e7a0..e27f5ee45f8a 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3083,6 +3083,8 @@ static bool amd_iommu_capable(enum iommu_cap cap) return (irq_remapping_enabled == 1); case IOMMU_CAP_NOEXEC: return false; + default: + break; } return false; From 2d7ca2c4b1991667f8ae142208046a12c8493f91 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Wed, 10 Oct 2018 11:29:26 +0100 Subject: [PATCH 38/39] iommu/arm-smmu-v3: Add SPDX header Replace license text with SDPX header Signed-off-by: Andrew Murray Acked-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index db402e8b068b..c7bd18ea4a48 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IOMMU API for ARM architected SMMUv3 implementations. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * * Copyright (C) 2015 ARM Limited * * Author: Will Deacon From 5e731073bc0a4a53a213412dbd33982d829560f1 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Wed, 10 Oct 2018 11:29:27 +0100 Subject: [PATCH 39/39] iommu/arm-smmu-v3: Remove unnecessary wrapper function Simplify the code by removing an unnecessary wrapper function. This was left behind by commit 2f657add07a8 ("iommu/arm-smmu-v3: Specialise CMD_SYNC handling") Signed-off-by: Andrew Murray Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index c7bd18ea4a48..6947ccf26512 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1371,15 +1371,11 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) } /* IO_PGTABLE API */ -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) -{ - arm_smmu_cmdq_issue_sync(smmu); -} - static void arm_smmu_tlb_sync(void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; - __arm_smmu_tlb_sync(smmu_domain->smmu); + + arm_smmu_cmdq_issue_sync(smmu_domain->smmu); } static void arm_smmu_tlb_inv_context(void *cookie) @@ -1404,7 +1400,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) * to guarantee those are observed before the TLBI. Do be careful, 007. */ arm_smmu_cmdq_issue_cmd(smmu, &cmd); - __arm_smmu_tlb_sync(smmu); + arm_smmu_cmdq_issue_sync(smmu); } static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, @@ -1793,7 +1789,7 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain) struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; if (smmu) - __arm_smmu_tlb_sync(smmu); + arm_smmu_cmdq_issue_sync(smmu); } static phys_addr_t